Compare commits

...

2 Commits

Author SHA1 Message Date
anikulk
a4a1bff1cc Enable intel-openvino ChromeOS build with intel-gnalib library (#4761)
We use the header files from /usr/include.
Enable necessary build changes for the same.
- Remove unused stb_lib libraries.
- Set SSE4.2 implementation as default
- Add div_by_n to the CMake builder rules
2021-03-12 20:01:22 +03:00
anikulk
83964338b0 Chromeos enabling (#4704)
* ChromeOS enabling changes in DLDT/OPENVINO

Changes for new activations
Enable Debug messages
Add exp activation files
Enable FP32 functions for newly added activations
Enable activation names in debug files
Modify Scale Factor calculation

* Add support for SetState in GNAMemory

* Use rounding when changing the scale + switch to Elevoc's identity

* Merge branch 'private/kmagiers/GNAPlugin_Memory_layer_as_output' into 'master'

Input memory layer support in addOutput();

See merge request inference-engine/dldt!7016

* Porting Dldt to ChromeOS

Signed-off-by: Anisha Kulkarni <anisha.dattatraya.kulkarni@intel.com>

* Optimizations for builder & GNAPlugin

 - Optimize graph building addLayer
 - Cache Activation functions pwl
 - Use AVX/SSE intrinsics for Quantization
 - Add config to add Identity layer Scale Factor
 - Port Permute related changes, Permute
   operation is needed for Layer Normalization
   batching.
 - Additionally, enable Negative Half Log
   Activation Layer

* Work Around for DivByN and permute

- Scale Factors need further tuning before
  the DivByN operation in GNA Layer Norm
- Work around for Permute

* BACKPORT:Enable CoreThreadingTestsWithIterations tests for GNA

Co-authored-by: Raviraj P Sitaram <raviraj.p.sitaram@intel.com>
Co-authored-by: Denis Orlov <denis.orlov@intel.com>
Co-authored-by: Smirnov, Eugene <eugene.smirnov@intel.com>
Co-authored-by: Anisha Kulkarni <anisha.dattatraya.kulkarni@intel.corp-partner.google.com>
2021-03-11 12:10:53 +03:00
66 changed files with 1978 additions and 11510 deletions

View File

@@ -139,7 +139,7 @@ if(WIN32)
else()
# TODO: enable for C sources as well
# ie_add_compiler_flags(-Werror)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error")
ie_add_compiler_flags(-ffunction-sections -fdata-sections)
ie_add_compiler_flags(-fvisibility=hidden)
ie_add_compiler_flags(-fdiagnostics-show-option)

View File

@@ -85,11 +85,15 @@ add_cpplint_report_target()
ie_cpack_add_component(cpp_samples REQUIRED DEPENDS core)
if(UNIX)
install(DIRECTORY samples/
DESTINATION ${IE_CPACK_IE_DIR}/samples/cpp
COMPONENT cpp_samples
USE_SOURCE_PERMISSIONS
PATTERN *.bat EXCLUDE)
if (${TARGET_OS} STREQUAL "CHROMEOS")
message(STATUS " Skipping cpp samples")
else()
install(DIRECTORY samples/
DESTINATION ${IE_CPACK_IE_DIR}/samples/cpp
COMPONENT cpp_samples
USE_SOURCE_PERMISSIONS
PATTERN *.bat EXCLUDE)
endif()
elseif(WIN32)
install(DIRECTORY samples
DESTINATION ${IE_CPACK_IE_DIR}/samples/cpp
@@ -103,31 +107,42 @@ endif()
ie_cpack_add_component(c_samples REQUIRED DEPENDS core)
if(UNIX)
install(PROGRAMS samples/build_samples.sh
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples)
if (${TARGET_OS} STREQUAL "CHROMEOS")
message(STATUS " Skipping cpp samples")
else()
install(PROGRAMS samples/build_samples.sh
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples)
endif()
elseif(WIN32)
install(PROGRAMS samples/build_samples_msvc.bat
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples)
endif()
install(DIRECTORY ie_bridges/c/samples/
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples
PATTERN ie_bridges/c/samples/CMakeLists.txt EXCLUDE)
if (${TARGET_OS} STREQUAL "CHROMEOS")
message(STATUS " Skipping ie_bridges c/cpp samples")
else()
install(DIRECTORY ie_bridges/c/samples/
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples
PATTERN ie_bridges/c/samples/CMakeLists.txt EXCLUDE)
install(FILES samples/CMakeLists.txt
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples)
install(FILES samples/CMakeLists.txt
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples)
endif()
# install Python samples
if (${TARGET_OS} STREQUAL "CHROMEOS")
message(STATUS "Skipping python samples")
else()
ie_cpack_add_component(python_samples REQUIRED DEPENDS core)
ie_cpack_add_component(python_samples REQUIRED DEPENDS core)
install(DIRECTORY ${ie_python_api_SOURCE_DIR}/sample/
DESTINATION ${IE_CPACK_IE_DIR}/samples/python
COMPONENT python_samples)
install(DIRECTORY ${ie_python_api_SOURCE_DIR}/sample/
DESTINATION ${IE_CPACK_IE_DIR}/samples/python
COMPONENT python_samples)
endif()
# Custom target to build only Inference Engine Developer Package targets

View File

@@ -35,7 +35,9 @@ else()
set(GNA_LIB_DIR x64 CACHE STRING "" FORCE)
set(libGNA_INCLUDE_DIRS "${GNA}/include" CACHE STRING "" FORCE)
endif()
set(libGNA_LIBRARIES_BASE_PATH ${GNA}/${GNA_PLATFORM_DIR}/${GNA_LIB_DIR} CACHE STRING "" FORCE)
#set(libGNA_LIBRARIES_BASE_PATH ${GNA}/${GNA_PLATFORM_DIR}/${GNA_LIB_DIR} CACHE STRING "" FORCE)
# Ebuild fails to copy x64 named folder during build process
set(libGNA_LIBRARIES_BASE_PATH ${GNA}/${GNA_PLATFORM_DIR} CACHE STRING "" FORCE)
add_library(libGNA::KERNEL SHARED IMPORTED)
find_library(GNA_KERNEL_LIBRARY

View File

@@ -49,26 +49,31 @@ endif ()
## Intel OMP package
if (THREADING STREQUAL "OMP")
if (WIN32)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_WIN "iomp.zip"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
elseif(LINUX)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_LIN "iomp.tgz"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
else(APPLE)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_MAC "iomp_20190130_mac.tgz"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
endif()
log_rpath_from_dir(OMP "${OMP}/lib")
if (NOT DEFINED OMP_DIR)
if (WIN32)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_WIN "iomp.zip"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
elseif(LINUX)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_LIN "iomp.tgz"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
else(APPLE)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_MAC "iomp_20190130_mac.tgz"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
endif()
log_rpath_from_dir(OMP "${OMP}/lib")
else()
set(OMP ${IE_MAIN_SOURCE_DIR}/${OMP_DIR})
log_rpath_from_dir(OMP "${OMP}/lib")
endif()
debug_message(STATUS "intel_omp=" ${OMP})
endif ()
@@ -183,22 +188,26 @@ if (ENABLE_GNA)
GNA_LIB_DIR
libGNA_INCLUDE_DIRS
libGNA_LIBRARIES_BASE_PATH)
if (GNA_LIBRARY_VERSION STREQUAL "GNA1")
RESOLVE_DEPENDENCY(GNA
ARCHIVE_UNIFIED "gna_20181120.zip"
TARGET_PATH "${TEMP}/gna")
else()
if(GNA_LIBRARY_VERSION STREQUAL "GNA1_1401")
set(GNA_VERSION "01.00.00.1401")
endif()
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
set(GNA_VERSION "02.00.00.0587")
endif()
RESOLVE_DEPENDENCY(GNA
ARCHIVE_UNIFIED "GNA_${GNA_VERSION}.zip"
TARGET_PATH "${TEMP}/gna_${GNA_VERSION}"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*")
endif()
if (NOT DEFINED GNA_DIR)
if (GNA_LIBRARY_VERSION STREQUAL "GNA1")
RESOLVE_DEPENDENCY(GNA
ARCHIVE_UNIFIED "gna_20181120.zip"
TARGET_PATH "${TEMP}/gna")
else()
if(GNA_LIBRARY_VERSION STREQUAL "GNA1_1401")
set(GNA_VERSION "01.00.00.1401")
endif()
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
set(GNA_VERSION "02.00.00.0587")
endif()
RESOLVE_DEPENDENCY(GNA
ARCHIVE_UNIFIED "GNA_${GNA_VERSION}.zip"
TARGET_PATH "${TEMP}/gna_${GNA_VERSION}"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*")
endif()
else()
set(GNA ${IE_MAIN_SOURCE_DIR}/${GNA_DIR})
endif()
debug_message(STATUS "gna=" ${GNA})
endif()

View File

@@ -4,29 +4,33 @@
if (LINUX)
function(get_linux_name res_var)
if (NOT EXISTS "/etc/lsb-release")
execute_process(COMMAND find -L /etc/ -maxdepth 1 -type f -name *-release -exec cat {} \;
OUTPUT_VARIABLE release_data RESULT_VARIABLE result)
set(name_regex "NAME=\"([^ \"\n]*).*\"\n")
set(version_regex "VERSION=\"([0-9]+(\\.[0-9]+)?)[^\n]*\"")
else ()
#linux version detection using cat /etc/lsb-release
file(READ "/etc/lsb-release" release_data)
set(name_regex "DISTRIB_ID=([^ \n]*)\n")
set(version_regex "DISTRIB_RELEASE=([0-9]+(\\.[0-9]+)?)")
endif ()
if (NOT DEFINED TARGET_OS)
if (NOT EXISTS "/etc/lsb-release")
execute_process(COMMAND find -L /etc/ -maxdepth 1 -type f -name *-release -exec cat {} \;
OUTPUT_VARIABLE release_data RESULT_VARIABLE result)
set(name_regex "NAME=\"([^ \"\n]*).*\"\n")
set(version_regex "VERSION=\"([0-9]+(\\.[0-9]+)?)[^\n]*\"")
else ()
#linux version detection using cat /etc/lsb-release
file(READ "/etc/lsb-release" release_data)
set(name_regex "DISTRIB_ID=([^ \n]*)\n")
set(version_regex "DISTRIB_RELEASE=([0-9]+(\\.[0-9]+)?)")
endif ()
string(REGEX MATCH ${name_regex} name ${release_data})
set(os_name ${CMAKE_MATCH_1})
string(REGEX MATCH ${name_regex} name ${release_data})
set(os_name ${CMAKE_MATCH_1})
string(REGEX MATCH ${version_regex} version ${release_data})
set(os_name "${os_name} ${CMAKE_MATCH_1}")
if (os_name)
set(${res_var} ${os_name} PARENT_SCOPE)
else ()
set(${res_var} NOTFOUND PARENT_SCOPE)
endif ()
string(REGEX MATCH ${version_regex} version ${release_data})
set(os_name "${os_name} ${CMAKE_MATCH_1}")
if (os_name)
set(${res_var} ${os_name} PARENT_SCOPE)
else ()
set(${res_var} NOTFOUND PARENT_SCOPE)
endif ()
else()
set(os_name ${TARGET_OS})
set(${res_var} ${os_name} PARENT_SCOPE)
endif()
endfunction()
endif ()

View File

@@ -0,0 +1,55 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <builders/ie_layer_decorator.hpp>
#include <ie_network.hpp>
#include <string>
namespace InferenceEngine {
namespace Builder {
/**
* @brief The class represents a builder for DivByN layer
*/
class INFERENCE_ENGINE_API_CLASS(DivByNLayer): public LayerDecorator {
public:
/**
* @brief The constructor creates a builder with the name
* @param name Layer name
*/
explicit DivByNLayer(const std::string& name = "");
/**
* @brief The constructor creates a builder from generic builder
* @param layer pointer to generic builder
*/
explicit DivByNLayer(const Layer::Ptr& layer);
/**
* @brief The constructor creates a builder from generic builder
* @param layer constant pointer to generic builder
*/
explicit DivByNLayer(const Layer::CPtr& layer);
/**
* @brief Sets the name for the layer
* @param name Layer name
* @return reference to layer builder
*/
DivByNLayer& setName(const std::string& name);
/**
* @brief Returns port with shapes for the layer
* @return Port with shapes
*/
const Port& getPort() const;
/**
* @brief Sets port shapes for the layer
* @param port Port with shapes
* @return reference to layer builder
*/
DivByNLayer& setPort(const Port& port);
};
} // namespace Builder
} // namespace InferenceEngine

View File

@@ -0,0 +1,55 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <builders/ie_layer_decorator.hpp>
#include <ie_network.hpp>
#include <string>
namespace InferenceEngine {
namespace Builder {
/**
* @brief The class represents a builder for Exp layer
*/
class INFERENCE_ENGINE_API_CLASS(ExpLayer): public LayerDecorator {
public:
/**
* @brief The constructor creates a builder with the name
* @param name Layer name
*/
explicit ExpLayer(const std::string& name = "");
/**
* @brief The constructor creates a builder from generic builder
* @param layer pointer to generic builder
*/
explicit ExpLayer(const Layer::Ptr& layer);
/**
* @brief The constructor creates a builder from generic builder
* @param layer constant pointer to generic builder
*/
explicit ExpLayer(const Layer::CPtr& layer);
/**
* @brief Sets the name for the layer
* @param name Layer name
* @return reference to layer builder
*/
ExpLayer& setName(const std::string& name);
/**
* @brief Returns port with shapes for the layer
* @return Port with shapes
*/
const Port& getPort() const;
/**
* @brief Sets port shapes for the layer
* @param port Port with shapes
* @return reference to layer builder
*/
ExpLayer& setPort(const Port& port);
};
} // namespace Builder
} // namespace InferenceEngine

View File

@@ -0,0 +1,55 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <builders/ie_layer_decorator.hpp>
#include <ie_network.hpp>
#include <string>
namespace InferenceEngine {
namespace Builder {
/**
* @brief The class represents a builder for Identity layer
*/
class INFERENCE_ENGINE_API_CLASS(IdentityLayer): public LayerDecorator {
public:
/**
* @brief The constructor creates a builder with the name
* @param name Layer name
*/
explicit IdentityLayer(const std::string& name = "");
/**
* @brief The constructor creates a builder from generic builder
* @param layer pointer to generic builder
*/
explicit IdentityLayer(const Layer::Ptr& layer);
/**
* @brief The constructor creates a builder from generic builder
* @param layer constant pointer to generic builder
*/
explicit IdentityLayer(const Layer::CPtr& layer);
/**
* @brief Sets the name for the layer
* @param name Layer name
* @return reference to layer builder
*/
IdentityLayer& setName(const std::string& name);
/**
* @brief Returns port with shapes for the layer
* @return Port with shapes
*/
const Port& getPort() const;
/**
* @brief Sets port shapes for the layer
* @param port Port with shapes
* @return reference to layer builder
*/
IdentityLayer& setPort(const Port& port);
};
} // namespace Builder
} // namespace InferenceEngine

View File

@@ -0,0 +1,55 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <builders/ie_layer_decorator.hpp>
#include <ie_network.hpp>
#include <string>
namespace InferenceEngine {
namespace Builder {
/**
* @brief The class represents a builder for Log layer
*/
class INFERENCE_ENGINE_API_CLASS(LogLayer): public LayerDecorator {
public:
/**
* @brief The constructor creates a builder with the name
* @param name Layer name
*/
explicit LogLayer(const std::string& name = "");
/**
* @brief The constructor creates a builder from generic builder
* @param layer pointer to generic builder
*/
explicit LogLayer(const Layer::Ptr& layer);
/**
* @brief The constructor creates a builder from generic builder
* @param layer constant pointer to generic builder
*/
explicit LogLayer(const Layer::CPtr& layer);
/**
* @brief Sets the name for the layer
* @param name Layer name
* @return reference to layer builder
*/
LogLayer& setName(const std::string& name);
/**
* @brief Returns port with shapes for the layer
* @return Port with shapes
*/
const Port& getPort() const;
/**
* @brief Sets port shapes for the layer
* @param port Port with shapes
* @return reference to layer builder
*/
LogLayer& setPort(const Port& port);
};
} // namespace Builder
} // namespace InferenceEngine

View File

@@ -0,0 +1,55 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <builders/ie_layer_decorator.hpp>
#include <ie_network.hpp>
#include <string>
namespace InferenceEngine {
namespace Builder {
/**
* @brief The class represents a builder for NegHalfLog layer
*/
class INFERENCE_ENGINE_API_CLASS(NegHalfLogLayer): public LayerDecorator {
public:
/**
* @brief The constructor creates a builder with the name
* @param name Layer name
*/
explicit NegHalfLogLayer(const std::string& name = "");
/**
* @brief The constructor creates a builder from generic builder
* @param layer pointer to generic builder
*/
explicit NegHalfLogLayer(const Layer::Ptr& layer);
/**
* @brief The constructor creates a builder from generic builder
* @param layer constant pointer to generic builder
*/
explicit NegHalfLogLayer(const Layer::CPtr& layer);
/**
* @brief Sets the name for the layer
* @param name Layer name
* @return reference to layer builder
*/
NegHalfLogLayer& setName(const std::string& name);
/**
* @brief Returns port with shapes for the layer
* @return Port with shapes
*/
const Port& getPort() const;
/**
* @brief Sets port shapes for the layer
* @param port Port with shapes
* @return reference to layer builder
*/
NegHalfLogLayer& setPort(const Port& port);
};
} // namespace Builder
} // namespace InferenceEngine

View File

@@ -56,7 +56,7 @@ DECLARE_GNA_CONFIG_KEY(SCALE_FACTOR);
* currently supported values are I16, I8
*/
DECLARE_GNA_CONFIG_KEY(PRECISION);
DECLARE_GNA_CONFIG_KEY(IDENTITY_SCALE_FACTOR);
/**
* @brief if turned on, dump GNA firmware model into specified file

View File

@@ -50,6 +50,11 @@
#include <builders/ie_roi_pooling_layer.hpp>
#include <builders/ie_scale_shift_layer.hpp>
#include <builders/ie_sigmoid_layer.hpp>
#include <builders/ie_log_layer.hpp>
#include <builders/ie_neghalf_log_layer.hpp>
#include <builders/ie_exp_layer.hpp>
#include <builders/ie_divbyn_layer.hpp>
#include <builders/ie_identity_layer.hpp>
#include <builders/ie_simpler_nms_layer.hpp>
#include <builders/ie_softmax_layer.hpp>
#include <builders/ie_split_layer.hpp>

View File

@@ -2079,4 +2079,127 @@ public:
virtual ~ScatterLayer();
};
class INFERENCE_ENGINE_API_CLASS(TanHLayer): public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new TanHLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
//explicit TanHLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~TanHLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by TanH — confirm
*/
float negative_slope;
};
class INFERENCE_ENGINE_API_CLASS(SigmoidLayer): public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new SigmoidLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
//explicit SigmoidLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~SigmoidLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by Sigmoid — confirm
*/
float negative_slope;
};
class INFERENCE_ENGINE_API_CLASS(DivByNLayer) : public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new DivByNLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
// explicit DivByNLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~DivByNLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by DivByN — confirm
*/
float negative_slope;
};
class INFERENCE_ENGINE_API_CLASS(IdentityLayer) : public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new IdentityLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
// explicit IdentityLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~IdentityLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by Identity — confirm
*/
float negative_slope;
};
class INFERENCE_ENGINE_API_CLASS(ExpLayer) : public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new ExpLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
// explicit ExpLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~ExpLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by Exp — confirm
*/
float negative_slope;
};
class INFERENCE_ENGINE_API_CLASS(NegHalfLogLayer) : public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new NegHalfLogLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
// explicit NegHalfLogLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~NegHalfLogLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by NegHalfLog — confirm
*/
float negative_slope;
};
class INFERENCE_ENGINE_API_CLASS(LogLayer) : public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new LogLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
// explicit LogLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~LogLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by Log — confirm
*/
float negative_slope;
};
} // namespace InferenceEngine

View File

@@ -235,6 +235,7 @@ DECLARE_CONFIG_KEY(GPU_THROUGHPUT_STREAMS);
* PluginConfigParams::YES or PluginConfigParams::NO
*/
DECLARE_CONFIG_KEY(PERF_COUNT);
DECLARE_CONFIG_KEY(IDENTITY_SCALE_FACTOR);
/**
* @brief The key defines dynamic limit of batch processing.

View File

@@ -23,14 +23,14 @@ class BitMap : public Reader {
private:
static Register<BitMap> reg;
typedef struct {
typedef struct BmpHeader{
unsigned short type = 0u; /* Magic identifier */
unsigned int size = 0u; /* File size in bytes */
unsigned int reserved = 0u;
unsigned int offset = 0u; /* Offset to image data, bytes */
} BmpHeader;
typedef struct {
typedef struct BmpInfoHeader{
unsigned int size = 0u; /* Header size in bytes */
int width = 0, height = 0; /* Width and height of image */
unsigned short planes = 0u; /* Number of colour planes */

View File

@@ -26,9 +26,13 @@ else()
endif()
#saving rpath to GNA shared library be used by CI
log_rpath_from_dir(GNA ${libGNA_LIBRARIES_BASE_PATH})
#log_rpath_from_dir(GNA ${libGNA_LIBRARIES_BASE_PATH})
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ${INTEL_ITT_LIBS} Threads::Threads libGNA)
if(TARGET_OS STREQUAL "CHROMEOS")
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ${INTEL_ITT_LIBS} Threads::Threads gna)
else()
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ${INTEL_ITT_LIBS} Threads::Threads libGNA)
endif()
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_compile_definitions(${TARGET_NAME}
PRIVATE
@@ -45,7 +49,11 @@ target_compile_definitions(${TARGET_NAME}_test_static
GNA_LIB_VER=${GNA_LIBRARY_VERSION_NUMBER}
INTEGER_LOW_P
USE_STATIC_IE)
target_link_libraries(${TARGET_NAME}_test_static PUBLIC inference_engine_preproc_s libGNA::API)
if(TARGET_OS STREQUAL "CHROMEOS")
target_link_libraries(${TARGET_NAME}_test_static PUBLIC inference_engine_preproc_s gna)
else()
target_link_libraries(${TARGET_NAME}_test_static PUBLIC inference_engine_preproc_s libGNA::API)
endif()
target_include_directories(${TARGET_NAME}_test_static PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
set_target_properties(${TARGET_NAME}_test_static PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_test_static)

View File

@@ -309,6 +309,65 @@ void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel
}
}
// Initializes an interleave (transpose) component descriptor.
// Output dimensions are the transpose of the input (rows_out = columns_in,
// columns_out = rows_in); orientation flips from non-interleaved to interleaved.
// When postInitMem is false the caller's buffers are stored into the component;
// otherwise the caller's pointer references are redirected to the component's
// own pointer slots so memory can be bound later.
void GNAPluginNS::backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem) {
comp.num_rows_in = num_rows_in;
comp.num_columns_in = num_columns_in;
comp.num_rows_out = num_columns_in;
comp.num_columns_out = num_rows_in;
comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnInterleaveOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnNonInterleavedOrientation;
comp.orientation_out = kDnnInterleavedOrientation;
comp.output_scale_factor = output_scale_factor;
// NOTE(review): input scale factor is forced equal to the output scale factor here.
comp.input_scale_factor = output_scale_factor;
if (!postInitMem) {
comp.ptr_inputs = ptr_inputs;
comp.ptr_outputs = ptr_outputs;
} else {
ptr_inputs = &comp.ptr_inputs;
ptr_outputs = &comp.ptr_outputs;
}
}
// Initializes a deinterleave (inverse transpose) component descriptor.
// Output dimensions are the transpose of the input (rows_out = columns_in,
// columns_out = rows_in).
// When postInitMem is false the caller's buffers are stored into the component;
// otherwise the caller's pointer references are redirected to the component's
// own pointer slots so memory can be bound later.
void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem) {
comp.num_rows_in = num_rows_in;
comp.num_columns_in = num_columns_in;
comp.num_rows_out = num_columns_in;
comp.num_columns_out = num_rows_in;
comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnDeinterleaveOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnInterleavedOrientation;
// NOTE(review): orientation_out is kDnnInterleavedOrientation, same as the input;
// the interleave counterpart flips orientation — confirm this is intentional.
comp.orientation_out = kDnnInterleavedOrientation;
comp.output_scale_factor = output_scale_factor;
// NOTE(review): input scale factor is forced equal to the output scale factor here.
comp.input_scale_factor = output_scale_factor;
if (!postInitMem) {
comp.ptr_inputs = ptr_inputs;
comp.ptr_outputs = ptr_outputs;
} else {
ptr_inputs = &comp.ptr_inputs;
ptr_outputs = &comp.ptr_outputs;
}
}
void GNAPluginNS::backend::AMIntelDNN::Propagate() {
for (uint32_t i = 0; i < component.size(); i++) {

View File

@@ -192,6 +192,46 @@ public:
ptr_segments,
true);
}
template<class A, class B>
static void InitDeinterleaveComponent(intel_dnn_component_t &cmp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
A *&ptr_inputs,
B *&ptr_outputs) {
InitDeinterleaveComponentPrivate(cmp,
num_rows_in,
num_columns_in,
num_bytes_per_input,
num_bytes_per_output,
output_scale_factor,
(void *&) ptr_inputs,
(void *&) ptr_outputs,
true);
}
template<class A, class B>
static void InitInterleaveComponent(intel_dnn_component_t &cmp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
A *&ptr_inputs,
B *&ptr_outputs) {
InitInterleaveComponentPrivate(cmp,
num_rows_in,
num_columns_in,
num_bytes_per_input,
num_bytes_per_output,
output_scale_factor,
(void *&) ptr_inputs,
(void *&) ptr_outputs,
true);
}
template<class A, class B>
@@ -342,6 +382,26 @@ private:
void *&ptr_outputs,
intel_pwl_segment_t *ptr_segments,
bool postInitMem);
static void InitInterleaveComponentPrivate(intel_dnn_component_t &cmp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem);
static void InitDeinterleaveComponentPrivate(intel_dnn_component_t &cmp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem);
static void InitConvolutional1DComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in,

View File

@@ -19,6 +19,10 @@ enum DnnActivationType : uint8_t {
kActIdentity,
kActKaldiLstmClipping,
kActCustom,
kActExp,
kActLog,
kActNegHalfLog,
kActDivByN,
kActNumType
};
@@ -47,7 +51,11 @@ static const char *intel_dnn_activation_name[kActNumType] = {
"kActLeakyRelu",
"kActIdentity",
"kActKaldiLstmClipping",
"kActCustom"
"kActCustom",
"kActExp",
"kActLog",
"kActNegHalfLog",
"kActDivByN"
};
typedef enum DnnSoftmaxType {

View File

@@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#define PWL_FROM_FILE
#include <vector>
#include <iostream>
@@ -16,9 +18,13 @@ void make_gna_pwl(const DnnActivation fun,
const double u_bound,
const double in_scale,
const double out_scale,
std::vector<intel_pwl_segment_t> &gna_pwl) {
std::vector<intel_pwl_segment_t> &gna_pwl,
const uint32_t n) {
pwl_gna_slope_scale_t s;
uint32_t pwl_size = static_cast<int32_t>(pwl.size());
gnalog() << "make_gna_pwl\n";
gnalog() << " in_scale " << in_scale << "\n";
gnalog() << " out_scale " << out_scale << "\n";
switch (fun) {
case kActSigmoid:
case kActTanh: {
@@ -46,7 +52,7 @@ void make_gna_pwl(const DnnActivation fun,
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
gnalog() << (gna_pwl[1].xBase/in_scale)
gnalog() << ((int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale)
<< " " << (gna_pwl[1].yBase) / out_scale
<< " " << pwl[0].m
<< "\n";
@@ -75,6 +81,130 @@ void make_gna_pwl(const DnnActivation fun,
<< "\n";
break;
}
case kActExp: {
auto n_segments = static_cast<int32_t> (pwl_size) + 1;
gna_pwl.resize(n_segments);
// insert extra segment for x values < l_bound
gna_pwl[0].xBase = static_cast<int32_t> (INT32_MIN & XBASEMASK); // zero out the 2 lsb
gnalog() << "=========================== Exp Segments ===========================\n";
gna_pwl[0].yBase = gna_pwl[1].yBase = 0;
gna_pwl[1].xBase = (static_cast<int32_t> (in_scale * (-pwl[0].b / pwl[0].m))) & XBASEMASK;
gna_pwl[0].slope = 0;
gnalog() << (gna_pwl[0].xBase) / in_scale
<< " " << (gna_pwl[0].yBase) / out_scale
<< " " << 0.0
<< "\n";
s = gna_slope(pwl[0].m, in_scale, out_scale);
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
gnalog() << (gna_pwl[1].xBase / in_scale)
<< " " << (gna_pwl[1].yBase) / out_scale
<< " " << pwl[0].m
<< "\n";
for (uint32_t i = 1; i < pwl_size - 1; ++i) {
s = gna_slope(pwl[i].m, in_scale, out_scale);
gna_pwl[i + 1].xBase = (static_cast<int32_t> (in_scale * pwl[i].alpha)) & XBASEMASK;
gna_pwl[i + 1].yBase = FLOAT_TO_INT16(pwl[i].beta * out_scale);
gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index;
gnalog() << (pwl[i].alpha)
<< " " << pwl[i].beta
<< " " << pwl[i].m
<< "\n";
}
// insert extra segment for xvalues > u_bound
gna_pwl[n_segments - 1].xBase =
((uint32_t)(in_scale * (INT16_MAX/out_scale - pwl[pwl_size - 2].b) / pwl[pwl_size - 2].m)) & XBASEMASK;
gna_pwl[n_segments - 1].yBase = INT16_MAX;
gna_pwl[n_segments - 1].slope = 0;
gnalog() << (gna_pwl[n_segments - 1].xBase / in_scale)
<< " " << 1.0
<< " " << 0.0
<< "\n";
break;
}
case kActLog: {
auto n_segments = static_cast<int32_t> (pwl_size);
gna_pwl.resize(n_segments);
// insert extra segment for x values < l_bound
gna_pwl[0].xBase = static_cast<int32_t> (INT32_MIN & XBASEMASK); // zero out the 2 lsb
gnalog() << "=========================== Exp Segments ===========================\n";
gna_pwl[0].yBase = gna_pwl[1].yBase = INT16_MIN;
gna_pwl[1].xBase = (static_cast<int32_t> (1 + ~XBASEMASK)); // smallest representable value
gna_pwl[0].slope = 0;
gnalog() << (gna_pwl[0].xBase) / in_scale
<< " " << (gna_pwl[0].yBase) / out_scale
<< " " << 0.0
<< "\n";
s = gna_slope(pwl[0].m, in_scale, out_scale);
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
gnalog() << (gna_pwl[1].xBase / in_scale)
<< " " << (gna_pwl[1].yBase) / out_scale
<< " " << pwl[0].m
<< "\n";
for (uint32_t i = 1; i < pwl_size - 1; ++i) {
s = gna_slope(pwl[i].m, in_scale, out_scale);
gna_pwl[i + 1].xBase = (static_cast<int32_t> (in_scale * pwl[i].alpha)) & XBASEMASK;
gna_pwl[i + 1].yBase = FLOAT_TO_INT16(pwl[i].beta * out_scale);
gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index;
gnalog() << (pwl[i].alpha)
<< " " << pwl[i].beta
<< " " << pwl[i].m
<< "\n";
}
break;
}
case kActNegHalfLog: {
auto n_segments = static_cast<int32_t> (pwl_size);
gna_pwl.resize(n_segments);
// insert extra segment for x values < l_bound
gna_pwl[0].xBase = static_cast<int32_t> (INT32_MIN & XBASEMASK); // zero out the 2 lsb
gnalog() << "=========================== NegHalfLog Segments ===========================\n";
gna_pwl[0].yBase = gna_pwl[1].yBase = INT16_MAX;
gna_pwl[1].xBase = (static_cast<int32_t> (1 + ~XBASEMASK)); // smallest representable value
gna_pwl[0].slope = 0;
gnalog() << gna_pwl[0].xBase / in_scale
<< " " << (gna_pwl[0].yBase) / out_scale
<< " " << 0.0
<< "\n";
s = gna_slope(pwl[0].m, in_scale, out_scale);
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
gnalog() << ((gna_pwl[1].xBase & XBASEMASK) / in_scale)
<< " " << (gna_pwl[1].yBase) / out_scale
<< " " << pwl[0].m
<< "\n";
for (uint32_t i = 1; i < pwl_size - 1; ++i) {
s = gna_slope(pwl[i].m, in_scale, out_scale);
gna_pwl[i + 1].xBase = (static_cast<int32_t> (in_scale * pwl[i].alpha)) & XBASEMASK;
gna_pwl[i + 1].yBase = FLOAT_TO_INT16(pwl[i].beta * out_scale);
gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index;
gnalog() << (pwl[i].alpha)
<< " " << pwl[i].beta
<< " " << pwl[i].m
<< "\n";
}
break;
}
case kActRelu:
case kActLeakyRelu: {
auto n_segments = 2;
@@ -106,7 +236,8 @@ void make_gna_pwl(const DnnActivation fun,
break;
}
case kActIdentity:
case kActKaldiLstmClipping: {
case kActKaldiLstmClipping:
case kActDivByN: {
int32_t x_lower = INT32_MIN;
int32_t x_upper = INT32_MAX;
int16_t y_lower = INT16_MIN;
@@ -130,14 +261,20 @@ void make_gna_pwl(const DnnActivation fun,
x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale);
}
}
} else {
} else if (fun == kActIdentity) {
gnalog() << "=========================== Identity Segments ===========================\n";
if (x_lower < y_lower * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale);
if (x_upper > y_upper * in_scale / out_scale) x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale);
if (y_lower < x_lower * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale);
if (y_upper > x_upper * out_scale / in_scale) y_upper = FLOAT_TO_INT16(x_upper * out_scale / in_scale);
} else {
gnalog() << "=========================== DivByN Segments ===========================\n";
if (x_lower < y_lower * (float)n * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * (float)n * in_scale / out_scale);
if (x_upper > y_upper * (float)n * in_scale / out_scale) x_upper = FLOAT_TO_INT32(y_upper * (float)n * in_scale / out_scale);
if (y_lower < x_lower * (1.0 / n) * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * (1.0 / n) * out_scale / in_scale);
if (y_upper > x_upper* (1.0 / n) * out_scale / in_scale) y_upper = FLOAT_TO_INT16(x_upper * (1.0 / n) * out_scale / in_scale);
}
gna_pwl.resize(n_segments);
gna_pwl.resize(n_segments);
gna_pwl[0].xBase = INT32_MIN & XBASEMASK; // zero out the 2 lsb
gna_pwl[0].yBase = y_lower;
gna_pwl[0].slope = 0;
@@ -148,13 +285,19 @@ void make_gna_pwl(const DnnActivation fun,
gna_pwl[1].xBase = x_lower & XBASEMASK; // zero out the 2 lsb
gna_pwl[1].yBase = y_lower;
s = gna_slope(1.0, in_scale, out_scale);
if (fun == kActDivByN) {
s = gna_slope(1.0 / n, in_scale, out_scale);
} else {
s = gna_slope(1.0, in_scale, out_scale);
}
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
gnalog() << gna_pwl[1].xBase / in_scale
<< " " << gna_pwl[1].yBase / out_scale
<< " " << 1.0
<< "\n";
int32_t round_scale = FLOAT_TO_INT16(0.5f / s.slope) & XBASEMASK;
gna_pwl[1].xBase = (gna_pwl[1].xBase - round_scale) | s.slope_scale_index;
gnalog() << (int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale
<< " " << gna_pwl[1].yBase / out_scale
<< " " << 1.0
<< "\n";
if (INT32_MAX > x_upper) { // need a right segment
gna_pwl.push_back({
@@ -162,10 +305,10 @@ void make_gna_pwl(const DnnActivation fun,
y_upper,
0 });
gnalog() << gna_pwl[n_segments].xBase / in_scale
<< " " << gna_pwl[n_segments].yBase / out_scale
<< " " << 0
<< "\n";
gnalog() << (x_upper & XBASEMASK) / in_scale
<< " " << gna_pwl[n_segments].yBase / out_scale
<< " " << 0
<< "\n";
}
break;
}

View File

@@ -14,4 +14,5 @@ void make_gna_pwl(const DnnActivation fun,
const double u_bound,
const double in_scale,
const double out_scale,
std::vector<intel_pwl_segment_t> &gna_pwl);
std::vector<intel_pwl_segment_t> &gna_pwl,
const uint32_t n);

View File

@@ -6,6 +6,8 @@
#include <iostream>
#include <details/ie_exception.hpp>
#include "quantization.h"
#include <xmmintrin.h>
#include <smmintrin.h>
void QuantizeAffine16(float *ptr_float_weights,
float *ptr_float_biases,
@@ -20,7 +22,7 @@ void QuantizeAffine16(float *ptr_float_weights,
uint32_t num_columns_padded) {
uint32_t num_saturate = 0;
if (*ptr_weight_scale_factor == 1.0) {
/* if (*ptr_weight_scale_factor == 1.0) {
// scale factor for weights is not calculated yet
float mean_weight = 0.0;
float mean_weight_squared = 0.0;
@@ -48,7 +50,7 @@ void QuantizeAffine16(float *ptr_float_weights,
*ptr_weight_scale_factor = static_cast<float>(MAX_VAL_2B_WEIGHT) / max_weight;
}
*ptr_output_scale_factor = input_scale_factor * *ptr_weight_scale_factor;
}
}*/
for (uint32_t row = 0; row < num_rows; row++) {
for (uint32_t col = 0; col < num_columns; col++) {
@@ -104,26 +106,156 @@ void QuantizeAffine16(float *ptr_float_weights,
}
}
float ScaleFactorForQuantization(void *ptr_float_memory, float target_max, size_t num_elements) {
float *ptr_float_feat = reinterpret_cast<float *>(ptr_float_memory);
float max = 0.0;
__attribute__ ((target ("default")))
float ScaleFactorForQuantization(void *ptr_float_memory, float target_max, size_t num_elements)
{
float *ptr_float_feat = (float*)ptr_float_memory;
float min = 0.0;
float buf[4];
float scale_factor;
float abs_f;
__m128 zero = _mm_setzero_ps();
__m128 total_abs = _mm_setzero_ps();
char not_aligned_buffer[32];
__m128 v, neg_v, abs;
__m128 v2, neg_v2, abs2;
for (size_t i = 0; i < num_elements; i++) {
if (fabs(ptr_float_feat[i]) > max) {
max = fabs(ptr_float_feat[i]);
uint32_t moves = num_elements >>3;
uint32_t mod = num_elements % 8;
uint32_t i;
v = _mm_load_ps(ptr_float_feat);
neg_v = _mm_sub_ps(zero, v);
abs = _mm_max_ps(v, neg_v);
total_abs = _mm_max_ps(total_abs, abs);
for (i = 0; i<moves; i++, ptr_float_feat +=8)
{
v = _mm_load_ps(ptr_float_feat);
v2 = _mm_load_ps(ptr_float_feat+4);
neg_v = _mm_sub_ps(zero, v);
abs = _mm_max_ps(v, neg_v);
neg_v2 = _mm_sub_ps(zero, v2);
abs2 = _mm_max_ps(v2, neg_v2);
total_abs = _mm_min_ps(total_abs, abs);
total_abs = _mm_min_ps(total_abs, abs2);
}
_mm_storeu_ps(buf, total_abs);
float single_min_lo = buf[0] < buf[1] ? buf[0] : buf[1];
float single_min_hi = buf[2] < buf[3] ? buf[2] : buf[3];
float single_min = single_min_lo < single_min_hi ? single_min_lo : single_min_hi;
for (i = 0; i < mod; i++)
{
abs_f = fabs(ptr_float_feat[i]);
if (abs_f < min) {
min = abs_f;
}
}
if (max == 0) {
scale_factor = -1.0f; // need to handle all zeros as a special case
} else {
scale_factor = target_max / max;
return(single_min != 0 ? (single_min < 1.0 ? 1 / single_min : 1.0f) : 1.0f);
}
// Extract the float lane at position `index` (valid range 0..3) from an
// SSE vector.
// The previous implementation read an inactive union member; that is
// well-defined in C but undefined behaviour in C++, so spill the vector
// to a local array with _mm_storeu_ps instead - same result, portable.
float accessmember(__m128 v, int index)
{
    float lanes[4];
    _mm_storeu_ps(lanes, v);
    return lanes[index];
}
__attribute__ ((target ("default")))
void QuantizeBias8(float *ptr_float_biases, intel_compound_bias_t *ptr_int_biases, float *ptr_output_scale_factor, uint32_t num_rows)
{
float *ptr_float_feat = (float*)ptr_float_biases;
intel_compound_bias_t *ptr_int = (intel_compound_bias_t*)ptr_int_biases;
uint32_t moves = num_rows / 4;
uint32_t mod = num_rows % 4;
uint32_t i, j;
__m128 v, zero, half, neg_half, scale_factores, mask, rounding_values, min, max, values;
#ifdef ROUND_AND_CAST
__m128i tmp;
#endif
zero = _mm_setzero_ps();
half = _mm_set1_ps(0.5f);
neg_half = _mm_set1_ps(-0.5f);
max = _mm_set1_ps(2147483647.0f);
min = _mm_set1_ps(-2147483648.0f);
scale_factores = _mm_set1_ps(*ptr_output_scale_factor);
for (i = 0; i < moves; i++, ptr_float_feat += 4, ptr_int += 4) {
v = _mm_load_ps(ptr_float_feat);
//rounding_values = (v>0) ? 0.5f : -0.5f;
mask = _mm_min_ps(v, zero);
rounding_values = _mm_blendv_ps(half, neg_half, mask);
// values = v * scale_factores + rounding_values
values = _mm_mul_ps(v, scale_factores);
values = _mm_add_ps(values, rounding_values);
// shrink to <-2147483648.0f, 2147483647.0f>
values = _mm_min_ps(values, max);
values = _mm_max_ps(values, min);
#ifdef ROUND_AND_CAST
// round and cast float to int16 ... much faster than "only cast" in MS compiler ??
tmp = _mm_cvtps_epi32(values);
ptr_int[0].bias = tmp.m128i_i32[0];
ptr_int[1].bias = tmp.m128i_i32[1];
ptr_int[2].bias = tmp.m128i_i32[2];
ptr_int[3].bias = tmp.m128i_i32[3];
#else
// only cast float to int16
for (j = 0; j < 4; j++)
ptr_int[j].bias = (int32_t)accessmember(values, j);
#endif
}
return (scale_factor);
for (i = 0; i < mod; i++) {
float rounding_value = (ptr_float_feat[i]>0) ? 0.5f : -0.5f;
float value = ptr_float_feat[i] * *ptr_output_scale_factor + rounding_value;
if (value > 2147483647.0) {
ptr_int[i].bias = 2147483647L;
}
else if (value < -2147483648.0) {
ptr_int[i].bias = -2147483648LL;
}
else {
ptr_int[i].bias = (int32_t)value;
}
}
}
/*__attribute__ ((target ("default")))
void QuantizeBias8(float *ptr_float_biases, intel_compound_bias_t *ptr_int_biases, float *ptr_output_scale_factor, uint32_t num_rows)
{
uint32_t num_saturate = 0;
for (uint32_t j = 0; j < num_rows; j++) {
float rounding_value = (ptr_float_biases[j] > 0) ? 0.5f : -0.5f;
float value = ptr_float_biases[j] * *ptr_output_scale_factor + rounding_value;
if (value > 2147483647.0) {
ptr_int_biases[j].bias = 2147483647L;
num_saturate++;
} else if (value < -2147483648.0) {
ptr_int_biases[j].bias = -2147483648LL;
num_saturate++;
} else {
ptr_int_biases[j].bias = (int32_t) value;
}
}
}*/
void QuantizeVector16(float *ptr_float_memory, int16_t *ptr_int_memory, uint32_t num_elements, float scale_factor) {
float *ptr_float_feat = reinterpret_cast<float *>(ptr_float_memory);
uint32_t num_saturate = 0;
@@ -158,7 +290,7 @@ void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases,
}
uint32_t num_saturate = 0;
if (*ptr_weight_scale_factor == 1.0) {
/*if (*ptr_weight_scale_factor == 1.0) {
// scale factor for weights is not calculated yet
float mean_weight = 0.0;
float mean_weight_squared = 0.0;
@@ -191,7 +323,7 @@ void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases,
// 4. quantize and store scaled row
*ptr_weight_scale_factor = MAX_OUT_MULTIPLIER * *ptr_weight_scale_factor; // increase dynamic range by max multiplier
*ptr_output_scale_factor = input_scale_factor * *ptr_weight_scale_factor;
}
}*/
float valueAcc = 0.0;
for (uint32_t row = 0; row < num_rows; row++) {
float scaled_row_max = 0;
@@ -237,19 +369,7 @@ void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases,
// bias value of the bas will be only used when input bias provided
if (ptr_float_biases != nullptr) {
for (uint32_t j = 0; j < num_rows; j++) {
float rounding_value = (ptr_float_biases[j] > 0) ? 0.5f : -0.5f;
float value = ptr_float_biases[j] * *ptr_output_scale_factor + rounding_value;
if (value > 2147483647.0) {
ptr_int_biases[j].bias = 2147483647L;
num_saturate++;
} else if (value < -2147483648.0) {
ptr_int_biases[j].bias = -2147483648LL;
num_saturate++;
} else {
ptr_int_biases[j].bias = (int32_t) value;
}
}
QuantizeBias8(ptr_float_biases, ptr_int_biases, ptr_output_scale_factor, num_rows);
}
if (num_saturate > 0) {

View File

@@ -18,6 +18,7 @@
#include "gna_slope_scale.h"
namespace GNAPluginNS {
extern float identity_SF;
namespace frontend {
struct ScaleFactorUpdateResult {
InferenceEngine::CNNLayer *restartLayer = nullptr;
@@ -53,7 +54,7 @@ template<>
class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
private :
const float activation_scale_factor = 2048.f;
const float identity_scale_factor = 2049.0f;
const float identity_scale_factor = identity_SF;
const float k = 5;
const float k_identity = 6;
@@ -472,7 +473,7 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
quant->_weights_quant.scale = 1.0f;
}
if (wl->_biases) {
/*if (wl->_biases) {
quant->_bias_quant.scale = ScaleFactorForQuantization(wl->_biases->buffer().as<float *>(),
MAX_VAL_4B_BIAS,
wl->_biases->size());
@@ -480,7 +481,7 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
quant->_bias_quant.scale = std::min(quant->_weights_quant.scale * quant->_src_quant.scale, quant->_bias_quant.scale);
quant->_weights_quant.scale = quant->_bias_quant.scale / quant->_src_quant.scale;
}
}
}*/
// TODO: find out why this is needed
if (weightsSize == 1) {
@@ -552,7 +553,7 @@ class ScaleFactorPerLayer<InferenceEngine::ConvolutionLayer*> : public ScaleFact
*/
class ScaleFactorCalculator {
using Cnt = std::vector<InferenceEngine::CNNLayerPtr>;
Cnt net;
Cnt net;
mutable Cnt::const_iterator idx;
mutable bool needRestart = false;
int weightsBytesSize;

View File

@@ -59,8 +59,10 @@ void ExportLdForNoMmu(uint32_t modelId, std::ostream & outStream) {
status = Gna2ModelExportConfigSetSource(exportConfig, 0, modelId);
GNADeviceHelper::checkGna2Status(status);
#if GNA_LIB_VER != 2 && GNA_LIB_VER != 1
status = Gna2ModelExportConfigSetTarget(exportConfig, Gna2DeviceVersionEmbedded3_0);
GNADeviceHelper::checkGna2Status(status);
#endif
void * ldNoMmu;
uint32_t ldNoMmuSize;

View File

@@ -24,7 +24,10 @@
#include "details/ie_exception.hpp"
#include "gna_plugin_log.hpp"
std::mutex GNADeviceHelper::acrossPluginsSync{};
uint8_t* GNADeviceHelper::alloc(uint32_t size_requested, uint32_t *size_granted) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
void * memPtr;
#if GNA_LIB_VER == 1
memPtr = GNAAlloc(nGNAHandle, size_requested, size_granted);
@@ -41,6 +44,7 @@ uint8_t* GNADeviceHelper::alloc(uint32_t size_requested, uint32_t *size_granted)
}
void GNADeviceHelper::free(void * ptr) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
#if GNA_LIB_VER == 1
GNAFree(nGNAHandle);
#else
@@ -53,6 +57,7 @@ void GNADeviceHelper::free(void * ptr) {
uint32_t GNADeviceHelper::propagate(const intel_nnet_type_t *pNeuralNetwork,
const uint32_t *pActiveIndices,
uint32_t nActiveIndices) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t reqId;
nGNAStatus = GNAPropagateForward(nGNAHandle, pNeuralNetwork,
@@ -62,14 +67,17 @@ uint32_t GNADeviceHelper::propagate(const intel_nnet_type_t *pNeuralNetwork,
}
#else
void GNADeviceHelper::setUpActiveList(const uint32_t requestConfigId, uint32_t layerIndex, uint32_t* ptr_active_indices, uint32_t num_active_indices) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
const auto status = Gna2RequestConfigEnableActiveList(requestConfigId, layerIndex, num_active_indices, ptr_active_indices);
checkGna2Status(status);
}
void GNADeviceHelper::propagateSync(const uint32_t requestConfigId) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
wait(propagate(requestConfigId));
}
uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t reqId;
const auto status = Gna2RequestEnqueue(requestConfigId, &reqId);
checkGna2Status(status);
@@ -77,6 +85,7 @@ uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId) {
}
uint32_t GNADeviceHelper::createModel(const Gna2Model& gnaModel) const {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t modelId;
const auto status = Gna2ModelCreate(nGnaDeviceIndex, &gnaModel, &modelId);
checkGna2Status(status);
@@ -84,11 +93,13 @@ uint32_t GNADeviceHelper::createModel(const Gna2Model& gnaModel) const {
}
void GNADeviceHelper::releseModel(const uint32_t model_id) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
const auto status = Gna2ModelRelease(model_id);
checkGna2Status(status);
}
uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t reqConfId;
auto status = Gna2RequestConfigCreate(model_id, &reqConfId);
checkGna2Status(status);
@@ -121,6 +132,7 @@ void GNADeviceHelper::checkGna2Status(Gna2Status status) {
#endif
void GNADeviceHelper::wait(uint32_t reqId) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
#if GNA_LIB_VER == 2
const auto status = Gna2RequestWait(reqId, GNA_TIMEOUT);
checkGna2Status(status);
@@ -220,9 +232,11 @@ void GNADeviceHelper::open(uint8_t n_threads) {
void GNADeviceHelper::close() {
#if GNA_LIB_VER == 1
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
GNADeviceClose(nGNAHandle);
nGNAHandle = 0;
#else
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
const auto status = Gna2DeviceClose(nGnaDeviceIndex);
checkGna2Status(status);
#endif

View File

@@ -30,7 +30,10 @@
/**
* holds gna - style handle in RAII way
*/
typedef uint32_t gna_device_id; // TODO:3:API redesign: remove and use uint32_t instead.
class GNADeviceHelper {
static std::mutex acrossPluginsSync;
#if GNA_LIB_VER == 1
intel_gna_status_t nGNAStatus = GNA_NOERROR;
intel_gna_handle_t nGNAHandle = 0;
@@ -159,6 +162,7 @@ public:
void setOMPThreads(uint8_t const n_threads);
void initGnaPerfCounters() {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
#if GNA_LIB_VER == 1
nGNAPerfResults = {{0, 0, 0, 0, 0, 0, 0}, {0, 0}, {0, 0, 0}, {0, 0}};
nGNAPerfResultsTotal = {{0, 0, 0, 0, 0, 0, 0}, {0, 0}, {0, 0, 0}, {0, 0}};

View File

@@ -803,6 +803,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
uint32_t num_rows_out = isDiag ? num_rows_in : FROM_IR_DIM(outputs, 1);
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
uint32_t num_padding_out = isDiag ? num_padding : 0;
@@ -1190,6 +1191,7 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
num_rows = FROM_IR_DIM(inputs, 1);
}
uint32_t non_batch_dim = (orientation == kDnnNonInterleavedOrientation) ? num_columns : num_rows;
size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
* outputs->getPrecision().size();
@@ -1198,6 +1200,10 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
static InferenceEngine::details::caseless_unordered_map<std::string, DnnActivationType> supportedActivations = {
{"sigmoid", kActSigmoid},
{"divbyn", kActDivByN},
{"log", kActLog},
{"neghalflog", kActNegHalfLog},
{"exp", kActExp},
{"tanh", kActTanh},
{"relu", kActRelu},
{"leakyrelu", kActLeakyRelu},
@@ -1262,12 +1268,14 @@ case name:\
&*ptr_pwl_segments.begin(),
static_cast<uint32_t>(ptr_pwl_segments.size()),
input_pwl_scale_factor,
output_pwl_scale_factor);
output_pwl_scale_factor,
non_batch_dim);
} else {
PwlDesignOpt16(activation_type,
ptr_pwl_segments,
input_pwl_scale_factor,
output_pwl_scale_factor);
output_pwl_scale_factor,
non_batch_dim);
}
ptr_pwl_segments_target = reinterpret_cast<intel_pwl_segment_t*>(&ptr_pwl_segments_target);
}
@@ -1298,30 +1306,85 @@ case name:\
}
void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
static int count = 0;
count++;
if (LayerInfo(layer).isTrivialPermute()) {
return;
}
auto layerOrder = layer->GetParamAsInts("order");
string dimMessage;
if (layerOrder == vector<int>({0, 3, 2, 1})) {
return; // supported case
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
if (layer->insData.empty()) {
THROW_GNA_EXCEPTION << "Input layer pointer is unexpectedly absent";
}
auto inputs = layer->insData.begin()->lock();
auto inputsOrder = inputs->getTensorDesc().getDims();
auto outputs = layer->outData.front();
// squeeze order vector
SizeVector squeezedInputOrder;
for (auto input_shape : inputsOrder) {
if (input_shape != 1) squeezedInputOrder.push_back(input_shape);
}
SizeVector squeezedOutputOrder;
for (auto output_shape : layerOrder) {
if (output_shape != 0) squeezedOutputOrder.push_back(output_shape);
}
if (layerOrder == vector<int>({1, 0, 2})) {
IE_ASSERT(!layer->insData.empty());
auto inputs = layer->insData.begin()->lock();
auto inputs_size = inputs->getTensorDesc().getDims().size();
if (inputs_size != layerOrder.size()) {
THROW_IE_EXCEPTION << "[GNA plugin] Invalid input tensor size for permute layer " <<
layer->GetParamAsString("order");
}
auto permuteDim0 = FROM_IR_DIM(inputs, inputs_size);
auto permuteDim1 = FROM_IR_DIM(inputs, inputs_size - 1);
if (permuteDim0 == 1 || permuteDim1 == 1) {
return; // supported case
}
dimMessage = " (with first dim = " + to_string(permuteDim0) + ", second dim = " + to_string(permuteDim1) + ")";
void* ptr_inputs = nullptr;
void* ptr_outputs = nullptr;
if (squeezedInputOrder.size() > 2) {
THROW_GNA_EXCEPTION << "unsupported permute (requested transpose is not 2D)";
}
THROW_IE_EXCEPTION << "[GNA plugin] Unsupported permute order: was " << layer->GetParamAsString("order") <<
dimMessage << ", but only support 1,0,2 (with first or second dim = 1) and 0,3,2,1";
if (count%2 == 0) {
auto temp = squeezedInputOrder[0];
squeezedInputOrder[0] = squeezedInputOrder[1];
squeezedInputOrder[1] = temp;
}
if (std::min(squeezedInputOrder[0], squeezedInputOrder[1]) > 8) {
THROW_GNA_EXCEPTION << "unsupported permute (minor dimension="
<< std::min(squeezedInputOrder[0], squeezedInputOrder[1]) << " > 8)";
}
// now this can be run on GNA
if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case
if (ALIGN(squeezedInputOrder[1], 8) != squeezedInputOrder[1]) {
THROW_GNA_EXCEPTION << "unsupported permute (row size not a multiple of 8)";
} else {
auto& currentComponent = dnnComponents.addComponent(layer->name, "interleave");
dnn->InitInterleaveComponent(currentComponent,
squeezedInputOrder[0],
squeezedInputOrder[1],
inputs->getPrecision().size(),
outputs->getPrecision().size(),
(quantized == nullptr) ? 1.0f : quantized->_dst_quant.scale,
ptr_inputs,
ptr_outputs);
}
} else { // deinterleave case
if (ALIGN(squeezedInputOrder[0], 8) != squeezedInputOrder[0]) {
THROW_GNA_EXCEPTION << "[GNA plugin] unsupported permute (column size not a multiple of 8)";
} else {
auto& currentComponent = dnnComponents.addComponent(layer->name, "deinterleave");
dnn->InitDeinterleaveComponent(currentComponent,
squeezedInputOrder[0],
squeezedInputOrder[1],
inputs->getPrecision().size(),
outputs->getPrecision().size(),
quantized == nullptr ? 1 : quantized->_dst_quant.scale,
ptr_inputs,
ptr_outputs);
}
}
size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(
begin(outputs->getDims()), end(outputs->getDims())), 8)
* outputs->getPrecision().size();
size_t num_data_bytes_in = squeezedInputOrder[0] * squeezedInputOrder[1] * inputs->getPrecision().size();
connectInput(layer, ptr_inputs, num_data_bytes_in);
connectOutput(layer, ptr_outputs, num_data_bytes_out);
}
void SKIP(GNAGraphCompiler*, CNNLayerPtr) {}
@@ -1338,7 +1401,7 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
{{"Split"}, SKIP}, // skip information about which part of prev layer need to consume handle during layer creation
{{"Slice"}, SKIP},
{{"link"}, SKIP},
{{"clamp", "sigmoid", "relu", "tanh", "identity"}, CREATE(PWLPrimitive)},
{{"clamp", "sigmoid", "relu", "tanh", "log", "neghalflog", "divbyn", "exp", "identity"}, CREATE(PWLPrimitive)},
{{"Convolution"}, CREATE(ConvolutionPrimitive)},
{{"Permute"}, CREATE(PermutePrimitive)}, // permute of certain form (2D transpose) can be assimilated in followed FC layer
{{"Pooling"}, CREATE(PoolingPrimitive)},
@@ -1644,11 +1707,14 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
}
if (LayerInfo(prevLayer).isPermute()) {
gnalog() << "Skipping permute layer: " << prevLayer->name << "\n";
return {connectInput(prevLayer, ptr, num_data_bytes_in, offset, 0).input, true, prevLayer};
if (!LayerInfo(prevLayer).isTrivialPermute()) {
// we should have GNA primitive for it
THROW_GNA_EXCEPTION << "missed gna primitive for permute: " << prevLayer->name;
}
gnalog() << "Skipping trivial permute layer: " << prevLayer->name << "\n";
return connectInput(prevLayer, ptr, num_data_bytes_in, offset, 0);
}
THROW_GNA_EXCEPTION << "Cannot connect input for: " << layer->name;
}

View File

@@ -14,6 +14,7 @@
#include "descriptions/gna_input_desc.hpp"
#include "descriptions/gna_flags.hpp"
#include "cpp_interfaces/base/ie_plugin_base.hpp"
#include "cpp_interfaces/impl/ie_memory_state_internal.hpp"
#include "connection_details.hpp"
#include "backend/dnn.hpp"
#include "memory/polymorph_allocator.hpp"
@@ -52,6 +53,7 @@ public:
GNAPluginNS::backend::DnnComponents dnnComponents;
MemoryConnection memory_connection;
ConcatConnection concat_connection;
std::vector<InferenceEngine::IMemoryStateInternal::Ptr> memoryStates;
void setGNAMemoryPtr(std::shared_ptr<GNAPluginNS::gna_memory_type> gnaMemPtr);
void setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr);

View File

@@ -18,8 +18,15 @@
#include "gna_lib_ver_selector.hpp"
// Plain descriptor of a GNA neural network: the number of layers, the
// input grouping level, and a pointer to the per-layer configurations.
typedef struct _nnet_type_t
{
uint32_t nLayers; // The number of layers in the network.
uint32_t nGroup; // Input vector grouping level.
intel_nnet_layer_t *pLayers; // Layer configurations.
} intel_nnet_type_t;
#ifndef WIN32
#include <profiler.h>
void clearTimeB(timeb & tb) {
tb.time = 0;

View File

@@ -57,6 +57,7 @@ uint32_t ToByteSize(const Gna2DataType type) {
}
}
float GNAPluginNS::identity_SF = 256.0f;
constexpr uint32_t GNAPluginNS::GNAPlugin::FAKE_REQUEST_CONFIG_ID;
#endif
using namespace InferenceEngine;
@@ -501,11 +502,11 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
// auto idx = std::distance(outputsDataMap.begin(), outputPort);
auto & desc = outputsDesc[idx];
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
desc.ptrs.resize(gnaFlags->gna_lib_async_threads_num);
desc.orientation = component.orientation_out;
desc.num_bytes_per_element = component.num_bytes_per_output;
desc.scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
// TODO: this need to be fixed
desc.num_elements = component.num_rows_out;
@@ -518,6 +519,18 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
// gets output layer pointer in original topology not in cloned
auto outLayer = outPort.second->getCreatorLayer().lock();
// Memory layers are not dnnComponents hence we need to make switch with identity layer
if (outLayer->type == "Memory") {
// traverse memory connection to find corresponding output_memory
for (auto && memConnection : graphCompiler.memory_connection) {
if (memConnection.second.getInput()->name == outLayer->name) {
// if connection is found, replace memory input layer with memory output layer
outLayer = memConnection.second.getOutput();
break;
}
}
}
// searching for outData represented in GNA blob
// using ufs - upper first search
gnalog() << "[UFS] searching for : "<< outPort.first << " representation in GNA\n";
@@ -693,10 +706,25 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
num_rotate_rows = dnn->num_rotate_rows;
num_rotate_columns = dnn->num_rotate_columns;
for (auto& gnaMemoryConn : graphCompiler.memory_connection) {
std::string name = gnaMemoryConn.first;
GNAMemoryLayer memLayer = gnaMemoryConn.second;
InferenceEngine::CNNLayerPtr layer = memLayer.getInput();
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
auto ptr = make_blob_with_precision(TensorDesc(InferenceEngine::Precision::I16,
memLayer.getDims(),
memLayer.getDims().size() == 2 ? NC : NCHW),
memLayer.gna_ptr);
graphCompiler.memoryStates.emplace_back(std::make_shared<memory::GNAMemoryState>(name, ptr, scale_factor));
}
DumpXNNToFile();
#ifdef PLOT
dnn->WriteGraphWizModel("gna-blob.dot");
dnn->WriteGraphWizModel("/data/local/tmp/gna-blob.dot");
#endif
#if GNA_LIB_VER == 2
createRequestConfigsForGnaModels();
@@ -1047,7 +1075,7 @@ std::vector<InferenceEngine::MemoryStateInternal::Ptr> GNAPlugin::QueryState()
return {};
}
return {std::make_shared<memory::GNAMemoryState>(shared_from_this())};
return graphCompiler.memoryStates;
}
std::string GNAPlugin::GetName() const noexcept {
@@ -1400,6 +1428,14 @@ void GNAPlugin::SetConfig(const std::map<std::string, std::string> &config) {
}
});
if_set(CONFIG_KEY(IDENTITY_SCALE_FACTOR), [&] {
auto idScaleFactor = InferenceEngine::CNNLayer::ie_parse_float(value);
if (fp32eq(idScaleFactor, 0.0f)) {
THROW_GNA_EXCEPTION << "identity scale factor of 0.0f not supported";
}
identity_SF = idScaleFactor;
});
if_set(GNA_CONFIG_KEY(LIB_N_THREADS), [&] {
uint64_t lib_threads = std::stoul(value, NULL, 10);
if (lib_threads == 0 || lib_threads > std::numeric_limits<uint8_t>::max()/2-1) {

View File

@@ -28,6 +28,7 @@
#endif
namespace GNAPluginNS {
extern float identity_SF;
class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::enable_shared_from_this<GNAPlugin> {
protected:
std::string _pluginName = "GNA";

View File

@@ -6,8 +6,28 @@
#include <ostream>
#include <details/ie_exception.hpp>
#include "sys/timeb.h"
// #define GNA_DEBUG
typedef unsigned long long time_tsc;
typedef struct
{
time_tsc start; // time value on profiler start
time_tsc stop; // time value on profiler stop
time_tsc passed; // time passed between start and stop
} intel_gna_profiler_tsc;
typedef struct timeb time_rtc;
typedef struct
{
time_rtc start; // time value on profiler start
time_rtc stop; // time value on profiler stop
time_rtc passed; // time passed between start and stop
} intel_gna_profiler_rtc;
//#define GNA_DEBUG
#ifdef GNA_DEBUG
#include <iostream>
/**
@@ -58,7 +78,7 @@ inline GnaLog & gnawarn() {
#ifdef __PRETTY_FUNCTION__
#undef __PRETTY_FUNCTION__
#endif
#ifdef _WIN32
#if defined _WIN32
# define __PRETTY_FUNCTION__ __FUNCSIG__
#else
# define __PRETTY_FUNCTION__ __FUNCTION__

View File

@@ -111,6 +111,7 @@ std::map<std::string, std::string> GNAPlugin::supportedConfigKeysWithDefaults()
{GNA_CONFIG_KEY(PRECISION), Precision(Precision::I8).name()},
{GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), CONFIG_VALUE(YES)},
{CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(NO)},
{CONFIG_KEY(IDENTITY_SCALE_FACTOR), "2048.0"},
{GNA_CONFIG_KEY(LIB_N_THREADS), "1"},
{CONFIG_KEY(SINGLE_THREAD), CONFIG_VALUE(YES)}
};

View File

@@ -6,7 +6,7 @@
#include <cstdint>
typedef struct {
typedef struct pwl_gna_slope_scale_t{
double slope;
uint64_t slope_scale = 0;
uint32_t slope_scale_index;

View File

@@ -10,7 +10,7 @@
#include "details/caseless.hpp"
#include "ie_algorithm.hpp"
#include "gna-api.h"
#include "gna_permute.hpp"
namespace GNAPluginNS {
@@ -68,7 +68,7 @@ class LayerInfo {
IS_VALID();
static InferenceEngine::details::caseless_set<std::string> activations =
{ "clamp", "sigmoid", "identity", "relu",
"leakyrelu", "tanh", "prelu", "exp", "log", "sign", "abs", "neghalflog"};
"leakyrelu", "tanh", "prelu", "exp", "log", "sign", "abs", "neghalflog", "divbyn"};
return activations.find(layer->type) != activations.end();
}
@@ -155,6 +155,43 @@ class LayerInfo {
bool isPermute() const noexcept {
return isOfType("permute");
}
// @brief true when this permute is a no-op on the underlying memory layout;
// not a purely mathematical check - the 0,3,2,1 order is whitelisted as a
// workaround (WA) for the Kaldi case regardless of the input dims.
bool isTrivialPermute() {
if (!isPermute()) return false;
auto layerOrder = layer->GetParamAsInts("order");
if (layerOrder == std::vector<int>({ 0, 3, 2, 1 })) {
return true; // supported case (Kaldi WA), accepted without inspecting dims
}
IE_ASSERT(!layer->insData.empty());
auto inputs = layer->insData.begin()->lock();
auto inputsOrder = inputs->getTensorDesc().getDims();
// otherwise trivial only when every elementary swap exchanges a size-1 dim
// with another dim and no dim of size > 1 lies between the swapped positions
auto permuteSequence = genPermutations(layerOrder.begin(), layerOrder.end());
auto inputsOrderTransformed = inputsOrder;
for (auto && permute : permuteSequence) {
// both swapped dims have size 1: treated as trivial immediately
if (inputsOrderTransformed[permute.first] == 1 &&
inputsOrderTransformed[permute.second] == 1) {
return true;
}
// swapping two real (size > 1) dims reorders data - not trivial
if (inputsOrderTransformed[permute.first] != 1 &&
inputsOrderTransformed[permute.second] != 1) {
return false;
}
// any size > 1 dim sitting between the swapped positions also breaks triviality
// NOTE(review): loop assumes permute.first < permute.second - TODO confirm
for (int j = permute.first + 1; j != permute.second; j++) {
if (inputsOrderTransformed[j] != 1) {
return false;
}
}
// apply the swap to the tracked shape before evaluating the next one
std::swap(inputsOrderTransformed[permute.first], inputsOrderTransformed[permute.second]);
}
return true;
}
bool isPooling() const noexcept {
return isOfType("pooling");
}

View File

@@ -38,6 +38,11 @@ enum LayerType {
Memory,
Power,
Crop,
Exp,
Log,
NegHalfLog,
Identity,
DivByN,
LSTMCell,
TensorIterator,
NO_TYPE
@@ -66,6 +71,11 @@ static const InferenceEngine::details::caseless_map<std::string, GNAPluginNS::La
{ "Power" , Power},
{ "Memory" , Memory },
{ "Crop" , Crop },
{ "Log", Log },
{ "NegHalfLog" , NegHalfLog },
{ "DivByN", DivByN },
{ "Identity", Identity },
{ "Exp", Exp },
{ "LSTMCell", LSTMCell },
{ "TensorIterator", TensorIterator }
};

View File

@@ -0,0 +1,104 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <list>
#include <utility>
#include "gna_plugin_log.hpp"
namespace GNAPluginNS {
// Decomposes a permutation (given as an "order" vector) into a sequence of
// pairwise swaps that realises it; cnt() exposes that swap list. An empty
// list means the order is the identity permutation.
template <class T>
class PermuteSequence {
public:
// sequence of (position, position) elementary swaps
using cnt_type = std::vector<std::pair<T, T>>;
private:
std::vector<T> orderVec; // validated copy of the requested order
cnt_type permutes; // flattened swap sequence covering every cycle
public:
// Validates orderVecIn (each element must be in [0, size) and unique),
// then walks the permutation's cycles and records k-1 swaps per k-cycle.
// Throws via THROW_GNA_EXCEPTION on an invalid order vector.
explicit PermuteSequence(std::vector<T> && orderVecIn) : orderVec(std::move(orderVecIn)) {
// first pass: counter[] acts as a "seen" set to reject out-of-range
// or duplicated elements
std::vector<bool> counter(orderVec.size());
for (auto && x : this->orderVec) {
if (x < 0) {
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be >= 0";
}
if (x >= counter.size()) {
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be < "<< counter.size();
}
if (counter[x]) {
THROW_GNA_EXCEPTION << "invalid order: element " << x << " present more than once";
}
counter[x] = true;
}
// second pass: counter[] is reused as the visited flags for the cycle walk
std::fill(counter.begin(), counter.end(), false);
// each entry of permuteCycles holds one cycle as its (i, orderVec[i]) hops
std::list<cnt_type> permuteCycles;
bool newSeq = false;
for (int i = 0; i != orderVec.size();) {
// already visited: the current cycle (if any) is finished
if (counter[i]) {
newSeq = false;
i++;
continue;
}
counter[i] = true;
// i is moved by the permutation: extend (or start) the current cycle
if (orderVec[i] != i) {
if (!newSeq) {
newSeq = true;
permuteCycles.push_back({});
}
permuteCycles.back().push_back({i, orderVec[i]});
counter[i] = true;
i = orderVec[i]; // follow the cycle instead of advancing linearly
continue;
}
// fixed point: this dim is not permuted
i++;
}
// a k-cycle is realised by k-1 transpositions; the last recorded hop of
// each cycle closes it and is therefore dropped
for (auto && cycle : permuteCycles) {
for (int i = 0; i + 1 < cycle.size(); i++) {
permutes.push_back(cycle[i]);
}
}
}
// Returns the computed swap sequence.
const cnt_type & cnt() const noexcept {
return permutes;
}
};
/**
* @brief generates permutations sequence in order to reach given order
* @tparam Iterator
* @return
*/
/**
 * @brief generates permutations sequence in order to reach given order
 * @tparam Iterator random access iterator over the order values
 * @return list of pairwise swaps realizing the permutation
 */
template <class Iterator>
inline typename PermuteSequence<typename std::iterator_traits<Iterator>::value_type>::cnt_type genPermutations(
    Iterator beg, Iterator en) {
    static_assert(
        std::is_same<std::random_access_iterator_tag,
            typename std::iterator_traits<Iterator>::iterator_category>::value,
        "The genPermutations() function only accepts random access iterators or raw pointers to an array.\n");
    using value_type = typename std::iterator_traits<Iterator>::value_type;
    // materialize the range, then delegate to PermuteSequence
    std::vector<value_type> order(beg, en);
    PermuteSequence<value_type> seq(std::move(order));
    return seq.cnt();
}
/**
 * @brief initializer-list convenience overload of genPermutations()
 */
template <class T>
inline typename PermuteSequence<T>::cnt_type genPermutations(const std::initializer_list<T> & lst) {
    auto first = lst.begin();
    auto last = lst.end();
    return genPermutations(first, last);
}
} // namespace GNAPluginNS

View File

@@ -7,19 +7,54 @@
#include <memory>
#include <utility>
#include <cpp_interfaces/impl/ie_memory_state_internal.hpp>
#include <ie_blob.h>
#include "gna_plugin.hpp"
#include "preprocessing.hpp"
namespace GNAPluginNS {
namespace memory {
// Holds a GNA memory (state) blob between inference calls.
// NOTE(review): this class carries two construction variants - a legacy one
// built around a GNAPlugin pointer and a newer one built around a named blob;
// only the members of the variant actually constructed are initialized.
class GNAMemoryState : public InferenceEngine::MemoryStateInternal {
    std::shared_ptr<GNAPlugin> plg;        // set by the legacy ctor only
    std::string stateName;                 // set by the blob-backed ctor only
    InferenceEngine::Blob::Ptr memState;   // backing blob for the state
    float scalefactor;                     // FP32 -> I16 quantization scale
public:
    using Ptr = InferenceEngine::MemoryStateInternal::Ptr;

    // Legacy constructor: reset-only state tied to the whole plugin.
    explicit GNAMemoryState(std::shared_ptr<GNAPlugin> plg)
        : InferenceEngine::MemoryStateInternal("GNAResetState"), plg(plg) {}

    // Blob-backed named state with a quantization scale factor.
    explicit GNAMemoryState(std::string name,
        InferenceEngine::Blob::Ptr state,
        float scale_factor)
        : InferenceEngine::MemoryStateInternal(name), stateName(name),
        memState(state), scalefactor(scale_factor) {}

    // Resets the plugin and zeroes the state blob.
    // NOTE(review): dereferences both plg and memState, but each constructor
    // leaves one of them unset - confirm which variant callers Reset() on.
    void Reset() override {
        plg->Reset();
        std::memset(memState->buffer().as<int16_t*>(), 0, memState->byteSize());
    }

    // Overwrites the stored state from newState. Accepts FP32 (converted to
    // I16 using scalefactor) or I16 (raw byte copy); anything else throws.
    // NOTE(review): the error text says "> 2" but the check rejects any
    // rank != 2.
    void SetState(InferenceEngine::Blob::Ptr newState) override {
        if (newState->getTensorDesc().getDims().size() != 2) {
            THROW_GNA_EXCEPTION << "SetState failed for blob dimensions > 2";
        }
        // FP32 path: 4-byte floats vs 2-byte ints, so matching element count
        // means newState holds exactly twice the bytes of memState
        if ((newState->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) &&
            (newState->byteSize()/2 == memState->byteSize())) {
            ConvertToInt16(memState->buffer().as<int16_t*>(),
                newState->buffer().as<float*>(),
                newState->getTensorDesc().getDims()[0],
                newState->getTensorDesc().getDims()[1],
                scalefactor);
        } else if ((newState->getTensorDesc().getPrecision() == InferenceEngine::Precision::I16) &&
            (newState->byteSize() == memState->byteSize())) {
            // I16 path: identical size, plain byte copy
            std::memcpy(memState->buffer().as<uint8_t*>(),
                newState->buffer().as<uint8_t*>(),
                newState->byteSize());
        } else {
            THROW_GNA_EXCEPTION << "SetState call failed. Invalid precision / size";
        }
    }

    // Returns the current state blob without copying.
    InferenceEngine::Blob::CPtr GetLastState() const override {
        return memState;
    }
};
} // namespace memory

View File

@@ -8,6 +8,7 @@
#include <iostream>
#include <limits>
#include <cstdint>
#include <map>
#ifdef _NO_MKL_
#include <cmath>
@@ -30,6 +31,140 @@
#include "round_float_define.hpp"
// d/dx tanh(x) = 1 - tanh^2(x)
double first_deriv_tanh(const double x) { return 1.0 - tanh(x) * tanh(x); }
// d/dx exp(x) = exp(x)
double first_deriv_exp(const double x) { return exp(x); }
// d/dx ln(x) = 1/x
double first_deriv_log(const double x) { return 1.0 / x; }
// -0.5 * ln(x)
double neghalflog(const double x) { return -0.5 * log(x); }
// d/dx (-0.5 * ln(x)) = -0.5/x
double first_deriv_neghalflog(const double x) { return -0.5 / x; }
std::map<std::string, std::vector<pwl_t>> pwl_search_map {
{"log", {{1.0769533473860933e-05 , 8.4918474385631271e-06 , -11.662751279293021 , 92854.532875275778 , -12.451257806448908},
{1.7021658371797054e-05 , 1.3421901942456181e-05 , -11.204973371284382 , 58748.682305649265 , -11.993492424439317},
{2.6901160981803783e-05 , 2.121301943569138e-05 , -10.747255484868321 , 37173.116828541606 , -11.5358095346374},
{4.2508975575310986e-05 , 3.3523097254749539e-05 , -10.289651523932033 , 23524.443637281987 , -11.078263735848511},
{6.7159638615065504e-05 , 5.2968285045173431e-05 , -9.8322142997398423 , 14889.895488146327 , -10.620906528248819},
{0.00010607938536412906 , 8.3674876369534097e-05 , -9.3749963641228913 , 9426.9022823556988 , -10.163791247146683},
{0.00016750484167401942 , 0.0001321476299142904 , -8.9180484530997628 , 5969.97668847147 , -9.7069667231248324},
{0.00026440839561966089 , 0.00020863498397988411 , -8.4614207323653048 , 3782.0281676624659 , -9.2504841185370346},
{0.00041721037390891224 , 0.00032927394464933357 , -8.0051607849959225 , 2396.8723275762272 , -8.794388391117776},
{0.00065803047888043399 , 0.00051945736028058084 , -7.5493154189054579 , 1519.6864462895246 , -8.3387277287491912},
{0.001037362756270747 , 0.00081911613772069059 , -7.0939280363180339 , 963.98294035052527 , -7.8835420192465913},
{0.0016345178691889663 , 0.0012909987974520291 , -6.6390412024897918 , 611.8012037985186 , -7.4288758208733832},
{0.0025739995666866798 , 0.0020336387647946299 , -6.1846931764806961 , 388.50045390148483 , -6.9747627596750643},
{0.004051067401148134 , 0.0032016468460507675 , -5.7309215067520842 , 246.84852187761314 , -6.5212432982738378},
{0.0063717878399278975 , 0.0050374410331472524 , -5.2777584251958025 , 156.94182309926316 , -6.0683436046929682},
{0.010015399744147635 , 0.0079208190427630248 , -4.8252358236823785 , 99.846239345996764 , -5.6160998176424242},
{0.015731959416888035 , 0.012446344656992419 , -4.3733791100375914 , 63.564872848992557 , -5.1645294255940524},
{0.024693977573234764 , 0.019544055268788288 , -3.9222140374798418 , 40.495703741299124 , -4.7136643095482684},
{0.038733765155746253 , 0.030667594605217476 , -3.4717584839571183 , 25.817268111660667 , -4.2635119962197363},
{0.060710894498842323 , 0.048087203788268418 , -3.0220317632779459 , 16.471508256546421 , -3.8141005375106394},
{0.095087507492990303 , 0.07534567638130947 , -2.5730436069008258 , 10.516628591549917 , -3.3654261013821722},
{0.14881589164648179 , 0.1179673976691515 , -2.1248067941840345 , 6.7197124509762807 , -2.9175137851107023},
{0.23272951343232598 , 0.18456019070518948 , -1.6773223736744836 , 4.2968336299589422 , -2.4703468078481774},
{0.36368021928596278 , 0.28852643303350278 , -1.2305967272577258 , 2.7496683816440872 , -2.0239487374384986},
{0.56789454936972605 , 0.45071871177964989 , -0.78462174664264084 , 1.7608902939988476 , -1.5782879515390904},
{0.88610468810159015 , 0.70355895583540895 , -0.33939781495255494 , 1.1285348259949077 , -1.1333885987534271},
{1.3816335789411809 , 1.0974241494088073 , 0.10509277274225079 , 0.72378090344789758 , -0.68920186958239626},
{2.1526786672071778 , 1.7105408556958117 , 0.54885493633765814 , 0.46453751562343976 , -0.24575546313966701},
{3.3517682459038256 , 2.6643098700159529 , 0.99191642472855346 , 0.29834998324305195 , 0.19701961965499604},
{5.2150907594192395 , 4.1470315870077039 , 1.4342864241471516 , 0.19175121702222525 , 0.63908807030881409},
{8.1092990368633888 , 6.4506248753490381 , 1.8760032407108322 , 0.12331522064412515 , 1.0805430109146834},
{12.601356824411839 , 10.02743923506598 , 2.3170788926824022 , 0.079356533898219603 , 1.5213360711125516},
{19.571181442374591 , 15.578135417317394 , 2.7575629024279547 , 0.051095535695910903 , 1.9615897281366799},
{30.377546048984698 , 24.187602070756132 , 3.1974682131414882 , 0.032919051406834175 , 2.4012352971662185},
{47.129543514739666 , 37.535139255177242 , 3.6368564758700774 , 0.02121811342575923 , 2.840431633702059},
{73.079549140200285 , 58.219475920898645 , 4.0757390773799447 , 0.013683718793633217 , 3.2790801405656671},
{113.27901633813272 , 90.261352592829454 , 4.5141911073789212 , 0.0088277602712848978 , 3.7173855249275034},
{175.50631439356027 , 139.88074429844423 , 4.9522192021630707 , 0.0056978006942677408 , 4.155206600184707},
{271.85671771564853 , 216.69890827860715 , 5.3899137902216161 , 0.0036784082747808367 , 4.5928067328736137},
{420.92464268160882 , 335.59785527230628 , 5.8272726607059671 , 0.0023757221568907016 , 5.02998540013055},
{651.67787232666603 , 519.59734639612975 , 6.2644043286254485 , 0.0015345004678918 , 5.4670819574652496},
{1008.5639944354145 , 804.30768425774158 , 6.7012924752877234 , 0.00099150872479816517 , 5.9038143889239647},
{1561.0124734004803 , 1244.8233916434529 , 7.1380676425712917 , 0.00064060987150321626 , 6.3406214896063817},
{2415.300015438163 , 1926.4004262204676 , 7.5746926191112163 , 0.00041402724034620179 , 6.7771103668414092},
{0 , 2981 , 8.0113255703134367 , 0 , 0}}},
{"exp", {{-5.2905549738656035 , -7.6246190071105957 , -0.0029375872840971921 , 0.0050389629907875762 , 0.035482585711588618},
{-3.2765565204702316 , -3.966387017312524 , 0.015496108324210485 , 0.037758052285013388 , 0.16525915670649505},
{-2.304345998844584 , -2.7128986917228044 , 0.062825386060480992 , 0.099824064363453618 , 0.33363795967454735},
{-1.6505759560844804 , -1.9420942111377082 , 0.1397702221420461 , 0.19193932815516293 , 0.512534480241849},
{-1.1551903296891044 , -1.3825157257724856 , 0.24717534067313054 , 0.31499757780654358 , 0.6826644455709191},
{-0.75535692384923692 , -0.94198675918566444 , 0.38594089810159893 , 0.46984288666759971 , 0.82852667624004861},
{-0.41999557300201118 , -0.57832149976517178 , 0.55680643336844471 , 0.65704972856644539 , 0.93679241781329048},
{-0.13153483933686491 , -0.2688406706913587 , 0.76015072810791207 , 0.87674872926137593 , 0.99585644451033684},
{0.4235904473306159 , 0 , 0.94308787650971637 , 1.5274359002153208 , 0.94308787650971637},
{1.0937368353927797 , 0.79581116380659767 , 2.1586384179000495 , 2.9854092373666563 , -0.2171835817276766},
{ 1.5979006351086957 , 1.3669113028909896 , 3.8636060485839758 , 4.9426451092152783 , -2.8925514173812577},
{ 2.0041923346353099 , 1.8147648658250355 , 6.0771872710645756 , 7.420098519377718 , -7.3885468228624731},
{ 2.3452256111159864 , 2.1843822125126793 , 8.8197843979578803 , 10.435626855401082 , -13.975613281399871},
{ 2.6387547172826245 , 2.4991598196640292 , 12.1046860486254 , 13.99576406829804 , -22.872965156362628},
{ 0 , 2.7725581832447883 , 15.931105041960471 , -0 , -0}}},
{"sigmoid", {{-6.0269768546940687 , -10 , -0.0033685324745532531 , 0.0024011761556240077 , 0.020643229081686823},
{-3.4572777895083773 , -4.2646607997060624 , 0.010403027257608216 , 0.029619100828046807 , 0.13671844548152082},
{-2.302945392313446 , -2.7960754970003254 , 0.053901203413037058 , 0.082620267964448268 , 0.28491371022403178},
{-1.4431692770391085 , -1.8482809500056467 , 0.13220824286098024 , 0.15455301637191463 , 0.41786563878710092},
{-0.58709153507881506 , -1.0390898867848257 , 0.25727116250295479 , 0.22963741468060328 , 0.49588507772498291},
{0.58709153507881362 , 0 , 0.50411492227501709 , 0.22963741468060325 , 0.50411492227501709},
{1.4431692770391091 , 1.0390898867848253 , 0.7427288374970451 , 0.15455301637191451 , 0.58213436121289919},
{2.3029453923134513 , 1.8482809500056492 , 0.86779175713902001 , 0.082620267964447991 , 0.71508628977596878},
{3.4572777895083746 , 2.7960754970003263 , 0.94609879658696283 , 0.029619100828046918 , 0.86328155451847877},
{6.0269768546940705 , 4.2646607997060606 , 0.98959697274239178 , 0.0024011761556240298 , 0.97935677091831308},
{ 0 , 10 , 1.0033685324745534 , 0 , 0}}},
{"tanh", {{-3.0134884273470361 , -5 , -1.0067370649491065 , 0.0096047046224959371 , -0.95871354183662683},
{-1.7286388947541886 , -2.1323303998530339 , -0.979193945484784 , 0.11847640331218724 , -0.72656310903695842},
{-1.1514726961567241 , -1.3980377485001632 , -0.892197593173926 , 0.3304810718577928 , -0.43017257955193672},
{-0.72158463851955434 , -0.92414047500282348 , -0.73558351427803959 , 0.61821206548765828 , -0.16426872242579849},
{-0.29354576753940709 , -0.51954494339241275 , -0.48545767499409032 , 0.91854965872241312 , -0.0082298445500341155},
{0.29354576753940703 , 0 , 0.0082298445500341155 , 0.91854965872241323 , 0.0082298445500341155},
{0.72158463851955434 , 0.51954494339241275 , 0.48545767499409037 , 0.6182120654876585 , 0.16426872242579826},
{1.1514726961567245 , 0.92414047500282415 , 0.73558351427804003 , 0.33048107185779213 , 0.43017257955193755},
{1.7286388947541889 , 1.3980377485001632 , 0.89219759317392588 , 0.11847640331218723 , 0.72656310903695842},
{3.0134884273470322 , 2.1323303998530312 , 0.97919394548478356 , 0.0096047046224960447 , 0.95871354183662627},
{ 0 , 5 , 1.0067370649491065 , 0 , 0}}},
{"NegHalfLog", {{1.0769533473860933e-05 ,8.4918474385631271e-06 ,5.8313756396465104 ,-46427.266437637889 ,6.2256289032244538 },
{1.7021658371797054e-05 ,1.3421901942456181e-05 ,5.6024866856421909 ,-29374.341152824632 ,5.9967462122196586 },
{2.6901160981803783e-05 ,2.121301943569138e-05 ,5.3736277424341603 ,-18586.558414270803 ,5.7679047673186998 },
{4.2508975575310986e-05 ,3.3523097254749539e-05 ,5.1448257619660165 ,-11762.221818640994 ,5.5391318679242554 },
{6.7159638615065504e-05 ,5.2968285045173431e-05 ,4.9161071498699211 ,-7444.9477440731634 ,5.3104532641244093 },
{0.00010607938536412906 ,8.3674876369534097e-05 ,4.6874981820614456 ,-4713.4511411778494 ,5.0818956235733417 },
{0.00016750484167401942 ,0.0001321476299142904 ,4.4590242265498814 ,-2984.988344235735 ,4.8534833615624162 },
{0.00026440839561966089 ,0.00020863498397988411 ,4.2307103661826524 ,-1891.0140838312329 ,4.6252420592685173 },
{0.00041721037390891224 ,0.00032927394464933357 ,4.0025803924979613 ,-1198.4361637881136 ,4.397194195558888 },
{0.00065803047888043399 ,0.00051945736028058084 ,3.7746577094527289 ,-759.84322314476231 ,4.1693638643745956 },
{0.001037362756270747 ,0.00081911613772069059 ,3.546964018159017 ,-481.99147017526263 ,3.9417710096232956 },
{0.0016345178691889663 ,0.0012909987974520291 ,3.3195206012448959 ,-305.9006018992593 ,3.7144379104366916 },
{0.0025739995666866798 ,0.0020336387647946299 ,3.092346588240348 ,-194.25022695074242 ,3.4873813798375322 },
{0.004051067401148134 ,0.0032016468460507675 ,2.8654607533760421 ,-123.42426093880657 ,3.2606216491369189 },
{0.0063717878399278975 ,0.0050374410331472524 ,2.6388792125979013 ,-78.47091154963158 ,3.0341718023464841 },
{0.010015399744147635 ,0.0079208190427630248 ,2.4126179118411892 ,-49.923119672998382 ,2.8080499088212121 },
{0.015731959416888035 ,0.012446344656992419 ,2.1866895550187957 ,-31.782436424496279 ,2.5822647127970262 },
{0.024693977573234764 ,0.019544055268788288 ,1.9611070187399209 ,-20.247851870649562 ,2.3568321547741342 },
{0.038733765155746253 ,0.030667594605217476 ,1.7358792419785591 ,-12.908634055830333 ,2.1317559981098682 },
{0.060710894498842323 ,0.048087203788268418 ,1.5110158816389729 ,-8.2357541282732107 ,1.9070502687553197 },
{0.095087507492990303 ,0.07534567638130947 ,1.2865218034504129 ,-5.2583142957749587 ,1.6827130506910861 },
{0.14881589164648179 ,0.1179673976691515 ,1.0624033970920173 ,-3.3598562254881403 ,1.4587568925553511 },
{0.23272951343232598 ,0.18456019070518948 ,0.83866118683724178 ,-2.1484168149794711 ,1.2351734039240887 },
{0.36368021928596278 ,0.28852643303350278 ,0.61529836362886292 ,-1.3748341908220436 ,1.0119743687192493 },
{0.56789454936972605 ,0.45071871177964989 ,0.39231087332132042 ,-0.88044514699942378 ,0.7891439757695452 },
{0.88610468810159015 ,0.70355895583540895 ,0.16969890747627747 ,-0.56426741299745387 ,0.56669429937671356 },
{1.3816335789411809 ,1.0974241494088073 ,-0.052546386371125395 ,-0.36189045172394879 ,0.34460093479119813 },
{2.1526786672071778 ,1.7105408556958117 ,-0.27442746816882907 ,-0.23226875781171988 ,0.1228777315698335 },
{3.3517682459038256 ,2.6643098700159529 ,-0.49595821236427673 ,-0.14917499162152598 ,-0.098509809827498018 },
{5.2150907594192395 ,4.1470315870077039 ,-0.71714321207357579 ,-0.095875608511112625 ,-0.31954403515440705 },
{8.1092990368633888 ,6.4506248753490381 ,-0.93800162035541612 ,-0.061657610322062573 ,-0.54027150545734171 },
{12.601356824411839 ,10.02743923506598 ,-1.1585394463412011 ,-0.039678266949109801 ,-0.76066803555627582 },
{19.571181442374591 ,15.578135417317394 ,-1.3787814512139773 ,-0.025547767847955451 ,-0.98079486406833993 },
{30.377546048984698 ,24.187602070756132 ,-1.5987341065707441 ,-0.016459525703417088 ,-1.2006176485831093 },
{47.129543514739666 ,37.535139255177242 ,-1.8184282379350387 ,-0.010609056712879615 ,-1.4202158168510295 },
{73.079549140200285 ,58.219475920898645 ,-2.0378695386899723 ,-0.0068418593968166087 ,-1.6395400702828336 },
{113.27901633813272 ,90.261352592829454 ,-2.2570955536894606 ,-0.0044138801356424489 ,-1.8586927624637517 },
{175.50631439356027 ,139.88074429844423 ,-2.4761096010815353 ,-0.0028489003471338704 ,-2.0776033000923535 },
{271.85671771564853 ,216.69890827860715 ,-2.694956895110808 ,-0.0018392041373904184 ,-2.2964033664368069 },
{420.92464268160882 ,335.59785527230628 ,-2.9136363303529835 ,-0.0011878610784453508 ,-2.514992700065275 },
{651.67787232666603 ,519.59734639612975 ,-3.1322021643127242 ,-0.00076725023394590001 ,-2.7335409787326248 },
{1008.5639944354145 ,804.30768425774158 ,-3.3506462376438617 ,-0.00049575436239908258 ,-2.9519071944619824 },
{1561.0124734004803 ,1244.8233916434529 ,-3.5690338212856458 ,-0.00032030493575160813 ,-3.1703107448031909 },
{2415.300015438163 ,1926.4004262204676 ,-3.7873463095556081 ,-0.00020701362017310089 ,-3.3885551834207046 },
{0 ,2981 ,-4.0056627851567184 ,-0 ,-0 }}}
};
// logistic sigmoid via the tanh identity: 1/(1+e^-x) == 0.5*(1 + tanh(x/2))
double sigmoid(const double x) { return 0.5 * (1.0 + tanh(x / 2)); }
// d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x))
double first_deriv_sigmoid(const double x) { return sigmoid(x) * (1.0 - sigmoid(x)); }
@@ -174,6 +309,15 @@ double calculate_error_pct(const DnnActivationType fun,
min_val = max_val = sigmoid(l_bound); break;
case kActTanh:
min_val = max_val = tanh(l_bound); break;\
case kActExp:
min_val = max_val = exp(l_bound);
break;
case kActLog:
min_val = max_val = log(l_bound);
break;
case kActNegHalfLog:
min_val = max_val = neghalflog(l_bound);
break;
default:
break;
}
@@ -188,6 +332,15 @@ double calculate_error_pct(const DnnActivationType fun,
case kActTanh:
val = tanh(arg);
break;
case kActExp:
val = exp(arg);
break;
case kActLog:
val = log(arg);
break;
case kActNegHalfLog:
val = neghalflog(arg);
break;
default:
break;
}
@@ -209,6 +362,7 @@ bool split_search(const DnnActivationType fun,
switch (fun) {
case kActSigmoid:
case kActTanh:
case kActExp:
if ((l_bound < 0.0) && (u_bound > 0.0)) {
is_split = true;
}
@@ -254,7 +408,9 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
pwl = pwl_search(fun, l_bound, 0.0, threshold, allowed_err_pct, samples, err_pct1);
pwl = negative_pwl(pwl);
pwl2 = pwl_search(fun, 0.0, u_bound, threshold, allowed_err_pct, samples, err_pct2);
if (fun == kActExp) {
pwl2 = negative_pwl(pwl2); // both regions of exp are concave
}
// merge
pwl.pop_back(); // remove final alpha and beta from first half
pwl.insert(pwl.end(), pwl2.begin(), pwl2.end()); // concatenate the two halves
@@ -274,10 +430,12 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
pwl[0].alpha = pwl[0].t = pwl[0].beta = -std::numeric_limits<float>::infinity();
pwl[0].m = 0.0;
pwl[0].b = pwl[0].beta = KALDI_LSTM_CLIP_LOWER;
pwl[1].alpha = pwl[0].t = pwl[1].beta = KALDI_LSTM_CLIP_LOWER;
//pwl[1].alpha = pwl[0].t = pwl[1].beta = KALDI_LSTM_CLIP_LOWER;
pwl[1].alpha = pwl[1].t = pwl[1].beta = KALDI_LSTM_CLIP_LOWER;
pwl[1].m = 1.0;
pwl[1].b = 0.0;
pwl[2].alpha = pwl[0].t = pwl[1].beta = KALDI_LSTM_CLIP_UPPER;
//pwl[2].alpha = pwl[0].t = pwl[1].beta = KALDI_LSTM_CLIP_UPPER;
pwl[2].alpha = pwl[2].t = pwl[2].beta = KALDI_LSTM_CLIP_UPPER;
pwl[2].m = 0.0;
pwl[2].b = KALDI_LSTM_CLIP_UPPER;
pwl[3].alpha = pwl[3].beta = std::numeric_limits<float>::infinity();
@@ -294,6 +452,17 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
if (u_bound == 0) negative = true; // make left half convex
err = pivot_search(pwl, tanh, first_deriv_tanh, n_segments, l_bound, u_bound, threshold, negative);
break;
case kActExp:
negative = true; // make function convex
err = pivot_search(pwl, exp, first_deriv_exp, n_segments, l_bound, u_bound, threshold, negative);
break;
case kActLog:
err = pivot_search(pwl, log, first_deriv_log, n_segments, l_bound, u_bound, threshold, negative);
break;
case kActNegHalfLog:
negative = true; // make function convex
err = pivot_search(pwl, neghalflog, first_deriv_neghalflog, n_segments, l_bound, u_bound, threshold, negative);
break;
default:
break;
}
@@ -308,6 +477,15 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
case kActTanh:
err = pivot_search(pwl, tanh, first_deriv_tanh, n_segments, l_bound, u_bound, threshold, negative);
break;
case kActExp:
err = pivot_search(pwl, exp, first_deriv_exp, n_segments, l_bound, u_bound, threshold, negative);
break;
case kActLog:
err = pivot_search(pwl, log, first_deriv_log, n_segments, l_bound, u_bound, threshold, negative);
break;
case kActNegHalfLog:
err = pivot_search(pwl, neghalflog, first_deriv_neghalflog, n_segments, l_bound, u_bound, threshold, negative);
break;
default:
break;
}
@@ -326,30 +504,80 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
void PwlDesignOpt16(const DnnActivation activation_type,
std::vector<intel_pwl_segment_t> &ptr_segment,
const float scale_in,
const float scale_out) {
const float scale_out,
const uint32_t n) {
std::vector<pwl_t> pwl;
double err_pct = 0.0;
switch (activation_type) {
case kActSigmoid:
pwl = pwl_search(kActSigmoid, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
make_gna_pwl(activation_type, pwl, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, scale_in, scale_out, ptr_segment);
if ( pwl_search_map.find("sigmoid") == pwl_search_map.end() ) {
pwl = pwl_search(kActSigmoid, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
} else {
pwl = pwl_search_map["sigmoid"];
}
make_gna_pwl(activation_type, pwl, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, scale_in, scale_out, ptr_segment, n);
break;
case kActTanh:
pwl = pwl_search(kActTanh, -TANH_DOMAIN, TANH_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
make_gna_pwl(activation_type, pwl, -TANH_DOMAIN, TANH_DOMAIN, scale_in, scale_out, ptr_segment);
if ( pwl_search_map.find("tanh") == pwl_search_map.end() ) {
pwl = pwl_search(kActTanh, -TANH_DOMAIN, TANH_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
} else {
pwl = pwl_search_map["tanh"];
}
make_gna_pwl(activation_type, pwl, -TANH_DOMAIN, TANH_DOMAIN, scale_in, scale_out, ptr_segment, n);
break;
case kActRelu:
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment);
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment, n);
break;
case kActLeakyRelu:
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment);
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment, n);
break;
case kActIdentity:
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment);
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment, n);
break;
case kActKaldiLstmClipping:
make_gna_pwl(activation_type, pwl, KALDI_LSTM_CLIP_LOWER, KALDI_LSTM_CLIP_UPPER, scale_in, scale_out, ptr_segment);
make_gna_pwl(activation_type, pwl, KALDI_LSTM_CLIP_LOWER, KALDI_LSTM_CLIP_UPPER, scale_in, scale_out, ptr_segment, n);
break;
case kActDivByN: {
int32_t n_4 = n/4;
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment, n_4);
break;
}
case kActLog: {
double x_min = (1 + ~XBASEMASK) / scale_in;
double x_max = ((INT32_MAX / scale_in) < LOG_DOMAIN) ? (INT32_MAX / scale_in) : LOG_DOMAIN;
if ( pwl_search_map.find("log") == pwl_search_map.end() ) {
pwl = pwl_search(kActLog, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.066*PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
} else {
pwl = pwl_search_map["log"];
}
make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment, n);
break;
}
case kActExp: {
double x_min = -log(scale_out);
double x_max = x_min + log(INT16_MAX);
if ( pwl_search_map.find("exp") == pwl_search_map.end() ) {
pwl = pwl_search(kActExp, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.5*PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
} else {
pwl = pwl_search_map["exp"];
}
make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment, n);
break;
}
case kActNegHalfLog: {
double x_min = (1 + ~XBASEMASK) / scale_in;
double x_max = ((INT32_MAX / scale_in) < LOG_DOMAIN) ? (INT32_MAX / scale_in) : LOG_DOMAIN;
if ( pwl_search_map.find("NegHalfLog") == pwl_search_map.end() ) {
pwl = pwl_search(kActNegHalfLog, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.066*PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
pwl = negative_pwl(pwl);
} else {
pwl = pwl_search_map["NegHalfLog"];
}
make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment, n);
break;
}
default:
break;
}
@@ -359,7 +587,8 @@ void PwlDesign16(const DnnActivation activation_type,
intel_pwl_segment_t *ptr_segment,
const uint32_t num_segments,
const float scale_in,
const float scale_out) {
const float scale_out,
const uint32_t n) {
switch (activation_type) {
case kActSigmoid:
{
@@ -651,6 +880,27 @@ void PwlApply32(intel_dnn_component_t *component,
}
}
break;
case kActDivByN:
for (uint32_t i = num_row_start; i <= num_row_end; i++) {
for (uint32_t j = num_col_start; j <= num_col_end; j++) {
ptr_out[i * num_columns + j] = ptr_in[i * num_columns + j]/(float)(num_row_end-num_row_start+1);
}
}
break;
case kActExp:
for (uint32_t i = num_row_start; i <= num_row_end; i++) {
for (uint32_t j = num_col_start; j <= num_col_end; j++) {
ptr_out[i * num_columns + j] = exp(ptr_in[i * num_columns + j]);
}
}
break;
case kActLog:
for (uint32_t i = num_row_start; i <= num_row_end; i++) {
for (uint32_t j = num_col_start; j <= num_col_end; j++) {
ptr_out[i * num_columns + j] = log(ptr_in[i * num_columns + j]);
}
}
break;
case kActCustom:
// break;
default:fprintf(stderr, "Unknown piecewise linear function type!\n");

View File

@@ -27,6 +27,8 @@
#define XBASEMASK 0xFFFFFFFC // only top 30 bits are used
#define KALDI_LSTM_CLIP_LOWER (-50.0)
#define KALDI_LSTM_CLIP_UPPER (50.0)
#define LOG_DOMAIN (2981.0)
#define EXP_DOMAIN (8.0)
typedef struct {
double t;
@@ -88,8 +90,10 @@ void PwlDesign16(const DnnActivation activation_type,
intel_pwl_segment_t *ptr_segment,
const uint32_t num_segments,
const float scale_in,
const float scale_out);
const float scale_out,
const uint32_t n);
void PwlDesignOpt16(const DnnActivation activation_type,
std::vector<intel_pwl_segment_t> &ptr_segment,
const float scale_in,
const float scale_out);
const float scale_out,
const uint32_t n);

View File

@@ -14,17 +14,13 @@ set(builder_files_src ${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_layer_decorator.cp
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_const_layer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_split_layer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_pooling_layer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_divbyn_layer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_network_builder.cpp)
file (GLOB LIBRARY_SRC
${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/low_precision_transformations/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/low_precision_transformations/common/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/transform/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/transform/transformations/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/transform/transformations/fusion/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/transform/transformations/utils/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ngraph_ops/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/low_precision_transformations/common/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp_interfaces/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/built-in/*.cpp
@@ -55,6 +51,7 @@ file (GLOB LIBRARY_HEADERS
)
if (NOT ENABLE_NGRAPH)
message(error " removing enable ngraph")
list(REMOVE_ITEM LIBRARY_SRC "${CMAKE_CURRENT_SOURCE_DIR}/ie_cnn_layer_builder_ngraph.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/cnn_network_ngraph_impl.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ie_ir_parser.cpp"
@@ -318,7 +315,7 @@ install(TARGETS ${TARGET_NAME} ${TARGET_NAME}_nn_builder
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH}
COMPONENT core)
install(FILES "${OpenVINO_BINARY_DIR}/share/ie_parallel.cmake"
"${OpenVINO_BINARY_DIR}/share/ie_rh_decoder.cmake"
#"${OpenVINO_BINARY_DIR}/share/ie_rh_decoder.cmake"
"${OpenVINO_BINARY_DIR}/share/InferenceEngineConfig.cmake"
"${OpenVINO_BINARY_DIR}/share/InferenceEngineConfig-version.cmake"
DESTINATION ${IE_CPACK_IE_DIR}/share

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <builders/ie_divbyn_layer.hpp>
#include <string>
using namespace InferenceEngine;
// Builder decorator for the "DivByN" layer; exposes one input and one
// output port.
Builder::DivByNLayer::DivByNLayer(const std::string& name): LayerDecorator("DivByN", name) {
    getLayer()->getOutputPorts().resize(1);
    getLayer()->getInputPorts().resize(1);
}

// Wraps an existing mutable layer; validates that it is of type "DivByN".
Builder::DivByNLayer::DivByNLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
    checkType("DivByN");
}

// Wraps an existing const layer; validates that it is of type "DivByN".
Builder::DivByNLayer::DivByNLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
    checkType("DivByN");
}

// Renames the layer; returns *this for call chaining.
Builder::DivByNLayer& Builder::DivByNLayer::setName(const std::string& name) {
    getLayer()->setName(name);
    return *this;
}

// Returns the single output port of the layer.
const Port& Builder::DivByNLayer::getPort() const {
    return getLayer()->getOutputPorts()[0];
}

// Sets the same port descriptor on both input and output; returns *this
// for call chaining.
Builder::DivByNLayer& Builder::DivByNLayer::setPort(const Port &port) {
    getLayer()->getOutputPorts()[0] = port;
    getLayer()->getInputPorts()[0] = port;
    return *this;
}

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <builders/ie_exp_layer.hpp>
#include <string>
using namespace InferenceEngine;
// Builder decorator for the "Exp" (exponential activation) layer; exposes
// one input and one output port.
Builder::ExpLayer::ExpLayer(const std::string& name): LayerDecorator("Exp", name) {
    getLayer()->getOutputPorts().resize(1);
    getLayer()->getInputPorts().resize(1);
}

// Wraps an existing mutable layer; validates that it is of type "Exp".
Builder::ExpLayer::ExpLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
    checkType("Exp");
}

// Wraps an existing const layer; validates that it is of type "Exp".
Builder::ExpLayer::ExpLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
    checkType("Exp");
}

// Renames the layer; returns *this for call chaining.
Builder::ExpLayer& Builder::ExpLayer::setName(const std::string& name) {
    getLayer()->setName(name);
    return *this;
}

// Returns the single output port of the layer.
const Port& Builder::ExpLayer::getPort() const {
    return getLayer()->getOutputPorts()[0];
}

// Sets the same port descriptor on both input and output; returns *this
// for call chaining.
Builder::ExpLayer& Builder::ExpLayer::setPort(const Port &port) {
    getLayer()->getOutputPorts()[0] = port;
    getLayer()->getInputPorts()[0] = port;
    return *this;
}

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <builders/ie_identity_layer.hpp>
#include <string>
using namespace InferenceEngine;
// Builder decorator for the "Identity" layer; exposes one input and one
// output port.
Builder::IdentityLayer::IdentityLayer(const std::string& name): LayerDecorator("Identity", name) {
    getLayer()->getOutputPorts().resize(1);
    getLayer()->getInputPorts().resize(1);
}

// Wraps an existing mutable layer; validates that it is of type "Identity".
Builder::IdentityLayer::IdentityLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
    checkType("Identity");
}

// Wraps an existing const layer; validates that it is of type "Identity".
Builder::IdentityLayer::IdentityLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
    checkType("Identity");
}

// Renames the layer; returns *this for call chaining.
Builder::IdentityLayer& Builder::IdentityLayer::setName(const std::string& name) {
    getLayer()->setName(name);
    return *this;
}

// Returns the single output port of the layer.
const Port& Builder::IdentityLayer::getPort() const {
    return getLayer()->getOutputPorts()[0];
}

// Sets the same port descriptor on both input and output; returns *this
// for call chaining.
Builder::IdentityLayer& Builder::IdentityLayer::setPort(const Port &port) {
    getLayer()->getOutputPorts()[0] = port;
    getLayer()->getInputPorts()[0] = port;
    return *this;
}

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <builders/ie_log_layer.hpp>
#include <string>
using namespace InferenceEngine;
Builder::LogLayer::LogLayer(const std::string& name): LayerDecorator("Log", name) {
getLayer()->getOutputPorts().resize(1);
getLayer()->getInputPorts().resize(1);
}
Builder::LogLayer::LogLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
checkType("Log");
}
Builder::LogLayer::LogLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
checkType("Log");
}
Builder::LogLayer& Builder::LogLayer::setName(const std::string& name) {
getLayer()->setName(name);
return *this;
}
const Port& Builder::LogLayer::getPort() const {
return getLayer()->getOutputPorts()[0];
}
Builder::LogLayer& Builder::LogLayer::setPort(const Port &port) {
getLayer()->getOutputPorts()[0] = port;
getLayer()->getInputPorts()[0] = port;
return *this;
}

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <builders/ie_neghalf_log_layer.hpp>
#include <string>
using namespace InferenceEngine;
Builder::NegHalfLogLayer::NegHalfLogLayer(const std::string& name): LayerDecorator("NegHalfLog", name) {
getLayer()->getOutputPorts().resize(1);
getLayer()->getInputPorts().resize(1);
}
Builder::NegHalfLogLayer::NegHalfLogLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
checkType("NegHalfLog");
}
Builder::NegHalfLogLayer::NegHalfLogLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
checkType("NegHalfLog");
}
Builder::NegHalfLogLayer& Builder::NegHalfLogLayer::setName(const std::string& name) {
getLayer()->setName(name);
return *this;
}
const Port& Builder::NegHalfLogLayer::getPort() const {
return getLayer()->getOutputPorts()[0];
}
Builder::NegHalfLogLayer& Builder::NegHalfLogLayer::setPort(const Port &port) {
getLayer()->getOutputPorts()[0] = port;
getLayer()->getInputPorts()[0] = port;
return *this;
}

View File

@@ -269,13 +269,14 @@ idx_t Builder::Network::addLayer(const std::vector<PortInfo>& inputs, const Laye
}
idx_t Builder::Network::addLayer(const Layer& layer) {
auto &layerParam = parameters["layers"].as<std::vector<Layer::Ptr>>();
auto getAvailableId = [&](idx_t defaultId) {
if (defaultId == (std::numeric_limits<idx_t>::max)()) defaultId = 0;
auto it = parameters["layers"].as<std::vector<Layer::Ptr>>().begin();
while (it != parameters["layers"].as<std::vector<Layer::Ptr>>().end()) {
for (it = parameters["layers"].as<std::vector<Layer::Ptr>>().begin();
it != parameters["layers"].as<std::vector<Layer::Ptr>>().end(); it++) {
auto it = layerParam.begin();
while (it != layerParam.end()) {
for (it = layerParam.begin();
it != layerParam.end(); it++) {
if ((*it)->getId() == defaultId) {
defaultId++;
break;
@@ -302,10 +303,8 @@ idx_t Builder::Network::addLayer(const Layer& layer) {
};
idx_t generatedId = getAvailableId(layer.getId());
const auto name = generateAvailableName(layer.getName(), generatedId);
parameters["layers"].as<std::vector<Layer::Ptr>>().emplace_back(std::make_shared<Layer>(generatedId, layer));
parameters["layers"]
.as<std::vector<Layer::Ptr>>()[parameters["layers"].as<std::vector<Layer::Ptr>>().size() - 1]
->setName(name);
layerParam.emplace_back(std::make_shared<Layer>(generatedId, layer));
layerParam[layerParam.size() - 1]->setName(name);
return generatedId;
}

View File

@@ -81,6 +81,11 @@ public:
{"elu", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("ELU")},
{"sigmoid", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("Sigmoid")},
{"tanh", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("TanH")},
{"neghalflog", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("NegHalfLog")},
{"log", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("Log")},
{"divbyn", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("DivByN")},
{"exp", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("exp")},
{"identity", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("identity")},
};
auto typeIt = layer->getParameters().find("type");

View File

@@ -13,7 +13,9 @@
#include <utility>
#include <vector>
#if defined(ENABLE_NGRAPH)
#include "cnn_network_ngraph_impl.hpp"
#endif
#include "debug.h"
#include "details/os/os_filesystem.hpp"
#include "ie_format_parser.h"

View File

@@ -15,7 +15,9 @@
#include <utility>
#include <vector>
#if defined(ENABLE_NGRAPH)
#include <ngraph/opsets/opset.hpp>
#endif
#include "cpp_interfaces/base/ie_plugin_base.hpp"
#include "details/caseless.hpp"
#include "details/ie_exception_conversion.hpp"
@@ -232,7 +234,6 @@ public:
*/
InferencePlugin GetCPPPluginByName(const std::string& deviceName) const {
IE_SUPPRESS_DEPRECATED_START
auto it = pluginRegistry.find(deviceName);
if (it == pluginRegistry.end()) {
THROW_IE_EXCEPTION << "Device with \"" << deviceName << "\" name is not registered in the InferenceEngine";
@@ -368,6 +369,7 @@ public:
}
void addExtension(const IExtensionPtr& extension) {
#if defined(ENABLE_NGRAPH)
std::map<std::string, ngraph::OpSet> opsets;
try {
opsets = extension->getOpSets();
@@ -377,6 +379,7 @@ public:
THROW_IE_EXCEPTION << "Cannot add opset with name: " << it.first << ". Opset with the same name already exists.";
opsetNames.insert(it.first);
}
#endif
extensions.emplace_back(extension);
}

View File

@@ -38,6 +38,11 @@ CNNLayer::Ptr ActivationLayerCreator::CreateLayer(pugi::xml_node& node, LayerPar
{"clamp", std::make_shared<LayerCreator<ClampLayer>>("Clamp")},
{"elu", std::make_shared<LayerCreator<CNNLayer>>("ELU")},
{"sigmoid", std::make_shared<LayerCreator<CNNLayer>>("Sigmoid")},
{"log", std::make_shared<LayerCreator<CNNLayer>>("Log")},
{"neghalflog", std::make_shared<LayerCreator<CNNLayer>>("NegHalfLog")},
{"divbyn", std::make_shared<LayerCreator<CNNLayer>>("DivByN")},
{"identity", std::make_shared<LayerCreator<CNNLayer>>("Identity")},
{"exp", std::make_shared<LayerCreator<CNNLayer>>("Exp")},
{"tanh", std::make_shared<LayerCreator<CNNLayer>>("TanH")},
};

View File

@@ -70,6 +70,13 @@ FillLayer::~FillLayer() {}
SelectLayer::~SelectLayer() {}
BroadcastLayer::~BroadcastLayer() {}
QuantizeLayer::~QuantizeLayer() {}
SigmoidLayer::~SigmoidLayer() {}
DivByNLayer::~DivByNLayer() {}
LogLayer::~LogLayer() {}
IdentityLayer::~IdentityLayer() {}
NegHalfLogLayer::~NegHalfLogLayer() {}
ExpLayer::~ExpLayer() {}
TanHLayer::~TanHLayer() {}
MathLayer::~MathLayer() {}
ReduceLayer::~ReduceLayer() {}
TopKLayer::~TopKLayer() {}

View File

@@ -134,6 +134,12 @@ CNNLayerPtr clonelayer(const CNNLayer& source) {
&layerCloneImpl<QuantizeLayer>,
&layerCloneImpl<BinaryConvolutionLayer>,
&layerCloneImpl<WeightableLayer>,
&layerCloneImpl<TanHLayer>,
&layerCloneImpl<LogLayer>,
&layerCloneImpl<NegHalfLogLayer>,
&layerCloneImpl<IdentityLayer>,
&layerCloneImpl<DivByNLayer>,
&layerCloneImpl<SigmoidLayer>,
&layerCloneImpl<OneHotLayer>,
&layerCloneImpl<CNNLayer>,
&layerCloneImpl<UniqueLayer>};

View File

@@ -31,7 +31,7 @@ using AllLayers =
ReshapeLayer*, TileLayer*, ScaleShiftLayer*, PReLULayer*, PowerLayer*, BatchNormalizationLayer*,
ClampLayer*, TensorIterator*, LSTMCell*, GRUCell*, RNNCell*, RNNSequenceLayer*, QuantizeLayer*,
BinaryConvolutionLayer*, WeightableLayer*, OneHotLayer*, MathLayer*, ReduceLayer*, UniqueLayer*,
NonMaxSuppressionLayer*, ScatterLayer*, CNNLayer*>;
NonMaxSuppressionLayer*, ScatterLayer*, TanHLayer*, SigmoidLayer*, LogLayer*, NegHalfLogLayer*,DivByNLayer*, IdentityLayer*, ExpLayer*, CNNLayer*>;
/**
* @brief checks whether type inxed as P has a parent among element in range I..N

View File

@@ -125,6 +125,10 @@ REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, ELU);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, TanH);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Logistic);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Sigmoid);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, DivByN);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Identity);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Log);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, NegHalfLog);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, PReLU);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, SoftMax);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, LogSoftMax);
@@ -219,7 +223,6 @@ REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Cosh);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Erf);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Floor);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, HardSigmoid);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Log);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Exp);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Neg);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Reciprocal);

View File

@@ -0,0 +1,16 @@
// Copyright (C) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
#include "other/add_output.hpp"
const auto addOutputParams =
::testing::Combine(::testing::Values("Memory_1"), ::testing::Values(CommonTestUtils::DEVICE_CPU));
INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputTestsCommonClass, addOutputParams,
AddOutputTestsCommonClass::getTestCaseName);
TEST_P(AddOutputTestsCommonClass, basic) {
run_test();
}

View File

@@ -0,0 +1,15 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "other/add_output.hpp"
const auto addOutputParams =
::testing::Combine(::testing::Values("Memory_1"), ::testing::Values(CommonTestUtils::DEVICE_GNA));
INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputTestsCommonClass, addOutputParams,
AddOutputTestsCommonClass::getTestCaseName);
TEST_P(AddOutputTestsCommonClass, basic) {
run_test();
}

View File

@@ -0,0 +1,26 @@
// Copyright (C) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <map>
#include "common_test_utils/common_layers_params.hpp"
#include "common_test_utils/common_utils.hpp"
#include "common_test_utils/test_common.hpp"
#include "common_test_utils/test_constants.hpp"
#include "common_test_utils/xml_net_builder/ir_net.hpp"
#include "common_test_utils/xml_net_builder/xml_filler.hpp"
#include "ie_core.hpp"
class AddOutputTestsCommonClass : public CommonTestUtils::TestsCommon,
public testing::WithParamInterface<std::tuple<std::string, std::string>> {
private:
static std::string generate_model();
public:
static std::string getTestCaseName(testing::TestParamInfo<std::tuple<std::string, std::string>> obj);
void run_test();
};

View File

@@ -0,0 +1,76 @@
// Copyright (C) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
#include "other/add_output.hpp"
// TODO: Replace IRBuilder with NGraph when it supports Memory Layer
std::string AddOutputTestsCommonClass::generate_model() {
CommonTestUtils::IRBuilder_v6 test_model_builder("model");
auto precision = InferenceEngine::Precision::FP32;
auto Memory_1_layer =
test_model_builder.AddLayer("Memory_1", "Memory", precision, {{"id", "r_1-3"}, {"index", "1"}, {"size", "2"}})
.AddOutPort({1, 200})
.getLayer();
auto Input_2_layer = test_model_builder.AddLayer("Input_2", "input", precision).AddOutPort({1, 200}).getLayer();
auto Eltwise_3_layer = test_model_builder.AddLayer("Eltwise_3", "Eltwise", precision, {{"operation", "mul"}})
.AddInPort({1, 200})
.AddInPort({1, 200})
.AddOutPort({1, 200})
.getLayer();
auto Activation_4_layer =
test_model_builder.AddLayer("Activation_4", "Activation", precision, {{"type", "sigmoid"}})
.AddInPort({1, 200})
.AddOutPort({1, 200})
.getLayer();
auto Memory_5_layer =
test_model_builder.AddLayer("Memory_5", "Memory", precision, {{"id", "r_1-3"}, {"index", "0"}, {"size", "2"}})
.AddInPort({1, 200})
.getLayer();
test_model_builder.AddEdge(Memory_1_layer.out(0), Eltwise_3_layer.in(0));
test_model_builder.AddEdge(Input_2_layer.out(0), Eltwise_3_layer.in(1));
test_model_builder.AddEdge(Eltwise_3_layer.out(0), Activation_4_layer.in(0));
test_model_builder.AddEdge(Activation_4_layer.out(0), Memory_5_layer.in(0));
auto serial = test_model_builder.serialize();
return serial;
}
std::string AddOutputTestsCommonClass::getTestCaseName(
testing::TestParamInfo<std::tuple<std::string, std::string>> obj) {
std::string layer;
std::string engine;
std::tie(layer, engine) = obj.param;
return layer + "_" + engine;
}
void AddOutputTestsCommonClass::run_test() {
std::string layer_name;
std::string engine_type;
std::tie(layer_name, engine_type) = this->GetParam();
auto model = this->generate_model();
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network;
InferenceEngine::ExecutableNetwork executableNet;
auto null_blob = CommonTestUtils::getWeightsBlob(0);
network = ie.ReadNetwork(model, null_blob);
network.addOutput(layer_name);
executableNet = ie.LoadNetwork(network, engine_type);
auto outputs = executableNet.GetOutputsInfo();
auto layer_output = outputs[layer_name];
ASSERT_EQ(true, layer_output && "layer not found in outputs");
}

View File

@@ -36,7 +36,6 @@ function(build_with_lto)
endif()
add_subdirectory(pugixml)
add_subdirectory(stb_lib)
add_subdirectory(ade)
add_subdirectory(fluid/modules/gapi)

View File

@@ -36,7 +36,7 @@ if(DEFINED BUILD_DEFINES)
endforeach()
endif()
if(BUILD_SHARED_LIBS)
if(NOT BUILD_SHARED_LIBS)
add_library(pugixml SHARED ${SOURCES})
else()
add_library(pugixml STATIC ${SOURCES})

View File

@@ -1,20 +0,0 @@
#===============================================================================
# Copyright (C) 2018-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#===============================================================================
set(TARGET stb_image)
add_library(${TARGET} STATIC stb_image.cpp)
target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

View File

@@ -1,11 +0,0 @@
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_RESIZE_IMPLEMENTATION
#define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_BOX
#define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_BOX
#include "stb_image_resize.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff