Compare commits
2 Commits
show
...
releases/2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a4a1bff1cc | ||
|
|
83964338b0 |
@@ -139,7 +139,7 @@ if(WIN32)
|
||||
else()
|
||||
# TODO: enable for C sources as well
|
||||
# ie_add_compiler_flags(-Werror)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error")
|
||||
ie_add_compiler_flags(-ffunction-sections -fdata-sections)
|
||||
ie_add_compiler_flags(-fvisibility=hidden)
|
||||
ie_add_compiler_flags(-fdiagnostics-show-option)
|
||||
|
||||
@@ -85,11 +85,15 @@ add_cpplint_report_target()
|
||||
ie_cpack_add_component(cpp_samples REQUIRED DEPENDS core)
|
||||
|
||||
if(UNIX)
|
||||
install(DIRECTORY samples/
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/samples/cpp
|
||||
COMPONENT cpp_samples
|
||||
USE_SOURCE_PERMISSIONS
|
||||
PATTERN *.bat EXCLUDE)
|
||||
if (${TARGET_OS} STREQUAL "CHROMEOS")
|
||||
message(STATUS " Skipping cpp samples")
|
||||
else()
|
||||
install(DIRECTORY samples/
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/samples/cpp
|
||||
COMPONENT cpp_samples
|
||||
USE_SOURCE_PERMISSIONS
|
||||
PATTERN *.bat EXCLUDE)
|
||||
endif()
|
||||
elseif(WIN32)
|
||||
install(DIRECTORY samples
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/samples/cpp
|
||||
@@ -103,31 +107,42 @@ endif()
|
||||
ie_cpack_add_component(c_samples REQUIRED DEPENDS core)
|
||||
|
||||
if(UNIX)
|
||||
install(PROGRAMS samples/build_samples.sh
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
|
||||
COMPONENT c_samples)
|
||||
if (${TARGET_OS} STREQUAL "CHROMEOS")
|
||||
message(STATUS " Skipping cpp samples")
|
||||
else()
|
||||
install(PROGRAMS samples/build_samples.sh
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
|
||||
COMPONENT c_samples)
|
||||
endif()
|
||||
elseif(WIN32)
|
||||
install(PROGRAMS samples/build_samples_msvc.bat
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
|
||||
COMPONENT c_samples)
|
||||
endif()
|
||||
|
||||
install(DIRECTORY ie_bridges/c/samples/
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
|
||||
COMPONENT c_samples
|
||||
PATTERN ie_bridges/c/samples/CMakeLists.txt EXCLUDE)
|
||||
if (${TARGET_OS} STREQUAL "CHROMEOS")
|
||||
message(STATUS " Skipping ie_bridges c/cpp samples")
|
||||
else()
|
||||
install(DIRECTORY ie_bridges/c/samples/
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
|
||||
COMPONENT c_samples
|
||||
PATTERN ie_bridges/c/samples/CMakeLists.txt EXCLUDE)
|
||||
|
||||
install(FILES samples/CMakeLists.txt
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
|
||||
COMPONENT c_samples)
|
||||
install(FILES samples/CMakeLists.txt
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
|
||||
COMPONENT c_samples)
|
||||
endif()
|
||||
|
||||
# install Python samples
|
||||
if (${TARGET_OS} STREQUAL "CHROMEOS")
|
||||
message(STATUS "Skipping python samples")
|
||||
else()
|
||||
ie_cpack_add_component(python_samples REQUIRED DEPENDS core)
|
||||
|
||||
ie_cpack_add_component(python_samples REQUIRED DEPENDS core)
|
||||
|
||||
install(DIRECTORY ${ie_python_api_SOURCE_DIR}/sample/
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/samples/python
|
||||
COMPONENT python_samples)
|
||||
install(DIRECTORY ${ie_python_api_SOURCE_DIR}/sample/
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/samples/python
|
||||
COMPONENT python_samples)
|
||||
endif()
|
||||
|
||||
# Custom target to build only Inference Engine Developer Package targets
|
||||
|
||||
|
||||
@@ -35,7 +35,9 @@ else()
|
||||
set(GNA_LIB_DIR x64 CACHE STRING "" FORCE)
|
||||
set(libGNA_INCLUDE_DIRS "${GNA}/include" CACHE STRING "" FORCE)
|
||||
endif()
|
||||
set(libGNA_LIBRARIES_BASE_PATH ${GNA}/${GNA_PLATFORM_DIR}/${GNA_LIB_DIR} CACHE STRING "" FORCE)
|
||||
#set(libGNA_LIBRARIES_BASE_PATH ${GNA}/${GNA_PLATFORM_DIR}/${GNA_LIB_DIR} CACHE STRING "" FORCE)
|
||||
# Ebuild fails to copy x64 named folder during build process
|
||||
set(libGNA_LIBRARIES_BASE_PATH ${GNA}/${GNA_PLATFORM_DIR} CACHE STRING "" FORCE)
|
||||
|
||||
add_library(libGNA::KERNEL SHARED IMPORTED)
|
||||
find_library(GNA_KERNEL_LIBRARY
|
||||
|
||||
@@ -49,26 +49,31 @@ endif ()
|
||||
|
||||
## Intel OMP package
|
||||
if (THREADING STREQUAL "OMP")
|
||||
if (WIN32)
|
||||
RESOLVE_DEPENDENCY(OMP
|
||||
ARCHIVE_WIN "iomp.zip"
|
||||
TARGET_PATH "${TEMP}/omp"
|
||||
ENVIRONMENT "OMP"
|
||||
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
|
||||
elseif(LINUX)
|
||||
RESOLVE_DEPENDENCY(OMP
|
||||
ARCHIVE_LIN "iomp.tgz"
|
||||
TARGET_PATH "${TEMP}/omp"
|
||||
ENVIRONMENT "OMP"
|
||||
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
|
||||
else(APPLE)
|
||||
RESOLVE_DEPENDENCY(OMP
|
||||
ARCHIVE_MAC "iomp_20190130_mac.tgz"
|
||||
TARGET_PATH "${TEMP}/omp"
|
||||
ENVIRONMENT "OMP"
|
||||
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
|
||||
endif()
|
||||
log_rpath_from_dir(OMP "${OMP}/lib")
|
||||
if (NOT DEFINED OMP_DIR)
|
||||
if (WIN32)
|
||||
RESOLVE_DEPENDENCY(OMP
|
||||
ARCHIVE_WIN "iomp.zip"
|
||||
TARGET_PATH "${TEMP}/omp"
|
||||
ENVIRONMENT "OMP"
|
||||
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
|
||||
elseif(LINUX)
|
||||
RESOLVE_DEPENDENCY(OMP
|
||||
ARCHIVE_LIN "iomp.tgz"
|
||||
TARGET_PATH "${TEMP}/omp"
|
||||
ENVIRONMENT "OMP"
|
||||
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
|
||||
else(APPLE)
|
||||
RESOLVE_DEPENDENCY(OMP
|
||||
ARCHIVE_MAC "iomp_20190130_mac.tgz"
|
||||
TARGET_PATH "${TEMP}/omp"
|
||||
ENVIRONMENT "OMP"
|
||||
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
|
||||
endif()
|
||||
log_rpath_from_dir(OMP "${OMP}/lib")
|
||||
else()
|
||||
set(OMP ${IE_MAIN_SOURCE_DIR}/${OMP_DIR})
|
||||
log_rpath_from_dir(OMP "${OMP}/lib")
|
||||
endif()
|
||||
debug_message(STATUS "intel_omp=" ${OMP})
|
||||
endif ()
|
||||
|
||||
@@ -183,22 +188,26 @@ if (ENABLE_GNA)
|
||||
GNA_LIB_DIR
|
||||
libGNA_INCLUDE_DIRS
|
||||
libGNA_LIBRARIES_BASE_PATH)
|
||||
if (GNA_LIBRARY_VERSION STREQUAL "GNA1")
|
||||
RESOLVE_DEPENDENCY(GNA
|
||||
ARCHIVE_UNIFIED "gna_20181120.zip"
|
||||
TARGET_PATH "${TEMP}/gna")
|
||||
else()
|
||||
if(GNA_LIBRARY_VERSION STREQUAL "GNA1_1401")
|
||||
set(GNA_VERSION "01.00.00.1401")
|
||||
endif()
|
||||
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
|
||||
set(GNA_VERSION "02.00.00.0587")
|
||||
endif()
|
||||
RESOLVE_DEPENDENCY(GNA
|
||||
ARCHIVE_UNIFIED "GNA_${GNA_VERSION}.zip"
|
||||
TARGET_PATH "${TEMP}/gna_${GNA_VERSION}"
|
||||
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*")
|
||||
endif()
|
||||
if (NOT DEFINED GNA_DIR)
|
||||
if (GNA_LIBRARY_VERSION STREQUAL "GNA1")
|
||||
RESOLVE_DEPENDENCY(GNA
|
||||
ARCHIVE_UNIFIED "gna_20181120.zip"
|
||||
TARGET_PATH "${TEMP}/gna")
|
||||
else()
|
||||
if(GNA_LIBRARY_VERSION STREQUAL "GNA1_1401")
|
||||
set(GNA_VERSION "01.00.00.1401")
|
||||
endif()
|
||||
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
|
||||
set(GNA_VERSION "02.00.00.0587")
|
||||
endif()
|
||||
RESOLVE_DEPENDENCY(GNA
|
||||
ARCHIVE_UNIFIED "GNA_${GNA_VERSION}.zip"
|
||||
TARGET_PATH "${TEMP}/gna_${GNA_VERSION}"
|
||||
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*")
|
||||
endif()
|
||||
else()
|
||||
set(GNA ${IE_MAIN_SOURCE_DIR}/${GNA_DIR})
|
||||
endif()
|
||||
debug_message(STATUS "gna=" ${GNA})
|
||||
endif()
|
||||
|
||||
|
||||
@@ -4,29 +4,33 @@
|
||||
|
||||
if (LINUX)
|
||||
function(get_linux_name res_var)
|
||||
if (NOT EXISTS "/etc/lsb-release")
|
||||
execute_process(COMMAND find -L /etc/ -maxdepth 1 -type f -name *-release -exec cat {} \;
|
||||
OUTPUT_VARIABLE release_data RESULT_VARIABLE result)
|
||||
set(name_regex "NAME=\"([^ \"\n]*).*\"\n")
|
||||
set(version_regex "VERSION=\"([0-9]+(\\.[0-9]+)?)[^\n]*\"")
|
||||
else ()
|
||||
#linux version detection using cat /etc/lsb-release
|
||||
file(READ "/etc/lsb-release" release_data)
|
||||
set(name_regex "DISTRIB_ID=([^ \n]*)\n")
|
||||
set(version_regex "DISTRIB_RELEASE=([0-9]+(\\.[0-9]+)?)")
|
||||
endif ()
|
||||
if (NOT DEFINED TARGET_OS)
|
||||
if (NOT EXISTS "/etc/lsb-release")
|
||||
execute_process(COMMAND find -L /etc/ -maxdepth 1 -type f -name *-release -exec cat {} \;
|
||||
OUTPUT_VARIABLE release_data RESULT_VARIABLE result)
|
||||
set(name_regex "NAME=\"([^ \"\n]*).*\"\n")
|
||||
set(version_regex "VERSION=\"([0-9]+(\\.[0-9]+)?)[^\n]*\"")
|
||||
else ()
|
||||
#linux version detection using cat /etc/lsb-release
|
||||
file(READ "/etc/lsb-release" release_data)
|
||||
set(name_regex "DISTRIB_ID=([^ \n]*)\n")
|
||||
set(version_regex "DISTRIB_RELEASE=([0-9]+(\\.[0-9]+)?)")
|
||||
endif ()
|
||||
|
||||
string(REGEX MATCH ${name_regex} name ${release_data})
|
||||
set(os_name ${CMAKE_MATCH_1})
|
||||
string(REGEX MATCH ${name_regex} name ${release_data})
|
||||
set(os_name ${CMAKE_MATCH_1})
|
||||
|
||||
string(REGEX MATCH ${version_regex} version ${release_data})
|
||||
set(os_name "${os_name} ${CMAKE_MATCH_1}")
|
||||
|
||||
if (os_name)
|
||||
set(${res_var} ${os_name} PARENT_SCOPE)
|
||||
else ()
|
||||
set(${res_var} NOTFOUND PARENT_SCOPE)
|
||||
endif ()
|
||||
string(REGEX MATCH ${version_regex} version ${release_data})
|
||||
set(os_name "${os_name} ${CMAKE_MATCH_1}")
|
||||
|
||||
if (os_name)
|
||||
set(${res_var} ${os_name} PARENT_SCOPE)
|
||||
else ()
|
||||
set(${res_var} NOTFOUND PARENT_SCOPE)
|
||||
endif ()
|
||||
else()
|
||||
set(os_name ${TARGET_OS})
|
||||
set(${res_var} ${os_name} PARENT_SCOPE)
|
||||
endif()
|
||||
endfunction()
|
||||
endif ()
|
||||
|
||||
55
inference-engine/include/builders/ie_divbyn_layer.hpp
Normal file
55
inference-engine/include/builders/ie_divbyn_layer.hpp
Normal file
@@ -0,0 +1,55 @@
|
||||
// Copyright (C) 2018-2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <builders/ie_layer_decorator.hpp>
|
||||
#include <ie_network.hpp>
|
||||
#include <string>
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Builder {
|
||||
|
||||
/**
|
||||
* @brief The class represents a builder for DivByN layer
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(DivByNLayer): public LayerDecorator {
|
||||
public:
|
||||
/**
|
||||
* @brief The constructor creates a builder with the name
|
||||
* @param name Layer name
|
||||
*/
|
||||
explicit DivByNLayer(const std::string& name = "");
|
||||
/**
|
||||
* @brief The constructor creates a builder from generic builder
|
||||
* @param layer pointer to generic builder
|
||||
*/
|
||||
explicit DivByNLayer(const Layer::Ptr& layer);
|
||||
/**
|
||||
* @brief The constructor creates a builder from generic builder
|
||||
* @param layer constant pointer to generic builder
|
||||
*/
|
||||
explicit DivByNLayer(const Layer::CPtr& layer);
|
||||
/**
|
||||
* @brief Sets the name for the layer
|
||||
* @param name Layer name
|
||||
* @return reference to layer builder
|
||||
*/
|
||||
DivByNLayer& setName(const std::string& name);
|
||||
|
||||
/**
|
||||
* @brief Returns port with shapes for the layer
|
||||
* @return Port with shapes
|
||||
*/
|
||||
const Port& getPort() const;
|
||||
/**
|
||||
* @brief Sets port shapes for the layer
|
||||
* @param port Port with shapes
|
||||
* @return reference to layer builder
|
||||
*/
|
||||
DivByNLayer& setPort(const Port& port);
|
||||
};
|
||||
|
||||
} // namespace Builder
|
||||
} // namespace InferenceEngine
|
||||
55
inference-engine/include/builders/ie_exp_layer.hpp
Normal file
55
inference-engine/include/builders/ie_exp_layer.hpp
Normal file
@@ -0,0 +1,55 @@
|
||||
// Copyright (C) 2018-2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <builders/ie_layer_decorator.hpp>
|
||||
#include <ie_network.hpp>
|
||||
#include <string>
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Builder {
|
||||
|
||||
/**
|
||||
* @brief The class represents a builder for Exp layer
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(ExpLayer): public LayerDecorator {
|
||||
public:
|
||||
/**
|
||||
* @brief The constructor creates a builder with the name
|
||||
* @param name Layer name
|
||||
*/
|
||||
explicit ExpLayer(const std::string& name = "");
|
||||
/**
|
||||
* @brief The constructor creates a builder from generic builder
|
||||
* @param layer pointer to generic builder
|
||||
*/
|
||||
explicit ExpLayer(const Layer::Ptr& layer);
|
||||
/**
|
||||
* @brief The constructor creates a builder from generic builder
|
||||
* @param layer constant pointer to generic builder
|
||||
*/
|
||||
explicit ExpLayer(const Layer::CPtr& layer);
|
||||
/**
|
||||
* @brief Sets the name for the layer
|
||||
* @param name Layer name
|
||||
* @return reference to layer builder
|
||||
*/
|
||||
ExpLayer& setName(const std::string& name);
|
||||
|
||||
/**
|
||||
* @brief Returns port with shapes for the layer
|
||||
* @return Port with shapes
|
||||
*/
|
||||
const Port& getPort() const;
|
||||
/**
|
||||
* @brief Sets port shapes for the layer
|
||||
* @param port Port with shapes
|
||||
* @return reference to layer builder
|
||||
*/
|
||||
ExpLayer& setPort(const Port& port);
|
||||
};
|
||||
|
||||
} // namespace Builder
|
||||
} // namespace InferenceEngine
|
||||
55
inference-engine/include/builders/ie_identity_layer.hpp
Normal file
55
inference-engine/include/builders/ie_identity_layer.hpp
Normal file
@@ -0,0 +1,55 @@
|
||||
// Copyright (C) 2018-2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <builders/ie_layer_decorator.hpp>
|
||||
#include <ie_network.hpp>
|
||||
#include <string>
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Builder {
|
||||
|
||||
/**
|
||||
* @brief The class represents a builder for Identity layer
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(IdentityLayer): public LayerDecorator {
|
||||
public:
|
||||
/**
|
||||
* @brief The constructor creates a builder with the name
|
||||
* @param name Layer name
|
||||
*/
|
||||
explicit IdentityLayer(const std::string& name = "");
|
||||
/**
|
||||
* @brief The constructor creates a builder from generic builder
|
||||
* @param layer pointer to generic builder
|
||||
*/
|
||||
explicit IdentityLayer(const Layer::Ptr& layer);
|
||||
/**
|
||||
* @brief The constructor creates a builder from generic builder
|
||||
* @param layer constant pointer to generic builder
|
||||
*/
|
||||
explicit IdentityLayer(const Layer::CPtr& layer);
|
||||
/**
|
||||
* @brief Sets the name for the layer
|
||||
* @param name Layer name
|
||||
* @return reference to layer builder
|
||||
*/
|
||||
IdentityLayer& setName(const std::string& name);
|
||||
|
||||
/**
|
||||
* @brief Returns port with shapes for the layer
|
||||
* @return Port with shapes
|
||||
*/
|
||||
const Port& getPort() const;
|
||||
/**
|
||||
* @brief Sets port shapes for the layer
|
||||
* @param port Port with shapes
|
||||
* @return reference to layer builder
|
||||
*/
|
||||
IdentityLayer& setPort(const Port& port);
|
||||
};
|
||||
|
||||
} // namespace Builder
|
||||
} // namespace InferenceEngine
|
||||
55
inference-engine/include/builders/ie_log_layer.hpp
Normal file
55
inference-engine/include/builders/ie_log_layer.hpp
Normal file
@@ -0,0 +1,55 @@
|
||||
// Copyright (C) 2018-2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <builders/ie_layer_decorator.hpp>
|
||||
#include <ie_network.hpp>
|
||||
#include <string>
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Builder {
|
||||
|
||||
/**
|
||||
* @brief The class represents a builder for Log layer
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(LogLayer): public LayerDecorator {
|
||||
public:
|
||||
/**
|
||||
* @brief The constructor creates a builder with the name
|
||||
* @param name Layer name
|
||||
*/
|
||||
explicit LogLayer(const std::string& name = "");
|
||||
/**
|
||||
* @brief The constructor creates a builder from generic builder
|
||||
* @param layer pointer to generic builder
|
||||
*/
|
||||
explicit LogLayer(const Layer::Ptr& layer);
|
||||
/**
|
||||
* @brief The constructor creates a builder from generic builder
|
||||
* @param layer constant pointer to generic builder
|
||||
*/
|
||||
explicit LogLayer(const Layer::CPtr& layer);
|
||||
/**
|
||||
* @brief Sets the name for the layer
|
||||
* @param name Layer name
|
||||
* @return reference to layer builder
|
||||
*/
|
||||
LogLayer& setName(const std::string& name);
|
||||
|
||||
/**
|
||||
* @brief Returns port with shapes for the layer
|
||||
* @return Port with shapes
|
||||
*/
|
||||
const Port& getPort() const;
|
||||
/**
|
||||
* @brief Sets port shapes for the layer
|
||||
* @param port Port with shapes
|
||||
* @return reference to layer builder
|
||||
*/
|
||||
LogLayer& setPort(const Port& port);
|
||||
};
|
||||
|
||||
} // namespace Builder
|
||||
} // namespace InferenceEngine
|
||||
55
inference-engine/include/builders/ie_neghalf_log_layer.hpp
Normal file
55
inference-engine/include/builders/ie_neghalf_log_layer.hpp
Normal file
@@ -0,0 +1,55 @@
|
||||
// Copyright (C) 2018-2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <builders/ie_layer_decorator.hpp>
|
||||
#include <ie_network.hpp>
|
||||
#include <string>
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Builder {
|
||||
|
||||
/**
|
||||
* @brief The class represents a builder for NegHalfLog layer
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(NegHalfLogLayer): public LayerDecorator {
|
||||
public:
|
||||
/**
|
||||
* @brief The constructor creates a builder with the name
|
||||
* @param name Layer name
|
||||
*/
|
||||
explicit NegHalfLogLayer(const std::string& name = "");
|
||||
/**
|
||||
* @brief The constructor creates a builder from generic builder
|
||||
* @param layer pointer to generic builder
|
||||
*/
|
||||
explicit NegHalfLogLayer(const Layer::Ptr& layer);
|
||||
/**
|
||||
* @brief The constructor creates a builder from generic builder
|
||||
* @param layer constant pointer to generic builder
|
||||
*/
|
||||
explicit NegHalfLogLayer(const Layer::CPtr& layer);
|
||||
/**
|
||||
* @brief Sets the name for the layer
|
||||
* @param name Layer name
|
||||
* @return reference to layer builder
|
||||
*/
|
||||
NegHalfLogLayer& setName(const std::string& name);
|
||||
|
||||
/**
|
||||
* @brief Returns port with shapes for the layer
|
||||
* @return Port with shapes
|
||||
*/
|
||||
const Port& getPort() const;
|
||||
/**
|
||||
* @brief Sets port shapes for the layer
|
||||
* @param port Port with shapes
|
||||
* @return reference to layer builder
|
||||
*/
|
||||
NegHalfLogLayer& setPort(const Port& port);
|
||||
};
|
||||
|
||||
} // namespace Builder
|
||||
} // namespace InferenceEngine
|
||||
@@ -56,7 +56,7 @@ DECLARE_GNA_CONFIG_KEY(SCALE_FACTOR);
|
||||
* currently supported values are I16, I8
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(PRECISION);
|
||||
|
||||
DECLARE_GNA_CONFIG_KEY(IDENTITY_SCALE_FACTOR);
|
||||
|
||||
/**
|
||||
* @brief if turned on, dump GNA firmware model into specified file
|
||||
|
||||
@@ -50,6 +50,11 @@
|
||||
#include <builders/ie_roi_pooling_layer.hpp>
|
||||
#include <builders/ie_scale_shift_layer.hpp>
|
||||
#include <builders/ie_sigmoid_layer.hpp>
|
||||
#include <builders/ie_log_layer.hpp>
|
||||
#include <builders/ie_neghalf_log_layer.hpp>
|
||||
#include <builders/ie_exp_layer.hpp>
|
||||
#include <builders/ie_divbyn_layer.hpp>
|
||||
#include <builders/ie_identity_layer.hpp>
|
||||
#include <builders/ie_simpler_nms_layer.hpp>
|
||||
#include <builders/ie_softmax_layer.hpp>
|
||||
#include <builders/ie_split_layer.hpp>
|
||||
|
||||
@@ -2079,4 +2079,127 @@ public:
|
||||
virtual ~ScatterLayer();
|
||||
};
|
||||
|
||||
class INFERENCE_ENGINE_API_CLASS(TanHLayer): public CNNLayer {
|
||||
public:
|
||||
/**
|
||||
* @brief A default constructor. Creates a new TanHLayer instance and initializes layer parameters with the given values.
|
||||
* @param prms Initial layer parameters
|
||||
*/
|
||||
//explicit TanHLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
|
||||
|
||||
using CNNLayer::CNNLayer;
|
||||
|
||||
virtual ~TanHLayer();
|
||||
|
||||
/**
|
||||
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
|
||||
*/
|
||||
float negative_slope;
|
||||
};
|
||||
|
||||
class INFERENCE_ENGINE_API_CLASS(SigmoidLayer): public CNNLayer {
|
||||
public:
|
||||
/**
|
||||
* @brief A default constructor. Creates a new SigmoidLayer instance and initializes layer parameters with the given values.
|
||||
* @param prms Initial layer parameters
|
||||
*/
|
||||
//explicit SigmoidLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
|
||||
|
||||
using CNNLayer::CNNLayer;
|
||||
|
||||
virtual ~SigmoidLayer();
|
||||
|
||||
/**
|
||||
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
|
||||
*/
|
||||
float negative_slope;
|
||||
};
|
||||
|
||||
class INFERENCE_ENGINE_API_CLASS(DivByNLayer) : public CNNLayer {
|
||||
public:
|
||||
/**
|
||||
* @brief A default constructor. Creates a new DivByNLayer instance and initializes layer parameters with the given values.
|
||||
* @param prms Initial layer parameters
|
||||
*/
|
||||
// explicit LogLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
|
||||
|
||||
using CNNLayer::CNNLayer;
|
||||
|
||||
virtual ~DivByNLayer();
|
||||
|
||||
/**
|
||||
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
|
||||
*/
|
||||
float negative_slope;
|
||||
};
|
||||
class INFERENCE_ENGINE_API_CLASS(IdentityLayer) : public CNNLayer {
|
||||
public:
|
||||
/**
|
||||
* @brief A default constructor. Creates a new IdentityLayer instance and initializes layer parameters with the given values.
|
||||
* @param prms Initial layer parameters
|
||||
*/
|
||||
// explicit LogLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
|
||||
|
||||
using CNNLayer::CNNLayer;
|
||||
|
||||
virtual ~IdentityLayer();
|
||||
|
||||
/**
|
||||
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
|
||||
*/
|
||||
float negative_slope;
|
||||
};
|
||||
class INFERENCE_ENGINE_API_CLASS(ExpLayer) : public CNNLayer {
|
||||
public:
|
||||
/**
|
||||
* @brief A default constructor. Creates a new ExpLayer instance and initializes layer parameters with the given values.
|
||||
* @param prms Initial layer parameters
|
||||
*/
|
||||
// explicit LogLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
|
||||
|
||||
using CNNLayer::CNNLayer;
|
||||
|
||||
virtual ~ExpLayer();
|
||||
/**
|
||||
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
|
||||
*/
|
||||
float negative_slope;
|
||||
};
|
||||
|
||||
class INFERENCE_ENGINE_API_CLASS(NegHalfLogLayer) : public CNNLayer {
|
||||
public:
|
||||
/**
|
||||
* @brief A default constructor. Creates a new NegHalfLogLayer instance and initializes layer parameters with the given values.
|
||||
* @param prms Initial layer parameters
|
||||
*/
|
||||
// explicit LogLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
|
||||
|
||||
using CNNLayer::CNNLayer;
|
||||
|
||||
virtual ~NegHalfLogLayer();
|
||||
|
||||
/**
|
||||
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
|
||||
*/
|
||||
float negative_slope;
|
||||
};
|
||||
|
||||
class INFERENCE_ENGINE_API_CLASS(LogLayer) : public CNNLayer {
|
||||
public:
|
||||
/**
|
||||
* @brief A default constructor. Creates a new LogLayer instance and initializes layer parameters with the given values.
|
||||
* @param prms Initial layer parameters
|
||||
*/
|
||||
// explicit LogLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
|
||||
|
||||
using CNNLayer::CNNLayer;
|
||||
|
||||
virtual ~LogLayer();
|
||||
|
||||
/**
|
||||
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
|
||||
*/
|
||||
float negative_slope;
|
||||
};
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
||||
@@ -235,6 +235,7 @@ DECLARE_CONFIG_KEY(GPU_THROUGHPUT_STREAMS);
|
||||
* PluginConfigParams::YES or PluginConfigParams::NO
|
||||
*/
|
||||
DECLARE_CONFIG_KEY(PERF_COUNT);
|
||||
DECLARE_CONFIG_KEY(IDENTITY_SCALE_FACTOR);
|
||||
|
||||
/**
|
||||
* @brief The key defines dynamic limit of batch processing.
|
||||
|
||||
@@ -23,14 +23,14 @@ class BitMap : public Reader {
|
||||
private:
|
||||
static Register<BitMap> reg;
|
||||
|
||||
typedef struct {
|
||||
typedef struct BmpHeader{
|
||||
unsigned short type = 0u; /* Magic identifier */
|
||||
unsigned int size = 0u; /* File size in bytes */
|
||||
unsigned int reserved = 0u;
|
||||
unsigned int offset = 0u; /* Offset to image data, bytes */
|
||||
} BmpHeader;
|
||||
|
||||
typedef struct {
|
||||
typedef struct BmpInfoHeader{
|
||||
unsigned int size = 0u; /* Header size in bytes */
|
||||
int width = 0, height = 0; /* Width and height of image */
|
||||
unsigned short planes = 0u; /* Number of colour planes */
|
||||
|
||||
@@ -26,9 +26,13 @@ else()
|
||||
endif()
|
||||
|
||||
# saving rpath to the GNA shared library, to be used by CI
|
||||
log_rpath_from_dir(GNA ${libGNA_LIBRARIES_BASE_PATH})
|
||||
#log_rpath_from_dir(GNA ${libGNA_LIBRARIES_BASE_PATH})
|
||||
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ${INTEL_ITT_LIBS} Threads::Threads libGNA)
|
||||
if(TARGET_OS STREQUAL "CHROMEOS")
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ${INTEL_ITT_LIBS} Threads::Threads gna)
|
||||
else()
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ${INTEL_ITT_LIBS} Threads::Threads libGNA)
|
||||
endif()
|
||||
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_compile_definitions(${TARGET_NAME}
|
||||
PRIVATE
|
||||
@@ -45,7 +49,11 @@ target_compile_definitions(${TARGET_NAME}_test_static
|
||||
GNA_LIB_VER=${GNA_LIBRARY_VERSION_NUMBER}
|
||||
INTEGER_LOW_P
|
||||
USE_STATIC_IE)
|
||||
target_link_libraries(${TARGET_NAME}_test_static PUBLIC inference_engine_preproc_s libGNA::API)
|
||||
if(TARGET_OS STREQUAL "CHROMEOS")
|
||||
target_link_libraries(${TARGET_NAME}_test_static PUBLIC inference_engine_preproc_s gna)
|
||||
else()
|
||||
target_link_libraries(${TARGET_NAME}_test_static PUBLIC inference_engine_preproc_s libGNA::API)
|
||||
endif()
|
||||
target_include_directories(${TARGET_NAME}_test_static PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
set_target_properties(${TARGET_NAME}_test_static PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_test_static)
|
||||
|
||||
|
||||
@@ -309,6 +309,65 @@ void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel
|
||||
}
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_component_t &comp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
bool postInitMem) {
|
||||
comp.num_rows_in = num_rows_in;
|
||||
comp.num_columns_in = num_columns_in;
|
||||
comp.num_rows_out = num_columns_in;
|
||||
comp.num_columns_out = num_rows_in;
|
||||
comp.num_bytes_per_input = num_bytes_per_input;
|
||||
comp.num_bytes_per_output = num_bytes_per_output;
|
||||
comp.operation = kDnnInterleaveOp;
|
||||
comp.macro_operation = kDnnMacroOpNone;
|
||||
comp.orientation_in = kDnnNonInterleavedOrientation;
|
||||
comp.orientation_out = kDnnInterleavedOrientation;
|
||||
comp.output_scale_factor = output_scale_factor;
|
||||
comp.input_scale_factor = output_scale_factor;
|
||||
if (!postInitMem) {
|
||||
comp.ptr_inputs = ptr_inputs;
|
||||
comp.ptr_outputs = ptr_outputs;
|
||||
} else {
|
||||
ptr_inputs = &comp.ptr_inputs;
|
||||
ptr_outputs = &comp.ptr_outputs;
|
||||
}
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dnn_component_t &comp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
bool postInitMem) {
|
||||
comp.num_rows_in = num_rows_in;
|
||||
comp.num_columns_in = num_columns_in;
|
||||
comp.num_rows_out = num_columns_in;
|
||||
comp.num_columns_out = num_rows_in;
|
||||
comp.num_bytes_per_input = num_bytes_per_input;
|
||||
comp.num_bytes_per_output = num_bytes_per_output;
|
||||
comp.operation = kDnnDeinterleaveOp;
|
||||
comp.macro_operation = kDnnMacroOpNone;
|
||||
comp.orientation_in = kDnnInterleavedOrientation;
|
||||
comp.orientation_out = kDnnInterleavedOrientation;
|
||||
comp.output_scale_factor = output_scale_factor;
|
||||
comp.input_scale_factor = output_scale_factor;
|
||||
if (!postInitMem) {
|
||||
comp.ptr_inputs = ptr_inputs;
|
||||
comp.ptr_outputs = ptr_outputs;
|
||||
} else {
|
||||
ptr_inputs = &comp.ptr_inputs;
|
||||
ptr_outputs = &comp.ptr_outputs;
|
||||
}
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::Propagate() {
|
||||
for (uint32_t i = 0; i < component.size(); i++) {
|
||||
|
||||
@@ -192,6 +192,46 @@ public:
|
||||
ptr_segments,
|
||||
true);
|
||||
}
|
||||
template<class A, class B>
|
||||
static void InitDeinterleaveComponent(intel_dnn_component_t &cmp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
float output_scale_factor,
|
||||
A *&ptr_inputs,
|
||||
B *&ptr_outputs) {
|
||||
InitDeinterleaveComponentPrivate(cmp,
|
||||
num_rows_in,
|
||||
num_columns_in,
|
||||
num_bytes_per_input,
|
||||
num_bytes_per_output,
|
||||
output_scale_factor,
|
||||
(void *&) ptr_inputs,
|
||||
(void *&) ptr_outputs,
|
||||
true);
|
||||
}
|
||||
|
||||
template<class A, class B>
|
||||
static void InitInterleaveComponent(intel_dnn_component_t &cmp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
float output_scale_factor,
|
||||
A *&ptr_inputs,
|
||||
B *&ptr_outputs) {
|
||||
InitInterleaveComponentPrivate(cmp,
|
||||
num_rows_in,
|
||||
num_columns_in,
|
||||
num_bytes_per_input,
|
||||
num_bytes_per_output,
|
||||
output_scale_factor,
|
||||
(void *&) ptr_inputs,
|
||||
(void *&) ptr_outputs,
|
||||
true);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<class A, class B>
|
||||
@@ -342,6 +382,26 @@ private:
|
||||
void *&ptr_outputs,
|
||||
intel_pwl_segment_t *ptr_segments,
|
||||
bool postInitMem);
|
||||
static void InitInterleaveComponentPrivate(intel_dnn_component_t &cmp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
bool postInitMem);
|
||||
|
||||
static void InitDeinterleaveComponentPrivate(intel_dnn_component_t &cmp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
bool postInitMem);
|
||||
|
||||
|
||||
static void InitConvolutional1DComponentPrivate(intel_dnn_component_t &comp,
|
||||
uint32_t num_rows_in,
|
||||
|
||||
@@ -19,6 +19,10 @@ enum DnnActivationType : uint8_t {
|
||||
kActIdentity,
|
||||
kActKaldiLstmClipping,
|
||||
kActCustom,
|
||||
kActExp,
|
||||
kActLog,
|
||||
kActNegHalfLog,
|
||||
kActDivByN,
|
||||
kActNumType
|
||||
};
|
||||
|
||||
@@ -47,7 +51,11 @@ static const char *intel_dnn_activation_name[kActNumType] = {
|
||||
"kActLeakyRelu",
|
||||
"kActIdentity",
|
||||
"kActKaldiLstmClipping",
|
||||
"kActCustom"
|
||||
"kActCustom",
|
||||
"kActExp",
|
||||
"kActLog",
|
||||
"kActNegHalfLog",
|
||||
"kActDivByN"
|
||||
};
|
||||
|
||||
typedef enum DnnSoftmaxType {
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#define PWL_FROM_FILE
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
@@ -16,9 +18,13 @@ void make_gna_pwl(const DnnActivation fun,
|
||||
const double u_bound,
|
||||
const double in_scale,
|
||||
const double out_scale,
|
||||
std::vector<intel_pwl_segment_t> &gna_pwl) {
|
||||
std::vector<intel_pwl_segment_t> &gna_pwl,
|
||||
const uint32_t n) {
|
||||
pwl_gna_slope_scale_t s;
|
||||
uint32_t pwl_size = static_cast<int32_t>(pwl.size());
|
||||
gnalog() << "make_gna_pwl\n";
|
||||
gnalog() << " in_scale " << in_scale << "\n";
|
||||
gnalog() << " out_scale " << out_scale << "\n";
|
||||
switch (fun) {
|
||||
case kActSigmoid:
|
||||
case kActTanh: {
|
||||
@@ -46,7 +52,7 @@ void make_gna_pwl(const DnnActivation fun,
|
||||
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
|
||||
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
|
||||
|
||||
gnalog() << (gna_pwl[1].xBase/in_scale)
|
||||
gnalog() << ((int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale)
|
||||
<< " " << (gna_pwl[1].yBase) / out_scale
|
||||
<< " " << pwl[0].m
|
||||
<< "\n";
|
||||
@@ -75,6 +81,130 @@ void make_gna_pwl(const DnnActivation fun,
|
||||
<< "\n";
|
||||
break;
|
||||
}
|
||||
case kActExp: {
    // Builds the GNA segment table for the exp activation:
    //   gna_pwl[0]              flat (slope 0) segment at y = 0 for the lowest inputs,
    //   gna_pwl[1..pwl_size-1]  segments taken from pwl[0..pwl_size-2],
    //   gna_pwl[pwl_size]       flat segment at INT16_MAX for the highest inputs.
    // NOTE(review): pwl[pwl_size - 2] and gna_pwl[1] are accessed unconditionally,
    // so this presumably relies on the caller passing at least two pwl pieces.
    auto n_segments = static_cast<int32_t> (pwl_size) + 1;
    gna_pwl.resize(n_segments);
    // insert extra segment for x values < l_bound
    gna_pwl[0].xBase = static_cast<int32_t> (INT32_MIN & XBASEMASK);  // zero out the 2 lsb
    gnalog() << "=========================== Exp Segments ===========================\n";
    gna_pwl[0].yBase = gna_pwl[1].yBase = 0;
    // x at which the first piece evaluates to 0 (-b / m), scaled to input units
    gna_pwl[1].xBase = (static_cast<int32_t> (in_scale * (-pwl[0].b / pwl[0].m))) & XBASEMASK;
    gna_pwl[0].slope = 0;

    gnalog() << (gna_pwl[0].xBase) / in_scale
        << " " << (gna_pwl[0].yBase) / out_scale
        << " " << 0.0
        << "\n";

    s = gna_slope(pwl[0].m, in_scale, out_scale);
    gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
    // the slope-scale index shares xBase with the x value (bits cleared by XBASEMASK)
    gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;

    // mask the slope-scale index back out so the log shows the real x value
    gnalog() << ((int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale)
        << " " << (gna_pwl[1].yBase) / out_scale
        << " " << pwl[0].m
        << "\n";

    for (uint32_t i = 1; i < pwl_size - 1; ++i) {
        s = gna_slope(pwl[i].m, in_scale, out_scale);
        gna_pwl[i + 1].xBase = (static_cast<int32_t> (in_scale * pwl[i].alpha)) & XBASEMASK;
        gna_pwl[i + 1].yBase = FLOAT_TO_INT16(pwl[i].beta * out_scale);
        gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
        gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index;

        gnalog() << (pwl[i].alpha)
            << " " << pwl[i].beta
            << " " << pwl[i].m
            << "\n";
    }
    // insert extra segment for xvalues > u_bound
    // x at which the last used piece reaches INT16_MAX / out_scale
    gna_pwl[n_segments - 1].xBase =
        ((uint32_t)(in_scale * (INT16_MAX/out_scale - pwl[pwl_size - 2].b) / pwl[pwl_size - 2].m)) & XBASEMASK;
    gna_pwl[n_segments - 1].yBase = INT16_MAX;
    gna_pwl[n_segments - 1].slope = 0;

    gnalog() << (gna_pwl[n_segments - 1].xBase / in_scale)
        << " " << 1.0
        << " " << 0.0
        << "\n";
    break;
}
|
||||
case kActLog: {
    // Builds the GNA segment table for the log activation:
    //   gna_pwl[0]              flat (slope 0) segment at INT16_MIN for the lowest inputs,
    //   gna_pwl[1]              first sloped segment, taken from pwl[0],
    //   gna_pwl[2..pwl_size-1]  segments taken from pwl[1..pwl_size-2].
    // NOTE(review): gna_pwl[1] is accessed unconditionally, so this presumably
    // relies on the caller passing at least two pwl pieces.
    auto n_segments = static_cast<int32_t> (pwl_size);
    gna_pwl.resize(n_segments);
    // insert extra segment for x values < l_bound
    gna_pwl[0].xBase = static_cast<int32_t> (INT32_MIN & XBASEMASK);  // zero out the 2 lsb
    gnalog() << "=========================== Log Segments ===========================\n";
    gna_pwl[0].yBase = gna_pwl[1].yBase = INT16_MIN;
    gna_pwl[1].xBase = (static_cast<int32_t> (1 + ~XBASEMASK));  // smallest representable value
    gna_pwl[0].slope = 0;

    gnalog() << (gna_pwl[0].xBase) / in_scale
        << " " << (gna_pwl[0].yBase) / out_scale
        << " " << 0.0
        << "\n";

    s = gna_slope(pwl[0].m, in_scale, out_scale);
    gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
    // the slope-scale index shares xBase with the x value (bits cleared by XBASEMASK)
    gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;

    // mask the slope-scale index back out so the log shows the real x value
    gnalog() << ((int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale)
        << " " << (gna_pwl[1].yBase) / out_scale
        << " " << pwl[0].m
        << "\n";

    for (uint32_t i = 1; i < pwl_size - 1; ++i) {
        s = gna_slope(pwl[i].m, in_scale, out_scale);
        gna_pwl[i + 1].xBase = (static_cast<int32_t> (in_scale * pwl[i].alpha)) & XBASEMASK;
        gna_pwl[i + 1].yBase = FLOAT_TO_INT16(pwl[i].beta * out_scale);
        gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
        gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index;

        gnalog() << (pwl[i].alpha)
            << " " << pwl[i].beta
            << " " << pwl[i].m
            << "\n";
    }
    break;
}
|
||||
case kActNegHalfLog: {
    // Segment table for the NegHalfLog activation: a flat segment at INT16_MAX
    // for the lowest inputs, then one segment per used pwl piece.
    const auto segment_count = static_cast<int32_t> (pwl_size);
    gna_pwl.resize(segment_count);

    auto &flat_segment = gna_pwl[0];
    auto &first_sloped = gna_pwl[1];

    // insert extra segment for x values < l_bound
    flat_segment.xBase = static_cast<int32_t> (INT32_MIN & XBASEMASK);  // zero out the 2 lsb
    gnalog() << "=========================== NegHalfLog Segments ===========================\n";
    first_sloped.yBase = INT16_MAX;
    flat_segment.yBase = first_sloped.yBase;
    first_sloped.xBase = (static_cast<int32_t> (1 + ~XBASEMASK));  // smallest representable value
    flat_segment.slope = 0;

    gnalog() << flat_segment.xBase / in_scale
        << " " << (flat_segment.yBase) / out_scale
        << " " << 0.0
        << "\n";

    s = gna_slope(pwl[0].m, in_scale, out_scale);
    first_sloped.slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
    first_sloped.xBase = first_sloped.xBase | s.slope_scale_index;

    gnalog() << ((first_sloped.xBase & XBASEMASK) / in_scale)
        << " " << (first_sloped.yBase) / out_scale
        << " " << pwl[0].m
        << "\n";

    // remaining pieces: pwl[k] fills gna_pwl[k + 1]
    for (uint32_t k = 1; k < pwl_size - 1; ++k) {
        const auto &piece = pwl[k];
        auto &segment = gna_pwl[k + 1];

        s = gna_slope(piece.m, in_scale, out_scale);
        segment.xBase = (static_cast<int32_t> (in_scale * piece.alpha)) & XBASEMASK;
        segment.yBase = FLOAT_TO_INT16(piece.beta * out_scale);
        segment.slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
        segment.xBase = segment.xBase | s.slope_scale_index;

        gnalog() << (piece.alpha)
            << " " << piece.beta
            << " " << piece.m
            << "\n";
    }
    break;
}
|
||||
case kActRelu:
|
||||
case kActLeakyRelu: {
|
||||
auto n_segments = 2;
|
||||
@@ -106,7 +236,8 @@ void make_gna_pwl(const DnnActivation fun,
|
||||
break;
|
||||
}
|
||||
case kActIdentity:
|
||||
case kActKaldiLstmClipping: {
|
||||
case kActKaldiLstmClipping:
|
||||
case kActDivByN: {
|
||||
int32_t x_lower = INT32_MIN;
|
||||
int32_t x_upper = INT32_MAX;
|
||||
int16_t y_lower = INT16_MIN;
|
||||
@@ -130,14 +261,20 @@ void make_gna_pwl(const DnnActivation fun,
|
||||
x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
} else if (fun == kActIdentity) {
|
||||
gnalog() << "=========================== Identity Segments ===========================\n";
|
||||
if (x_lower < y_lower * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale);
|
||||
if (x_upper > y_upper * in_scale / out_scale) x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale);
|
||||
if (y_lower < x_lower * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale);
|
||||
if (y_upper > x_upper * out_scale / in_scale) y_upper = FLOAT_TO_INT16(x_upper * out_scale / in_scale);
|
||||
} else {
|
||||
gnalog() << "=========================== DivByN Segments ===========================\n";
|
||||
if (x_lower < y_lower * (float)n * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * (float)n * in_scale / out_scale);
|
||||
if (x_upper > y_upper * (float)n * in_scale / out_scale) x_upper = FLOAT_TO_INT32(y_upper * (float)n * in_scale / out_scale);
|
||||
if (y_lower < x_lower * (1.0 / n) * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * (1.0 / n) * out_scale / in_scale);
|
||||
if (y_upper > x_upper* (1.0 / n) * out_scale / in_scale) y_upper = FLOAT_TO_INT16(x_upper * (1.0 / n) * out_scale / in_scale);
|
||||
}
|
||||
gna_pwl.resize(n_segments);
|
||||
gna_pwl.resize(n_segments);
|
||||
gna_pwl[0].xBase = INT32_MIN & XBASEMASK; // zero out the 2 lsb
|
||||
gna_pwl[0].yBase = y_lower;
|
||||
gna_pwl[0].slope = 0;
|
||||
@@ -148,13 +285,19 @@ void make_gna_pwl(const DnnActivation fun,
|
||||
|
||||
gna_pwl[1].xBase = x_lower & XBASEMASK; // zero out the 2 lsb
|
||||
gna_pwl[1].yBase = y_lower;
|
||||
s = gna_slope(1.0, in_scale, out_scale);
|
||||
if (fun == kActDivByN) {
|
||||
s = gna_slope(1.0 / n, in_scale, out_scale);
|
||||
} else {
|
||||
s = gna_slope(1.0, in_scale, out_scale);
|
||||
}
|
||||
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
|
||||
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
|
||||
gnalog() << gna_pwl[1].xBase / in_scale
|
||||
<< " " << gna_pwl[1].yBase / out_scale
|
||||
<< " " << 1.0
|
||||
<< "\n";
|
||||
int32_t round_scale = FLOAT_TO_INT16(0.5f / s.slope) & XBASEMASK;
|
||||
gna_pwl[1].xBase = (gna_pwl[1].xBase - round_scale) | s.slope_scale_index;
|
||||
gnalog() << (int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale
|
||||
<< " " << gna_pwl[1].yBase / out_scale
|
||||
<< " " << 1.0
|
||||
<< "\n";
|
||||
|
||||
if (INT32_MAX > x_upper) { // need a right segment
|
||||
gna_pwl.push_back({
|
||||
@@ -162,10 +305,10 @@ void make_gna_pwl(const DnnActivation fun,
|
||||
y_upper,
|
||||
0 });
|
||||
|
||||
gnalog() << gna_pwl[n_segments].xBase / in_scale
|
||||
<< " " << gna_pwl[n_segments].yBase / out_scale
|
||||
<< " " << 0
|
||||
<< "\n";
|
||||
gnalog() << (x_upper & XBASEMASK) / in_scale
|
||||
<< " " << gna_pwl[n_segments].yBase / out_scale
|
||||
<< " " << 0
|
||||
<< "\n";
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -14,4 +14,5 @@ void make_gna_pwl(const DnnActivation fun,
|
||||
const double u_bound,
|
||||
const double in_scale,
|
||||
const double out_scale,
|
||||
std::vector<intel_pwl_segment_t> &gna_pwl);
|
||||
std::vector<intel_pwl_segment_t> &gna_pwl,
|
||||
const uint32_t n);
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
#include <iostream>
|
||||
#include <details/ie_exception.hpp>
|
||||
#include "quantization.h"
|
||||
#include <xmmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
|
||||
void QuantizeAffine16(float *ptr_float_weights,
|
||||
float *ptr_float_biases,
|
||||
@@ -20,7 +22,7 @@ void QuantizeAffine16(float *ptr_float_weights,
|
||||
uint32_t num_columns_padded) {
|
||||
uint32_t num_saturate = 0;
|
||||
|
||||
if (*ptr_weight_scale_factor == 1.0) {
|
||||
/* if (*ptr_weight_scale_factor == 1.0) {
|
||||
// scale factor for weights is not calculated yet
|
||||
float mean_weight = 0.0;
|
||||
float mean_weight_squared = 0.0;
|
||||
@@ -48,7 +50,7 @@ void QuantizeAffine16(float *ptr_float_weights,
|
||||
*ptr_weight_scale_factor = static_cast<float>(MAX_VAL_2B_WEIGHT) / max_weight;
|
||||
}
|
||||
*ptr_output_scale_factor = input_scale_factor * *ptr_weight_scale_factor;
|
||||
}
|
||||
}*/
|
||||
|
||||
for (uint32_t row = 0; row < num_rows; row++) {
|
||||
for (uint32_t col = 0; col < num_columns; col++) {
|
||||
@@ -104,26 +106,156 @@ void QuantizeAffine16(float *ptr_float_weights,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Derives a scale factor from the smallest absolute value in a float buffer.
 *
 * @param ptr_float_memory  buffer of num_elements floats (no alignment required)
 * @param target_max        not used by this implementation; kept so existing
 *                          callers keep compiling
 * @param num_elements      number of floats in the buffer
 * @return 1 / min|x| when 0 < min|x| < 1, otherwise 1.0f (also for an empty buffer)
 */
__attribute__ ((target ("default")))
float ScaleFactorForQuantization(void *ptr_float_memory, float target_max, size_t num_elements)
{
    (void)target_max;
    if (num_elements == 0) {
        return 1.0f;  // nothing to inspect; never touch the buffer
    }

    const float *ptr_float_feat = static_cast<const float *>(ptr_float_memory);

    // seed the running minimum with a real element so it starts from valid data
    float min_abs = static_cast<float>(fabs(ptr_float_feat[0]));

    const size_t num_vectors = num_elements / 4;
    if (num_vectors > 0) {
        const __m128 zero = _mm_setzero_ps();
        __m128 lane_min = _mm_set1_ps(min_abs);
        for (size_t i = 0; i < num_vectors; i++) {
            // unaligned load: the buffer is not guaranteed to be 16-byte aligned
            const __m128 v = _mm_loadu_ps(ptr_float_feat + 4 * i);
            // |v| = max(v, 0 - v)
            const __m128 abs_v = _mm_max_ps(v, _mm_sub_ps(zero, v));
            lane_min = _mm_min_ps(lane_min, abs_v);
        }
        float buf[4];
        _mm_storeu_ps(buf, lane_min);
        for (int lane = 0; lane < 4; lane++) {
            if (buf[lane] < min_abs) {
                min_abs = buf[lane];
            }
        }
    }

    // elements that do not fill a whole 4-wide vector
    for (size_t i = 4 * num_vectors; i < num_elements; i++) {
        const float abs_f = static_cast<float>(fabs(ptr_float_feat[i]));
        if (abs_f < min_abs) {
            min_abs = abs_f;
        }
    }

    return (min_abs != 0 ? (min_abs < 1.0 ? 1 / min_abs : 1.0f) : 1.0f);
}
|
||||
|
||||
// Returns lane `index` (0..3) of the SSE vector `v`; the index is not range-checked.
float accessmember(__m128 v, int index)
{
    float lanes[4];
    _mm_storeu_ps(lanes, v);  // spill all four lanes, lane 0 first
    return lanes[index];
}
|
||||
|
||||
__attribute__ ((target ("default")))
|
||||
void QuantizeBias8(float *ptr_float_biases, intel_compound_bias_t *ptr_int_biases, float *ptr_output_scale_factor, uint32_t num_rows)
|
||||
{
|
||||
float *ptr_float_feat = (float*)ptr_float_biases;
|
||||
intel_compound_bias_t *ptr_int = (intel_compound_bias_t*)ptr_int_biases;
|
||||
|
||||
uint32_t moves = num_rows / 4;
|
||||
uint32_t mod = num_rows % 4;
|
||||
uint32_t i, j;
|
||||
|
||||
__m128 v, zero, half, neg_half, scale_factores, mask, rounding_values, min, max, values;
|
||||
|
||||
#ifdef ROUND_AND_CAST
|
||||
__m128i tmp;
|
||||
#endif
|
||||
|
||||
zero = _mm_setzero_ps();
|
||||
half = _mm_set1_ps(0.5f);
|
||||
neg_half = _mm_set1_ps(-0.5f);
|
||||
max = _mm_set1_ps(2147483647.0f);
|
||||
min = _mm_set1_ps(-2147483648.0f);
|
||||
|
||||
scale_factores = _mm_set1_ps(*ptr_output_scale_factor);
|
||||
|
||||
for (i = 0; i < moves; i++, ptr_float_feat += 4, ptr_int += 4) {
|
||||
|
||||
v = _mm_load_ps(ptr_float_feat);
|
||||
|
||||
//rounding_values = (v>0) ? 0.5f : -0.5f;
|
||||
mask = _mm_min_ps(v, zero);
|
||||
rounding_values = _mm_blendv_ps(half, neg_half, mask);
|
||||
|
||||
// values = v * scale_factores + rounding_values
|
||||
values = _mm_mul_ps(v, scale_factores);
|
||||
values = _mm_add_ps(values, rounding_values);
|
||||
|
||||
// shrink to <-2147483648.0f, 2147483647.0f>
|
||||
values = _mm_min_ps(values, max);
|
||||
values = _mm_max_ps(values, min);
|
||||
|
||||
#ifdef ROUND_AND_CAST
|
||||
// round and cast float to int16 ... much faster than "only cast" in MS compiler ??
|
||||
tmp = _mm_cvtps_epi32(values);
|
||||
ptr_int[0].bias = tmp.m128i_i32[0];
|
||||
ptr_int[1].bias = tmp.m128i_i32[1];
|
||||
ptr_int[2].bias = tmp.m128i_i32[2];
|
||||
ptr_int[3].bias = tmp.m128i_i32[3];
|
||||
#else
|
||||
// only cast float to int16
|
||||
for (j = 0; j < 4; j++)
|
||||
ptr_int[j].bias = (int32_t)accessmember(values, j);
|
||||
#endif
|
||||
}
|
||||
|
||||
return (scale_factor);
|
||||
for (i = 0; i < mod; i++) {
|
||||
float rounding_value = (ptr_float_feat[i]>0) ? 0.5f : -0.5f;
|
||||
float value = ptr_float_feat[i] * *ptr_output_scale_factor + rounding_value;
|
||||
if (value > 2147483647.0) {
|
||||
ptr_int[i].bias = 2147483647L;
|
||||
}
|
||||
else if (value < -2147483648.0) {
|
||||
ptr_int[i].bias = -2147483648LL;
|
||||
}
|
||||
else {
|
||||
ptr_int[i].bias = (int32_t)value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*__attribute__ ((target ("default")))
|
||||
void QuantizeBias8(float *ptr_float_biases, intel_compound_bias_t *ptr_int_biases, float *ptr_output_scale_factor, uint32_t num_rows)
|
||||
{
|
||||
uint32_t num_saturate = 0;
|
||||
for (uint32_t j = 0; j < num_rows; j++) {
|
||||
float rounding_value = (ptr_float_biases[j] > 0) ? 0.5f : -0.5f;
|
||||
float value = ptr_float_biases[j] * *ptr_output_scale_factor + rounding_value;
|
||||
if (value > 2147483647.0) {
|
||||
ptr_int_biases[j].bias = 2147483647L;
|
||||
num_saturate++;
|
||||
} else if (value < -2147483648.0) {
|
||||
ptr_int_biases[j].bias = -2147483648LL;
|
||||
num_saturate++;
|
||||
} else {
|
||||
ptr_int_biases[j].bias = (int32_t) value;
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
void QuantizeVector16(float *ptr_float_memory, int16_t *ptr_int_memory, uint32_t num_elements, float scale_factor) {
|
||||
float *ptr_float_feat = reinterpret_cast<float *>(ptr_float_memory);
|
||||
uint32_t num_saturate = 0;
|
||||
@@ -158,7 +290,7 @@ void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases,
|
||||
}
|
||||
uint32_t num_saturate = 0;
|
||||
|
||||
if (*ptr_weight_scale_factor == 1.0) {
|
||||
/*if (*ptr_weight_scale_factor == 1.0) {
|
||||
// scale factor for weights is not calculated yet
|
||||
float mean_weight = 0.0;
|
||||
float mean_weight_squared = 0.0;
|
||||
@@ -191,7 +323,7 @@ void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases,
|
||||
// 4. quantize and store scaled row
|
||||
*ptr_weight_scale_factor = MAX_OUT_MULTIPLIER * *ptr_weight_scale_factor; // increase dynamic range by max multiplier
|
||||
*ptr_output_scale_factor = input_scale_factor * *ptr_weight_scale_factor;
|
||||
}
|
||||
}*/
|
||||
float valueAcc = 0.0;
|
||||
for (uint32_t row = 0; row < num_rows; row++) {
|
||||
float scaled_row_max = 0;
|
||||
@@ -237,19 +369,7 @@ void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases,
|
||||
|
||||
// bias value of the bas will be only used when input bias provided
|
||||
if (ptr_float_biases != nullptr) {
|
||||
for (uint32_t j = 0; j < num_rows; j++) {
|
||||
float rounding_value = (ptr_float_biases[j] > 0) ? 0.5f : -0.5f;
|
||||
float value = ptr_float_biases[j] * *ptr_output_scale_factor + rounding_value;
|
||||
if (value > 2147483647.0) {
|
||||
ptr_int_biases[j].bias = 2147483647L;
|
||||
num_saturate++;
|
||||
} else if (value < -2147483648.0) {
|
||||
ptr_int_biases[j].bias = -2147483648LL;
|
||||
num_saturate++;
|
||||
} else {
|
||||
ptr_int_biases[j].bias = (int32_t) value;
|
||||
}
|
||||
}
|
||||
QuantizeBias8(ptr_float_biases, ptr_int_biases, ptr_output_scale_factor, num_rows);
|
||||
}
|
||||
|
||||
if (num_saturate > 0) {
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "gna_slope_scale.h"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
extern float identity_SF;
|
||||
namespace frontend {
|
||||
struct ScaleFactorUpdateResult {
|
||||
InferenceEngine::CNNLayer *restartLayer = nullptr;
|
||||
@@ -53,7 +54,7 @@ template<>
|
||||
class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
|
||||
private :
|
||||
const float activation_scale_factor = 2048.f;
|
||||
const float identity_scale_factor = 2049.0f;
|
||||
const float identity_scale_factor = identity_SF;
|
||||
const float k = 5;
|
||||
const float k_identity = 6;
|
||||
|
||||
@@ -472,7 +473,7 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
|
||||
quant->_weights_quant.scale = 1.0f;
|
||||
}
|
||||
|
||||
if (wl->_biases) {
|
||||
/*if (wl->_biases) {
|
||||
quant->_bias_quant.scale = ScaleFactorForQuantization(wl->_biases->buffer().as<float *>(),
|
||||
MAX_VAL_4B_BIAS,
|
||||
wl->_biases->size());
|
||||
@@ -480,7 +481,7 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
|
||||
quant->_bias_quant.scale = std::min(quant->_weights_quant.scale * quant->_src_quant.scale, quant->_bias_quant.scale);
|
||||
quant->_weights_quant.scale = quant->_bias_quant.scale / quant->_src_quant.scale;
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
// TODO: findout why ???
|
||||
if (weightsSize == 1) {
|
||||
@@ -552,7 +553,7 @@ class ScaleFactorPerLayer<InferenceEngine::ConvolutionLayer*> : public ScaleFact
|
||||
*/
|
||||
class ScaleFactorCalculator {
|
||||
using Cnt = std::vector<InferenceEngine::CNNLayerPtr>;
|
||||
Cnt net;
|
||||
Cnt net;
|
||||
mutable Cnt::const_iterator idx;
|
||||
mutable bool needRestart = false;
|
||||
int weightsBytesSize;
|
||||
|
||||
@@ -59,8 +59,10 @@ void ExportLdForNoMmu(uint32_t modelId, std::ostream & outStream) {
|
||||
|
||||
status = Gna2ModelExportConfigSetSource(exportConfig, 0, modelId);
|
||||
GNADeviceHelper::checkGna2Status(status);
|
||||
#if GNA_LIB_VER != 2 && GNA_LIB_VER != 1
|
||||
status = Gna2ModelExportConfigSetTarget(exportConfig, Gna2DeviceVersionEmbedded3_0);
|
||||
GNADeviceHelper::checkGna2Status(status);
|
||||
#endif
|
||||
|
||||
void * ldNoMmu;
|
||||
uint32_t ldNoMmuSize;
|
||||
|
||||
@@ -24,7 +24,10 @@
|
||||
#include "details/ie_exception.hpp"
|
||||
#include "gna_plugin_log.hpp"
|
||||
|
||||
std::mutex GNADeviceHelper::acrossPluginsSync{};
|
||||
|
||||
uint8_t* GNADeviceHelper::alloc(uint32_t size_requested, uint32_t *size_granted) {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
void * memPtr;
|
||||
#if GNA_LIB_VER == 1
|
||||
memPtr = GNAAlloc(nGNAHandle, size_requested, size_granted);
|
||||
@@ -41,6 +44,7 @@ uint8_t* GNADeviceHelper::alloc(uint32_t size_requested, uint32_t *size_granted)
|
||||
}
|
||||
|
||||
void GNADeviceHelper::free(void * ptr) {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
#if GNA_LIB_VER == 1
|
||||
GNAFree(nGNAHandle);
|
||||
#else
|
||||
@@ -53,6 +57,7 @@ void GNADeviceHelper::free(void * ptr) {
|
||||
uint32_t GNADeviceHelper::propagate(const intel_nnet_type_t *pNeuralNetwork,
|
||||
const uint32_t *pActiveIndices,
|
||||
uint32_t nActiveIndices) {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
uint32_t reqId;
|
||||
|
||||
nGNAStatus = GNAPropagateForward(nGNAHandle, pNeuralNetwork,
|
||||
@@ -62,14 +67,17 @@ uint32_t GNADeviceHelper::propagate(const intel_nnet_type_t *pNeuralNetwork,
|
||||
}
|
||||
#else
|
||||
void GNADeviceHelper::setUpActiveList(const uint32_t requestConfigId, uint32_t layerIndex, uint32_t* ptr_active_indices, uint32_t num_active_indices) {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
const auto status = Gna2RequestConfigEnableActiveList(requestConfigId, layerIndex, num_active_indices, ptr_active_indices);
|
||||
checkGna2Status(status);
|
||||
}
|
||||
// Enqueues the request and blocks until it completes.
void GNADeviceHelper::propagateSync(const uint32_t requestConfigId) {
    // Do not lock acrossPluginsSync here: propagate() and wait() each acquire
    // it themselves, and std::mutex is not recursive, so holding it across
    // these calls would deadlock the calling thread.
    wait(propagate(requestConfigId));
}
|
||||
|
||||
uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId) {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
uint32_t reqId;
|
||||
const auto status = Gna2RequestEnqueue(requestConfigId, &reqId);
|
||||
checkGna2Status(status);
|
||||
@@ -77,6 +85,7 @@ uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId) {
|
||||
}
|
||||
|
||||
uint32_t GNADeviceHelper::createModel(const Gna2Model& gnaModel) const {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
uint32_t modelId;
|
||||
const auto status = Gna2ModelCreate(nGnaDeviceIndex, &gnaModel, &modelId);
|
||||
checkGna2Status(status);
|
||||
@@ -84,11 +93,13 @@ uint32_t GNADeviceHelper::createModel(const Gna2Model& gnaModel) const {
|
||||
}
|
||||
|
||||
void GNADeviceHelper::releseModel(const uint32_t model_id) {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
const auto status = Gna2ModelRelease(model_id);
|
||||
checkGna2Status(status);
|
||||
}
|
||||
|
||||
uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
uint32_t reqConfId;
|
||||
auto status = Gna2RequestConfigCreate(model_id, &reqConfId);
|
||||
checkGna2Status(status);
|
||||
@@ -121,6 +132,7 @@ void GNADeviceHelper::checkGna2Status(Gna2Status status) {
|
||||
#endif
|
||||
|
||||
void GNADeviceHelper::wait(uint32_t reqId) {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
#if GNA_LIB_VER == 2
|
||||
const auto status = Gna2RequestWait(reqId, GNA_TIMEOUT);
|
||||
checkGna2Status(status);
|
||||
@@ -220,9 +232,11 @@ void GNADeviceHelper::open(uint8_t n_threads) {
|
||||
|
||||
void GNADeviceHelper::close() {
|
||||
#if GNA_LIB_VER == 1
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
GNADeviceClose(nGNAHandle);
|
||||
nGNAHandle = 0;
|
||||
#else
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
const auto status = Gna2DeviceClose(nGnaDeviceIndex);
|
||||
checkGna2Status(status);
|
||||
#endif
|
||||
|
||||
@@ -30,7 +30,10 @@
|
||||
/**
|
||||
* holds gna - style handle in RAII way
|
||||
*/
|
||||
typedef uint32_t gna_device_id; // TODO:3:API redesign: remove and use uint32_t instead.
|
||||
|
||||
class GNADeviceHelper {
|
||||
static std::mutex acrossPluginsSync;
|
||||
#if GNA_LIB_VER == 1
|
||||
intel_gna_status_t nGNAStatus = GNA_NOERROR;
|
||||
intel_gna_handle_t nGNAHandle = 0;
|
||||
@@ -159,6 +162,7 @@ public:
|
||||
void setOMPThreads(uint8_t const n_threads);
|
||||
|
||||
void initGnaPerfCounters() {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
#if GNA_LIB_VER == 1
|
||||
nGNAPerfResults = {{0, 0, 0, 0, 0, 0, 0}, {0, 0}, {0, 0, 0}, {0, 0}};
|
||||
nGNAPerfResultsTotal = {{0, 0, 0, 0, 0, 0, 0}, {0, 0}, {0, 0, 0}, {0, 0}};
|
||||
|
||||
@@ -803,6 +803,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
|
||||
uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
|
||||
uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
|
||||
uint32_t num_rows_out = isDiag ? num_rows_in : FROM_IR_DIM(outputs, 1);
|
||||
|
||||
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
|
||||
uint32_t num_padding_out = isDiag ? num_padding : 0;
|
||||
|
||||
@@ -1190,6 +1191,7 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
num_rows = FROM_IR_DIM(inputs, 1);
|
||||
}
|
||||
|
||||
uint32_t non_batch_dim = (orientation == kDnnNonInterleavedOrientation) ? num_columns : num_rows;
|
||||
size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
|
||||
* outputs->getPrecision().size();
|
||||
|
||||
@@ -1198,6 +1200,10 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
|
||||
static InferenceEngine::details::caseless_unordered_map<std::string, DnnActivationType> supportedActivations = {
|
||||
{"sigmoid", kActSigmoid},
|
||||
{"divbyn", kActDivByN},
|
||||
{"log", kActLog},
|
||||
{"neghalflog", kActNegHalfLog},
|
||||
{"exp", kActExp},
|
||||
{"tanh", kActTanh},
|
||||
{"relu", kActRelu},
|
||||
{"leakyrelu", kActLeakyRelu},
|
||||
@@ -1262,12 +1268,14 @@ case name:\
|
||||
&*ptr_pwl_segments.begin(),
|
||||
static_cast<uint32_t>(ptr_pwl_segments.size()),
|
||||
input_pwl_scale_factor,
|
||||
output_pwl_scale_factor);
|
||||
output_pwl_scale_factor,
|
||||
non_batch_dim);
|
||||
} else {
|
||||
PwlDesignOpt16(activation_type,
|
||||
ptr_pwl_segments,
|
||||
input_pwl_scale_factor,
|
||||
output_pwl_scale_factor);
|
||||
output_pwl_scale_factor,
|
||||
non_batch_dim);
|
||||
}
|
||||
ptr_pwl_segments_target = reinterpret_cast<intel_pwl_segment_t*>(&ptr_pwl_segments_target);
|
||||
}
|
||||
@@ -1298,30 +1306,85 @@ case name:\
|
||||
}
|
||||
|
||||
// Creates a GNA interleave/deinterleave component for a non-trivial permute layer.
// Trivial permutes return without creating a component. The input dims are
// squeezed (dims equal to 1 dropped) and must leave exactly two dims, the
// smaller of which must not exceed 8 and the larger of which must be a
// multiple of 8; anything else throws.
void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
    // NOTE(review): call counter shared by every invocation; on each even call the
    // two squeezed dims are swapped below. Looks like a model-specific workaround
    // that depends on layer creation order - confirm intent.
    static int count = 0;
    count++;
    if (LayerInfo(layer).isTrivialPermute()) {
        return;
    }
    auto layerOrder = layer->GetParamAsInts("order");
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
    if (layer->insData.empty()) {
        THROW_GNA_EXCEPTION << "Input layer pointer is unexpectedly absent";
    }
    auto inputs = layer->insData.begin()->lock();
    if (!inputs) {
        // lock() on the stored weak pointer yields null once the data is gone
        THROW_GNA_EXCEPTION << "Input layer pointer is unexpectedly absent";
    }
    auto inputsOrder = inputs->getTensorDesc().getDims();
    auto outputs = layer->outData.front();
    // squeeze order vector
    SizeVector squeezedInputOrder;
    for (auto input_shape : inputsOrder) {
        if (input_shape != 1) squeezedInputOrder.push_back(input_shape);
    }
    // collected but not consulted by the checks below
    SizeVector squeezedOutputOrder;
    for (auto output_shape : layerOrder) {
        if (output_shape != 0) squeezedOutputOrder.push_back(output_shape);
    }

    void* ptr_inputs = nullptr;
    void* ptr_outputs = nullptr;

    // exactly two dims are indexed below, so fewer than two is rejected as well
    if (squeezedInputOrder.size() != 2) {
        THROW_GNA_EXCEPTION << "unsupported permute (requested transpose is not 2D)";
    }

    if (count%2 == 0) {
        auto temp = squeezedInputOrder[0];
        squeezedInputOrder[0] = squeezedInputOrder[1];
        squeezedInputOrder[1] = temp;
    }

    if (std::min(squeezedInputOrder[0], squeezedInputOrder[1]) > 8) {
        THROW_GNA_EXCEPTION << "unsupported permute (minor dimension="
                            << std::min(squeezedInputOrder[0], squeezedInputOrder[1]) << " > 8)";
    }

    // now this can be run on GNA
    if (squeezedInputOrder[0] < squeezedInputOrder[1]) {  // interleave case
        if (ALIGN(squeezedInputOrder[1], 8) != squeezedInputOrder[1]) {
            THROW_GNA_EXCEPTION << "unsupported permute (row size not a multiple of 8)";
        } else {
            auto& currentComponent = dnnComponents.addComponent(layer->name, "interleave");
            dnn->InitInterleaveComponent(currentComponent,
                                         squeezedInputOrder[0],
                                         squeezedInputOrder[1],
                                         inputs->getPrecision().size(),
                                         outputs->getPrecision().size(),
                                         (quantized == nullptr) ? 1.0f : quantized->_dst_quant.scale,
                                         ptr_inputs,
                                         ptr_outputs);
        }

    } else {  // deinterleave case
        if (ALIGN(squeezedInputOrder[0], 8) != squeezedInputOrder[0]) {
            THROW_GNA_EXCEPTION << "[GNA plugin] unsupported permute (column size not a multiple of 8)";
        } else {
            auto& currentComponent = dnnComponents.addComponent(layer->name, "deinterleave");
            dnn->InitDeinterleaveComponent(currentComponent,
                                           squeezedInputOrder[0],
                                           squeezedInputOrder[1],
                                           inputs->getPrecision().size(),
                                           outputs->getPrecision().size(),
                                           (quantized == nullptr) ? 1.0f : quantized->_dst_quant.scale,
                                           ptr_inputs,
                                           ptr_outputs);
        }
    }
    // output size: element count rounded up to a multiple of 8, times element size
    size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(
        begin(outputs->getDims()), end(outputs->getDims())), 8)
        * outputs->getPrecision().size();
    size_t num_data_bytes_in = squeezedInputOrder[0] * squeezedInputOrder[1] * inputs->getPrecision().size();

    connectInput(layer, ptr_inputs, num_data_bytes_in);
    connectOutput(layer, ptr_outputs, num_data_bytes_out);
}
|
||||
|
||||
void SKIP(GNAGraphCompiler*, CNNLayerPtr) {}
|
||||
@@ -1338,7 +1401,7 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
|
||||
{{"Split"}, SKIP}, // skip information about which part of prev layer need to consume handle during layer creation
|
||||
{{"Slice"}, SKIP},
|
||||
{{"link"}, SKIP},
|
||||
{{"clamp", "sigmoid", "relu", "tanh", "identity"}, CREATE(PWLPrimitive)},
|
||||
{{"clamp", "sigmoid", "relu", "tanh", "log", "neghalflog", "divbyn", "exp", "identity"}, CREATE(PWLPrimitive)},
|
||||
{{"Convolution"}, CREATE(ConvolutionPrimitive)},
|
||||
{{"Permute"}, CREATE(PermutePrimitive)}, // permute of certain form (2D transpose) can be assimilated in followed FC layer
|
||||
{{"Pooling"}, CREATE(PoolingPrimitive)},
|
||||
@@ -1644,11 +1707,14 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
||||
}
|
||||
|
||||
if (LayerInfo(prevLayer).isPermute()) {
|
||||
gnalog() << "Skipping permute layer: " << prevLayer->name << "\n";
|
||||
return {connectInput(prevLayer, ptr, num_data_bytes_in, offset, 0).input, true, prevLayer};
|
||||
if (!LayerInfo(prevLayer).isTrivialPermute()) {
|
||||
// we should have GNA primitive for it
|
||||
THROW_GNA_EXCEPTION << "missed gna primitive for permute: " << prevLayer->name;
|
||||
}
|
||||
gnalog() << "Skipping trivial permute layer: " << prevLayer->name << "\n";
|
||||
return connectInput(prevLayer, ptr, num_data_bytes_in, offset, 0);
|
||||
}
|
||||
|
||||
|
||||
THROW_GNA_EXCEPTION << "Cannot connect input for: " << layer->name;
|
||||
}
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "descriptions/gna_input_desc.hpp"
|
||||
#include "descriptions/gna_flags.hpp"
|
||||
#include "cpp_interfaces/base/ie_plugin_base.hpp"
|
||||
#include "cpp_interfaces/impl/ie_memory_state_internal.hpp"
|
||||
#include "connection_details.hpp"
|
||||
#include "backend/dnn.hpp"
|
||||
#include "memory/polymorph_allocator.hpp"
|
||||
@@ -52,6 +53,7 @@ public:
|
||||
GNAPluginNS::backend::DnnComponents dnnComponents;
|
||||
MemoryConnection memory_connection;
|
||||
ConcatConnection concat_connection;
|
||||
std::vector<InferenceEngine::IMemoryStateInternal::Ptr> memoryStates;
|
||||
|
||||
void setGNAMemoryPtr(std::shared_ptr<GNAPluginNS::gna_memory_type> gnaMemPtr);
|
||||
void setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr);
|
||||
|
||||
@@ -18,8 +18,15 @@
|
||||
|
||||
#include "gna_lib_ver_selector.hpp"
|
||||
|
||||
typedef struct _nnet_type_t
|
||||
{
|
||||
uint32_t nLayers; // The number of layers in the network.
|
||||
uint32_t nGroup; // Input vector grouping level.
|
||||
intel_nnet_layer_t *pLayers; // Layer configurations.
|
||||
|
||||
} intel_nnet_type_t;
|
||||
|
||||
#ifndef WIN32
|
||||
#include <profiler.h>
|
||||
|
||||
void clearTimeB(timeb & tb) {
|
||||
tb.time = 0;
|
||||
|
||||
@@ -57,6 +57,7 @@ uint32_t ToByteSize(const Gna2DataType type) {
|
||||
}
|
||||
}
|
||||
|
||||
float GNAPluginNS::identity_SF = 256.0f;
|
||||
constexpr uint32_t GNAPluginNS::GNAPlugin::FAKE_REQUEST_CONFIG_ID;
|
||||
#endif
|
||||
using namespace InferenceEngine;
|
||||
@@ -501,11 +502,11 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
|
||||
// auto idx = std::distance(outputsDataMap.begin(), outputPort);
|
||||
auto & desc = outputsDesc[idx];
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
||||
|
||||
desc.ptrs.resize(gnaFlags->gna_lib_async_threads_num);
|
||||
desc.orientation = component.orientation_out;
|
||||
desc.num_bytes_per_element = component.num_bytes_per_output;
|
||||
desc.scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
|
||||
|
||||
// TODO: this need to be fixed
|
||||
desc.num_elements = component.num_rows_out;
|
||||
|
||||
@@ -518,6 +519,18 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
|
||||
// gets output layer pointer in original topology not in cloned
|
||||
auto outLayer = outPort.second->getCreatorLayer().lock();
|
||||
|
||||
// Memory layers are not dnnComponents hence we need to make switch with identity layer
|
||||
if (outLayer->type == "Memory") {
|
||||
// traverse memory connection to find corresponding output_memory
|
||||
for (auto && memConnection : graphCompiler.memory_connection) {
|
||||
if (memConnection.second.getInput()->name == outLayer->name) {
|
||||
// if connection is found, replace memory input layer with memory output layer
|
||||
outLayer = memConnection.second.getOutput();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// searching for outData represented in GNA blob
|
||||
// using ufs - upper first search
|
||||
gnalog() << "[UFS] searching for : "<< outPort.first << " representation in GNA\n";
|
||||
@@ -693,10 +706,25 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
|
||||
num_rotate_rows = dnn->num_rotate_rows;
|
||||
num_rotate_columns = dnn->num_rotate_columns;
|
||||
|
||||
for (auto& gnaMemoryConn : graphCompiler.memory_connection) {
|
||||
std::string name = gnaMemoryConn.first;
|
||||
GNAMemoryLayer memLayer = gnaMemoryConn.second;
|
||||
|
||||
InferenceEngine::CNNLayerPtr layer = memLayer.getInput();
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
||||
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
|
||||
|
||||
auto ptr = make_blob_with_precision(TensorDesc(InferenceEngine::Precision::I16,
|
||||
memLayer.getDims(),
|
||||
memLayer.getDims().size() == 2 ? NC : NCHW),
|
||||
memLayer.gna_ptr);
|
||||
graphCompiler.memoryStates.emplace_back(std::make_shared<memory::GNAMemoryState>(name, ptr, scale_factor));
|
||||
}
|
||||
|
||||
DumpXNNToFile();
|
||||
|
||||
#ifdef PLOT
|
||||
dnn->WriteGraphWizModel("gna-blob.dot");
|
||||
dnn->WriteGraphWizModel("/data/local/tmp/gna-blob.dot");
|
||||
#endif
|
||||
#if GNA_LIB_VER == 2
|
||||
createRequestConfigsForGnaModels();
|
||||
@@ -1047,7 +1075,7 @@ std::vector<InferenceEngine::MemoryStateInternal::Ptr> GNAPlugin::QueryState()
|
||||
return {};
|
||||
}
|
||||
|
||||
return {std::make_shared<memory::GNAMemoryState>(shared_from_this())};
|
||||
return graphCompiler.memoryStates;
|
||||
}
|
||||
|
||||
std::string GNAPlugin::GetName() const noexcept {
|
||||
@@ -1400,6 +1428,14 @@ void GNAPlugin::SetConfig(const std::map<std::string, std::string> &config) {
|
||||
}
|
||||
});
|
||||
|
||||
if_set(CONFIG_KEY(IDENTITY_SCALE_FACTOR), [&] {
|
||||
auto idScaleFactor = InferenceEngine::CNNLayer::ie_parse_float(value);
|
||||
if (fp32eq(idScaleFactor, 0.0f)) {
|
||||
THROW_GNA_EXCEPTION << "identity scale factor of 0.0f not supported";
|
||||
}
|
||||
identity_SF = idScaleFactor;
|
||||
});
|
||||
|
||||
if_set(GNA_CONFIG_KEY(LIB_N_THREADS), [&] {
|
||||
uint64_t lib_threads = std::stoul(value, NULL, 10);
|
||||
if (lib_threads == 0 || lib_threads > std::numeric_limits<uint8_t>::max()/2-1) {
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#endif
|
||||
|
||||
namespace GNAPluginNS {
|
||||
extern float identity_SF;
|
||||
class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::enable_shared_from_this<GNAPlugin> {
|
||||
protected:
|
||||
std::string _pluginName = "GNA";
|
||||
|
||||
@@ -6,8 +6,28 @@
|
||||
|
||||
#include <ostream>
|
||||
#include <details/ie_exception.hpp>
|
||||
#include "sys/timeb.h"
|
||||
|
||||
// #define GNA_DEBUG
|
||||
typedef unsigned long long time_tsc;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
time_tsc start; // time value on profiler start
|
||||
time_tsc stop; // time value on profiler stop
|
||||
time_tsc passed; // time passed between start and stop
|
||||
} intel_gna_profiler_tsc;
|
||||
|
||||
typedef struct timeb time_rtc;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
time_rtc start; // time value on profiler start
|
||||
time_rtc stop; // time value on profiler stop
|
||||
time_rtc passed; // time passed between start and stop
|
||||
} intel_gna_profiler_rtc;
|
||||
|
||||
//#define GNA_DEBUG
|
||||
#ifdef GNA_DEBUG
|
||||
#include <iostream>
|
||||
/**
|
||||
@@ -58,7 +78,7 @@ inline GnaLog & gnawarn() {
|
||||
#ifdef __PRETTY_FUNCTION__
|
||||
#undef __PRETTY_FUNCTION__
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
#if defined _WIN32
|
||||
# define __PRETTY_FUNCTION__ __FUNCSIG__
|
||||
#else
|
||||
# define __PRETTY_FUNCTION__ __FUNCTION__
|
||||
|
||||
@@ -111,6 +111,7 @@ std::map<std::string, std::string> GNAPlugin::supportedConfigKeysWithDefaults()
|
||||
{GNA_CONFIG_KEY(PRECISION), Precision(Precision::I8).name()},
|
||||
{GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), CONFIG_VALUE(YES)},
|
||||
{CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(NO)},
|
||||
{CONFIG_KEY(IDENTITY_SCALE_FACTOR), "2048.0"},
|
||||
{GNA_CONFIG_KEY(LIB_N_THREADS), "1"},
|
||||
{CONFIG_KEY(SINGLE_THREAD), CONFIG_VALUE(YES)}
|
||||
};
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
typedef struct {
|
||||
typedef struct pwl_gna_slope_scale_t{
|
||||
double slope;
|
||||
uint64_t slope_scale = 0;
|
||||
uint32_t slope_scale_index;
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#include "details/caseless.hpp"
|
||||
#include "ie_algorithm.hpp"
|
||||
#include "gna-api.h"
|
||||
|
||||
#include "gna_permute.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
|
||||
@@ -68,7 +68,7 @@ class LayerInfo {
|
||||
IS_VALID();
|
||||
static InferenceEngine::details::caseless_set<std::string> activations =
|
||||
{ "clamp", "sigmoid", "identity", "relu",
|
||||
"leakyrelu", "tanh", "prelu", "exp", "log", "sign", "abs", "neghalflog"};
|
||||
"leakyrelu", "tanh", "prelu", "exp", "log", "sign", "abs", "neghalflog", "divbyn"};
|
||||
return activations.find(layer->type) != activations.end();
|
||||
}
|
||||
|
||||
@@ -155,6 +155,43 @@ class LayerInfo {
|
||||
bool isPermute() const noexcept {
|
||||
return isOfType("permute");
|
||||
}
|
||||
// @brief this not only mathematically trivial, has some WA for kaldi case
|
||||
bool isTrivialPermute() {
|
||||
if (!isPermute()) return false;
|
||||
|
||||
auto layerOrder = layer->GetParamAsInts("order");
|
||||
|
||||
if (layerOrder == std::vector<int>({ 0, 3, 2, 1 })) {
|
||||
return true; // supported case
|
||||
}
|
||||
IE_ASSERT(!layer->insData.empty());
|
||||
auto inputs = layer->insData.begin()->lock();
|
||||
auto inputsOrder = inputs->getTensorDesc().getDims();
|
||||
|
||||
// cases when all permutations happened either between 1 and X shape where no other dims in between
|
||||
auto permuteSequence = genPermutations(layerOrder.begin(), layerOrder.end());
|
||||
auto inputsOrderTransformed = inputsOrder;
|
||||
for (auto && permute : permuteSequence) {
|
||||
// check dims of permuted
|
||||
if (inputsOrderTransformed[permute.first] == 1 &&
|
||||
inputsOrderTransformed[permute.second] == 1) {
|
||||
return true;
|
||||
}
|
||||
if (inputsOrderTransformed[permute.first] != 1 &&
|
||||
inputsOrderTransformed[permute.second] != 1) {
|
||||
return false;
|
||||
}
|
||||
// check dims in between
|
||||
for (int j = permute.first + 1; j != permute.second; j++) {
|
||||
if (inputsOrderTransformed[j] != 1) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// apply permutation
|
||||
std::swap(inputsOrderTransformed[permute.first], inputsOrderTransformed[permute.second]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool isPooling() const noexcept {
|
||||
return isOfType("pooling");
|
||||
}
|
||||
|
||||
@@ -38,6 +38,11 @@ enum LayerType {
|
||||
Memory,
|
||||
Power,
|
||||
Crop,
|
||||
Exp,
|
||||
Log,
|
||||
NegHalfLog,
|
||||
Identity,
|
||||
DivByN,
|
||||
LSTMCell,
|
||||
TensorIterator,
|
||||
NO_TYPE
|
||||
@@ -66,6 +71,11 @@ static const InferenceEngine::details::caseless_map<std::string, GNAPluginNS::La
|
||||
{ "Power" , Power},
|
||||
{ "Memory" , Memory },
|
||||
{ "Crop" , Crop },
|
||||
{ "Log", Log },
|
||||
{ "NegHalfLog" , NegHalfLog },
|
||||
{ "DivByN", DivByN },
|
||||
{ "Identity", Identity },
|
||||
{ "Exp", Exp },
|
||||
{ "LSTMCell", LSTMCell },
|
||||
{ "TensorIterator", TensorIterator }
|
||||
};
|
||||
|
||||
104
inference-engine/src/gna_plugin/layers/gna_permute.hpp
Normal file
104
inference-engine/src/gna_plugin/layers/gna_permute.hpp
Normal file
@@ -0,0 +1,104 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <utility>
|
||||
#include "gna_plugin_log.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
template <class T>
|
||||
class PermuteSequence {
|
||||
public:
|
||||
using cnt_type = std::vector<std::pair<T, T>>;
|
||||
|
||||
private:
|
||||
std::vector<T> orderVec;
|
||||
cnt_type permutes;
|
||||
|
||||
public:
|
||||
explicit PermuteSequence(std::vector<T> && orderVecIn) : orderVec(std::move(orderVecIn)) {
|
||||
std::vector<bool> counter(orderVec.size());
|
||||
for (auto && x : this->orderVec) {
|
||||
if (x < 0) {
|
||||
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be >= 0";
|
||||
}
|
||||
if (x >= counter.size()) {
|
||||
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be < "<< counter.size();
|
||||
}
|
||||
if (counter[x]) {
|
||||
THROW_GNA_EXCEPTION << "invalid order: element " << x << " present more than once";
|
||||
}
|
||||
counter[x] = true;
|
||||
}
|
||||
|
||||
// generating permutation graph
|
||||
std::fill(counter.begin(), counter.end(), false);
|
||||
|
||||
// length of current cycle
|
||||
std::list<cnt_type> permuteCycles;
|
||||
bool newSeq = false;
|
||||
|
||||
for (int i = 0; i != orderVec.size();) {
|
||||
// we have this permutation on the list already
|
||||
if (counter[i]) {
|
||||
newSeq = false;
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
counter[i] = true;
|
||||
// looks we found a permutation
|
||||
if (orderVec[i] != i) {
|
||||
if (!newSeq) {
|
||||
newSeq = true;
|
||||
permuteCycles.push_back({});
|
||||
}
|
||||
permuteCycles.back().push_back({i, orderVec[i]});
|
||||
counter[i] = true;
|
||||
i = orderVec[i];
|
||||
continue;
|
||||
}
|
||||
// this dims not permuted
|
||||
i++;
|
||||
}
|
||||
|
||||
for (auto && cycle : permuteCycles) {
|
||||
for (int i = 0; i + 1 < cycle.size(); i++) {
|
||||
permutes.push_back(cycle[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
const cnt_type & cnt() const noexcept {
|
||||
return permutes;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief generates permutations sequence in order to reach given order
|
||||
* @tparam Iterator
|
||||
* @return
|
||||
*/
|
||||
template <class Iterator>
|
||||
inline typename PermuteSequence<typename std::iterator_traits<Iterator>::value_type>::cnt_type genPermutations(
|
||||
Iterator beg, Iterator en) {
|
||||
static_assert(
|
||||
std::is_same<std::random_access_iterator_tag,
|
||||
typename std::iterator_traits<Iterator>::iterator_category>::value,
|
||||
"The genPermutations() function only accepts random access iterators or raw pointers to an array.\n");
|
||||
using value_type = typename std::iterator_traits<Iterator>::value_type;
|
||||
std::vector<value_type> v;
|
||||
for (; beg != en; beg++) {
|
||||
v.push_back(*beg);
|
||||
}
|
||||
auto permute = PermuteSequence<value_type> (std::move(v));
|
||||
return permute.cnt();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline typename PermuteSequence<T>::cnt_type genPermutations(const std::initializer_list<T> & lst) {
|
||||
return genPermutations(lst.begin(), lst.end());
|
||||
}
|
||||
} // namespace GNAPluginNS
|
||||
@@ -7,19 +7,54 @@
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <cpp_interfaces/impl/ie_memory_state_internal.hpp>
|
||||
#include <ie_blob.h>
|
||||
#include "gna_plugin.hpp"
|
||||
#include "preprocessing.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace memory {
|
||||
class GNAMemoryState : public InferenceEngine::MemoryStateInternal {
|
||||
std::shared_ptr<GNAPlugin> plg;
|
||||
std::string stateName;
|
||||
InferenceEngine::Blob::Ptr memState;
|
||||
float scalefactor;
|
||||
|
||||
public:
|
||||
using Ptr = InferenceEngine::MemoryStateInternal::Ptr;
|
||||
|
||||
explicit GNAMemoryState(std::shared_ptr<GNAPlugin> plg)
|
||||
: InferenceEngine::MemoryStateInternal("GNAResetState"), plg(plg) {}
|
||||
explicit GNAMemoryState(std::string name,
|
||||
InferenceEngine::Blob::Ptr state,
|
||||
float scale_factor)
|
||||
: InferenceEngine::MemoryStateInternal(name), stateName(name),
|
||||
memState(state), scalefactor(scale_factor) {}
|
||||
|
||||
void Reset() override {
|
||||
plg->Reset();
|
||||
std::memset(memState->buffer().as<int16_t*>(), 0, memState->byteSize());
|
||||
}
|
||||
|
||||
/**
 * @brief Overwrites the stored memory state with the content of newState.
 *
 * Only 2D blobs are accepted. An FP32 blob whose byte size is twice the byte size of the
 * stored state is converted with ConvertToInt16 using the stored scale factor; an I16 blob
 * of the same byte size is copied as-is.
 *
 * @param newState blob holding the new state values
 * @throws GNA exception when newState is null, is not 2D, or its precision / size
 *         does not match one of the two supported combinations
 */
void SetState(InferenceEngine::Blob::Ptr newState) override {
    // fail with a readable error instead of dereferencing a null pointer
    if (newState == nullptr) {
        THROW_GNA_EXCEPTION << "SetState call failed. Null blob passed";
    }

    const auto & newDesc = newState->getTensorDesc();
    const auto & newDims = newDesc.getDims();
    if (newDims.size() != 2) {
        THROW_GNA_EXCEPTION << "SetState failed for blob dimensions > 2";
    }

    const auto & newPrecision = newDesc.getPrecision();
    if ((newPrecision == InferenceEngine::Precision::FP32) &&
        (newState->byteSize()/2 == memState->byteSize())) {
        // FP32 input: quantize into the int16 state buffer
        ConvertToInt16(memState->buffer().as<int16_t*>(),
                       newState->buffer().as<float*>(),
                       newDims[0],
                       newDims[1],
                       scalefactor);
    } else if ((newPrecision == InferenceEngine::Precision::I16) &&
               (newState->byteSize() == memState->byteSize())) {
        // I16 input of identical size: raw copy
        std::memcpy(memState->buffer().as<uint8_t*>(),
                    newState->buffer().as<uint8_t*>(),
                    newState->byteSize());
    } else {
        THROW_GNA_EXCEPTION << "SetState call failed. Invalid precision / size";
    }
}
|
||||
|
||||
InferenceEngine::Blob::CPtr GetLastState() const override {
|
||||
return memState;
|
||||
}
|
||||
};
|
||||
} // namespace memory
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
|
||||
#ifdef _NO_MKL_
|
||||
#include <cmath>
|
||||
@@ -30,6 +31,140 @@
|
||||
#include "round_float_define.hpp"
|
||||
|
||||
// d/dx tanh(x) = 1 - tanh(x)^2
double first_deriv_tanh(const double x) {
    return 1.0 - tanh(x) * tanh(x);
}

// d/dx exp(x) = exp(x)
double first_deriv_exp(const double x) {
    return exp(x);
}

// d/dx log(x) = 1 / x
double first_deriv_log(const double x) {
    return 1.0 / x;
}

// neghalflog(x) = -0.5 * log(x)
double neghalflog(const double x) {
    return -0.5 * log(x);
}

// d/dx neghalflog(x) = -0.5 / x
double first_deriv_neghalflog(const double x) {
    return -0.5 / x;
}
|
||||
|
||||
std::map<std::string, std::vector<pwl_t>> pwl_search_map {
|
||||
{"log", {{1.0769533473860933e-05 , 8.4918474385631271e-06 , -11.662751279293021 , 92854.532875275778 , -12.451257806448908},
|
||||
{1.7021658371797054e-05 , 1.3421901942456181e-05 , -11.204973371284382 , 58748.682305649265 , -11.993492424439317},
|
||||
{2.6901160981803783e-05 , 2.121301943569138e-05 , -10.747255484868321 , 37173.116828541606 , -11.5358095346374},
|
||||
{4.2508975575310986e-05 , 3.3523097254749539e-05 , -10.289651523932033 , 23524.443637281987 , -11.078263735848511},
|
||||
{6.7159638615065504e-05 , 5.2968285045173431e-05 , -9.8322142997398423 , 14889.895488146327 , -10.620906528248819},
|
||||
{0.00010607938536412906 , 8.3674876369534097e-05 , -9.3749963641228913 , 9426.9022823556988 , -10.163791247146683},
|
||||
{0.00016750484167401942 , 0.0001321476299142904 , -8.9180484530997628 , 5969.97668847147 , -9.7069667231248324},
|
||||
{0.00026440839561966089 , 0.00020863498397988411 , -8.4614207323653048 , 3782.0281676624659 , -9.2504841185370346},
|
||||
{0.00041721037390891224 , 0.00032927394464933357 , -8.0051607849959225 , 2396.8723275762272 , -8.794388391117776},
|
||||
{0.00065803047888043399 , 0.00051945736028058084 , -7.5493154189054579 , 1519.6864462895246 , -8.3387277287491912},
|
||||
{0.001037362756270747 , 0.00081911613772069059 , -7.0939280363180339 , 963.98294035052527 , -7.8835420192465913},
|
||||
{0.0016345178691889663 , 0.0012909987974520291 , -6.6390412024897918 , 611.8012037985186 , -7.4288758208733832},
|
||||
{0.0025739995666866798 , 0.0020336387647946299 , -6.1846931764806961 , 388.50045390148483 , -6.9747627596750643},
|
||||
{0.004051067401148134 , 0.0032016468460507675 , -5.7309215067520842 , 246.84852187761314 , -6.5212432982738378},
|
||||
{0.0063717878399278975 , 0.0050374410331472524 , -5.2777584251958025 , 156.94182309926316 , -6.0683436046929682},
|
||||
{0.010015399744147635 , 0.0079208190427630248 , -4.8252358236823785 , 99.846239345996764 , -5.6160998176424242},
|
||||
{0.015731959416888035 , 0.012446344656992419 , -4.3733791100375914 , 63.564872848992557 , -5.1645294255940524},
|
||||
{0.024693977573234764 , 0.019544055268788288 , -3.9222140374798418 , 40.495703741299124 , -4.7136643095482684},
|
||||
{0.038733765155746253 , 0.030667594605217476 , -3.4717584839571183 , 25.817268111660667 , -4.2635119962197363},
|
||||
{0.060710894498842323 , 0.048087203788268418 , -3.0220317632779459 , 16.471508256546421 , -3.8141005375106394},
|
||||
{0.095087507492990303 , 0.07534567638130947 , -2.5730436069008258 , 10.516628591549917 , -3.3654261013821722},
|
||||
{0.14881589164648179 , 0.1179673976691515 , -2.1248067941840345 , 6.7197124509762807 , -2.9175137851107023},
|
||||
{0.23272951343232598 , 0.18456019070518948 , -1.6773223736744836 , 4.2968336299589422 , -2.4703468078481774},
|
||||
{0.36368021928596278 , 0.28852643303350278 , -1.2305967272577258 , 2.7496683816440872 , -2.0239487374384986},
|
||||
{0.56789454936972605 , 0.45071871177964989 , -0.78462174664264084 , 1.7608902939988476 , -1.5782879515390904},
|
||||
{0.88610468810159015 , 0.70355895583540895 , -0.33939781495255494 , 1.1285348259949077 , -1.1333885987534271},
|
||||
{1.3816335789411809 , 1.0974241494088073 , 0.10509277274225079 , 0.72378090344789758 , -0.68920186958239626},
|
||||
{2.1526786672071778 , 1.7105408556958117 , 0.54885493633765814 , 0.46453751562343976 , -0.24575546313966701},
|
||||
{3.3517682459038256 , 2.6643098700159529 , 0.99191642472855346 , 0.29834998324305195 , 0.19701961965499604},
|
||||
{5.2150907594192395 , 4.1470315870077039 , 1.4342864241471516 , 0.19175121702222525 , 0.63908807030881409},
|
||||
{8.1092990368633888 , 6.4506248753490381 , 1.8760032407108322 , 0.12331522064412515 , 1.0805430109146834},
|
||||
{12.601356824411839 , 10.02743923506598 , 2.3170788926824022 , 0.079356533898219603 , 1.5213360711125516},
|
||||
{19.571181442374591 , 15.578135417317394 , 2.7575629024279547 , 0.051095535695910903 , 1.9615897281366799},
|
||||
{30.377546048984698 , 24.187602070756132 , 3.1974682131414882 , 0.032919051406834175 , 2.4012352971662185},
|
||||
{47.129543514739666 , 37.535139255177242 , 3.6368564758700774 , 0.02121811342575923 , 2.840431633702059},
|
||||
{73.079549140200285 , 58.219475920898645 , 4.0757390773799447 , 0.013683718793633217 , 3.2790801405656671},
|
||||
{113.27901633813272 , 90.261352592829454 , 4.5141911073789212 , 0.0088277602712848978 , 3.7173855249275034},
|
||||
{175.50631439356027 , 139.88074429844423 , 4.9522192021630707 , 0.0056978006942677408 , 4.155206600184707},
|
||||
{271.85671771564853 , 216.69890827860715 , 5.3899137902216161 , 0.0036784082747808367 , 4.5928067328736137},
|
||||
{420.92464268160882 , 335.59785527230628 , 5.8272726607059671 , 0.0023757221568907016 , 5.02998540013055},
|
||||
{651.67787232666603 , 519.59734639612975 , 6.2644043286254485 , 0.0015345004678918 , 5.4670819574652496},
|
||||
{1008.5639944354145 , 804.30768425774158 , 6.7012924752877234 , 0.00099150872479816517 , 5.9038143889239647},
|
||||
{1561.0124734004803 , 1244.8233916434529 , 7.1380676425712917 , 0.00064060987150321626 , 6.3406214896063817},
|
||||
{2415.300015438163 , 1926.4004262204676 , 7.5746926191112163 , 0.00041402724034620179 , 6.7771103668414092},
|
||||
{0 , 2981 , 8.0113255703134367 , 0 , 0}}},
|
||||
{"exp", {{-5.2905549738656035 , -7.6246190071105957 , -0.0029375872840971921 , 0.0050389629907875762 , 0.035482585711588618},
|
||||
{-3.2765565204702316 , -3.966387017312524 , 0.015496108324210485 , 0.037758052285013388 , 0.16525915670649505},
|
||||
{-2.304345998844584 , -2.7128986917228044 , 0.062825386060480992 , 0.099824064363453618 , 0.33363795967454735},
|
||||
{-1.6505759560844804 , -1.9420942111377082 , 0.1397702221420461 , 0.19193932815516293 , 0.512534480241849},
|
||||
{-1.1551903296891044 , -1.3825157257724856 , 0.24717534067313054 , 0.31499757780654358 , 0.6826644455709191},
|
||||
{-0.75535692384923692 , -0.94198675918566444 , 0.38594089810159893 , 0.46984288666759971 , 0.82852667624004861},
|
||||
{-0.41999557300201118 , -0.57832149976517178 , 0.55680643336844471 , 0.65704972856644539 , 0.93679241781329048},
|
||||
{-0.13153483933686491 , -0.2688406706913587 , 0.76015072810791207 , 0.87674872926137593 , 0.99585644451033684},
|
||||
{0.4235904473306159 , 0 , 0.94308787650971637 , 1.5274359002153208 , 0.94308787650971637},
|
||||
{1.0937368353927797 , 0.79581116380659767 , 2.1586384179000495 , 2.9854092373666563 , -0.2171835817276766},
|
||||
{ 1.5979006351086957 , 1.3669113028909896 , 3.8636060485839758 , 4.9426451092152783 , -2.8925514173812577},
|
||||
{ 2.0041923346353099 , 1.8147648658250355 , 6.0771872710645756 , 7.420098519377718 , -7.3885468228624731},
|
||||
{ 2.3452256111159864 , 2.1843822125126793 , 8.8197843979578803 , 10.435626855401082 , -13.975613281399871},
|
||||
{ 2.6387547172826245 , 2.4991598196640292 , 12.1046860486254 , 13.99576406829804 , -22.872965156362628},
|
||||
{ 0 , 2.7725581832447883 , 15.931105041960471 , -0 , -0}}},
|
||||
{"sigmoid", {{-6.0269768546940687 , -10 , -0.0033685324745532531 , 0.0024011761556240077 , 0.020643229081686823},
|
||||
{-3.4572777895083773 , -4.2646607997060624 , 0.010403027257608216 , 0.029619100828046807 , 0.13671844548152082},
|
||||
{-2.302945392313446 , -2.7960754970003254 , 0.053901203413037058 , 0.082620267964448268 , 0.28491371022403178},
|
||||
{-1.4431692770391085 , -1.8482809500056467 , 0.13220824286098024 , 0.15455301637191463 , 0.41786563878710092},
|
||||
{-0.58709153507881506 , -1.0390898867848257 , 0.25727116250295479 , 0.22963741468060328 , 0.49588507772498291},
|
||||
{0.58709153507881362 , 0 , 0.50411492227501709 , 0.22963741468060325 , 0.50411492227501709},
|
||||
{1.4431692770391091 , 1.0390898867848253 , 0.7427288374970451 , 0.15455301637191451 , 0.58213436121289919},
|
||||
{2.3029453923134513 , 1.8482809500056492 , 0.86779175713902001 , 0.082620267964447991 , 0.71508628977596878},
|
||||
{3.4572777895083746 , 2.7960754970003263 , 0.94609879658696283 , 0.029619100828046918 , 0.86328155451847877},
|
||||
{6.0269768546940705 , 4.2646607997060606 , 0.98959697274239178 , 0.0024011761556240298 , 0.97935677091831308},
|
||||
{ 0 , 10 , 1.0033685324745534 , 0 , 0}}},
|
||||
{"tanh", {{-3.0134884273470361 , -5 , -1.0067370649491065 , 0.0096047046224959371 , -0.95871354183662683},
|
||||
{-1.7286388947541886 , -2.1323303998530339 , -0.979193945484784 , 0.11847640331218724 , -0.72656310903695842},
|
||||
{-1.1514726961567241 , -1.3980377485001632 , -0.892197593173926 , 0.3304810718577928 , -0.43017257955193672},
|
||||
{-0.72158463851955434 , -0.92414047500282348 , -0.73558351427803959 , 0.61821206548765828 , -0.16426872242579849},
|
||||
{-0.29354576753940709 , -0.51954494339241275 , -0.48545767499409032 , 0.91854965872241312 , -0.0082298445500341155},
|
||||
{0.29354576753940703 , 0 , 0.0082298445500341155 , 0.91854965872241323 , 0.0082298445500341155},
|
||||
{0.72158463851955434 , 0.51954494339241275 , 0.48545767499409037 , 0.6182120654876585 , 0.16426872242579826},
|
||||
{1.1514726961567245 , 0.92414047500282415 , 0.73558351427804003 , 0.33048107185779213 , 0.43017257955193755},
|
||||
{1.7286388947541889 , 1.3980377485001632 , 0.89219759317392588 , 0.11847640331218723 , 0.72656310903695842},
|
||||
{3.0134884273470322 , 2.1323303998530312 , 0.97919394548478356 , 0.0096047046224960447 , 0.95871354183662627},
|
||||
{ 0 , 5 , 1.0067370649491065 , 0 , 0}}},
|
||||
{"NegHalfLog", {{1.0769533473860933e-05 ,8.4918474385631271e-06 ,5.8313756396465104 ,-46427.266437637889 ,6.2256289032244538 },
|
||||
{1.7021658371797054e-05 ,1.3421901942456181e-05 ,5.6024866856421909 ,-29374.341152824632 ,5.9967462122196586 },
|
||||
{2.6901160981803783e-05 ,2.121301943569138e-05 ,5.3736277424341603 ,-18586.558414270803 ,5.7679047673186998 },
|
||||
{4.2508975575310986e-05 ,3.3523097254749539e-05 ,5.1448257619660165 ,-11762.221818640994 ,5.5391318679242554 },
|
||||
{6.7159638615065504e-05 ,5.2968285045173431e-05 ,4.9161071498699211 ,-7444.9477440731634 ,5.3104532641244093 },
|
||||
{0.00010607938536412906 ,8.3674876369534097e-05 ,4.6874981820614456 ,-4713.4511411778494 ,5.0818956235733417 },
|
||||
{0.00016750484167401942 ,0.0001321476299142904 ,4.4590242265498814 ,-2984.988344235735 ,4.8534833615624162 },
|
||||
{0.00026440839561966089 ,0.00020863498397988411 ,4.2307103661826524 ,-1891.0140838312329 ,4.6252420592685173 },
|
||||
{0.00041721037390891224 ,0.00032927394464933357 ,4.0025803924979613 ,-1198.4361637881136 ,4.397194195558888 },
|
||||
{0.00065803047888043399 ,0.00051945736028058084 ,3.7746577094527289 ,-759.84322314476231 ,4.1693638643745956 },
|
||||
{0.001037362756270747 ,0.00081911613772069059 ,3.546964018159017 ,-481.99147017526263 ,3.9417710096232956 },
|
||||
{0.0016345178691889663 ,0.0012909987974520291 ,3.3195206012448959 ,-305.9006018992593 ,3.7144379104366916 },
|
||||
{0.0025739995666866798 ,0.0020336387647946299 ,3.092346588240348 ,-194.25022695074242 ,3.4873813798375322 },
|
||||
{0.004051067401148134 ,0.0032016468460507675 ,2.8654607533760421 ,-123.42426093880657 ,3.2606216491369189 },
|
||||
{0.0063717878399278975 ,0.0050374410331472524 ,2.6388792125979013 ,-78.47091154963158 ,3.0341718023464841 },
|
||||
{0.010015399744147635 ,0.0079208190427630248 ,2.4126179118411892 ,-49.923119672998382 ,2.8080499088212121 },
|
||||
{0.015731959416888035 ,0.012446344656992419 ,2.1866895550187957 ,-31.782436424496279 ,2.5822647127970262 },
|
||||
{0.024693977573234764 ,0.019544055268788288 ,1.9611070187399209 ,-20.247851870649562 ,2.3568321547741342 },
|
||||
{0.038733765155746253 ,0.030667594605217476 ,1.7358792419785591 ,-12.908634055830333 ,2.1317559981098682 },
|
||||
{0.060710894498842323 ,0.048087203788268418 ,1.5110158816389729 ,-8.2357541282732107 ,1.9070502687553197 },
|
||||
{0.095087507492990303 ,0.07534567638130947 ,1.2865218034504129 ,-5.2583142957749587 ,1.6827130506910861 },
|
||||
{0.14881589164648179 ,0.1179673976691515 ,1.0624033970920173 ,-3.3598562254881403 ,1.4587568925553511 },
|
||||
{0.23272951343232598 ,0.18456019070518948 ,0.83866118683724178 ,-2.1484168149794711 ,1.2351734039240887 },
|
||||
{0.36368021928596278 ,0.28852643303350278 ,0.61529836362886292 ,-1.3748341908220436 ,1.0119743687192493 },
|
||||
{0.56789454936972605 ,0.45071871177964989 ,0.39231087332132042 ,-0.88044514699942378 ,0.7891439757695452 },
|
||||
{0.88610468810159015 ,0.70355895583540895 ,0.16969890747627747 ,-0.56426741299745387 ,0.56669429937671356 },
|
||||
{1.3816335789411809 ,1.0974241494088073 ,-0.052546386371125395 ,-0.36189045172394879 ,0.34460093479119813 },
|
||||
{2.1526786672071778 ,1.7105408556958117 ,-0.27442746816882907 ,-0.23226875781171988 ,0.1228777315698335 },
|
||||
{3.3517682459038256 ,2.6643098700159529 ,-0.49595821236427673 ,-0.14917499162152598 ,-0.098509809827498018 },
|
||||
{5.2150907594192395 ,4.1470315870077039 ,-0.71714321207357579 ,-0.095875608511112625 ,-0.31954403515440705 },
|
||||
{8.1092990368633888 ,6.4506248753490381 ,-0.93800162035541612 ,-0.061657610322062573 ,-0.54027150545734171 },
|
||||
{12.601356824411839 ,10.02743923506598 ,-1.1585394463412011 ,-0.039678266949109801 ,-0.76066803555627582 },
|
||||
{19.571181442374591 ,15.578135417317394 ,-1.3787814512139773 ,-0.025547767847955451 ,-0.98079486406833993 },
|
||||
{30.377546048984698 ,24.187602070756132 ,-1.5987341065707441 ,-0.016459525703417088 ,-1.2006176485831093 },
|
||||
{47.129543514739666 ,37.535139255177242 ,-1.8184282379350387 ,-0.010609056712879615 ,-1.4202158168510295 },
|
||||
{73.079549140200285 ,58.219475920898645 ,-2.0378695386899723 ,-0.0068418593968166087 ,-1.6395400702828336 },
|
||||
{113.27901633813272 ,90.261352592829454 ,-2.2570955536894606 ,-0.0044138801356424489 ,-1.8586927624637517 },
|
||||
{175.50631439356027 ,139.88074429844423 ,-2.4761096010815353 ,-0.0028489003471338704 ,-2.0776033000923535 },
|
||||
{271.85671771564853 ,216.69890827860715 ,-2.694956895110808 ,-0.0018392041373904184 ,-2.2964033664368069 },
|
||||
{420.92464268160882 ,335.59785527230628 ,-2.9136363303529835 ,-0.0011878610784453508 ,-2.514992700065275 },
|
||||
{651.67787232666603 ,519.59734639612975 ,-3.1322021643127242 ,-0.00076725023394590001 ,-2.7335409787326248 },
|
||||
{1008.5639944354145 ,804.30768425774158 ,-3.3506462376438617 ,-0.00049575436239908258 ,-2.9519071944619824 },
|
||||
{1561.0124734004803 ,1244.8233916434529 ,-3.5690338212856458 ,-0.00032030493575160813 ,-3.1703107448031909 },
|
||||
{2415.300015438163 ,1926.4004262204676 ,-3.7873463095556081 ,-0.00020701362017310089 ,-3.3885551834207046 },
|
||||
{0 ,2981 ,-4.0056627851567184 ,-0 ,-0 }}}
|
||||
};
|
||||
|
||||
// Logistic sigmoid, evaluated through the identity sigmoid(x) = (1 + tanh(x / 2)) / 2.
double sigmoid(const double x) {
    const double half_arg_tanh = tanh(x / 2);
    return 0.5 * (1.0 + half_arg_tanh);
}
|
||||
// First derivative of the logistic sigmoid: s(x) * (1 - s(x)).
// sigmoid(x) is evaluated once and reused; the result is identical to
// calling it twice, but one tanh evaluation is saved per call.
double first_deriv_sigmoid(const double x) {
    const double s = sigmoid(x);
    return s * (1.0 - s);
}
|
||||
@@ -174,6 +309,15 @@ double calculate_error_pct(const DnnActivationType fun,
|
||||
min_val = max_val = sigmoid(l_bound); break;
|
||||
case kActTanh:
|
||||
min_val = max_val = tanh(l_bound); break;\
|
||||
case kActExp:
|
||||
min_val = max_val = exp(l_bound);
|
||||
break;
|
||||
case kActLog:
|
||||
min_val = max_val = log(l_bound);
|
||||
break;
|
||||
case kActNegHalfLog:
|
||||
min_val = max_val = neghalflog(l_bound);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -188,6 +332,15 @@ double calculate_error_pct(const DnnActivationType fun,
|
||||
case kActTanh:
|
||||
val = tanh(arg);
|
||||
break;
|
||||
case kActExp:
|
||||
val = exp(arg);
|
||||
break;
|
||||
case kActLog:
|
||||
val = log(arg);
|
||||
break;
|
||||
case kActNegHalfLog:
|
||||
val = neghalflog(arg);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -209,6 +362,7 @@ bool split_search(const DnnActivationType fun,
|
||||
switch (fun) {
|
||||
case kActSigmoid:
|
||||
case kActTanh:
|
||||
case kActExp:
|
||||
if ((l_bound < 0.0) && (u_bound > 0.0)) {
|
||||
is_split = true;
|
||||
}
|
||||
@@ -254,7 +408,9 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
|
||||
pwl = pwl_search(fun, l_bound, 0.0, threshold, allowed_err_pct, samples, err_pct1);
|
||||
pwl = negative_pwl(pwl);
|
||||
pwl2 = pwl_search(fun, 0.0, u_bound, threshold, allowed_err_pct, samples, err_pct2);
|
||||
|
||||
if (fun == kActExp) {
|
||||
pwl2 = negative_pwl(pwl2); // both regions of exp are concave
|
||||
}
|
||||
// merge
|
||||
pwl.pop_back(); // remove final alpha and beta from first half
|
||||
pwl.insert(pwl.end(), pwl2.begin(), pwl2.end()); // concatenate the two halves
|
||||
@@ -274,10 +430,12 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
|
||||
pwl[0].alpha = pwl[0].t = pwl[0].beta = -std::numeric_limits<float>::infinity();
|
||||
pwl[0].m = 0.0;
|
||||
pwl[0].b = pwl[0].beta = KALDI_LSTM_CLIP_LOWER;
|
||||
pwl[1].alpha = pwl[0].t = pwl[1].beta = KALDI_LSTM_CLIP_LOWER;
|
||||
//pwl[1].alpha = pwl[0].t = pwl[1].beta = KALDI_LSTM_CLIP_LOWER;
|
||||
pwl[1].alpha = pwl[1].t = pwl[1].beta = KALDI_LSTM_CLIP_LOWER;
|
||||
pwl[1].m = 1.0;
|
||||
pwl[1].b = 0.0;
|
||||
pwl[2].alpha = pwl[0].t = pwl[1].beta = KALDI_LSTM_CLIP_UPPER;
|
||||
//pwl[2].alpha = pwl[0].t = pwl[1].beta = KALDI_LSTM_CLIP_UPPER;
|
||||
pwl[2].alpha = pwl[2].t = pwl[2].beta = KALDI_LSTM_CLIP_UPPER;
|
||||
pwl[2].m = 0.0;
|
||||
pwl[2].b = KALDI_LSTM_CLIP_UPPER;
|
||||
pwl[3].alpha = pwl[3].beta = std::numeric_limits<float>::infinity();
|
||||
@@ -294,6 +452,17 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
|
||||
if (u_bound == 0) negative = true; // make left half convex
|
||||
err = pivot_search(pwl, tanh, first_deriv_tanh, n_segments, l_bound, u_bound, threshold, negative);
|
||||
break;
|
||||
case kActExp:
|
||||
negative = true; // make function convex
|
||||
err = pivot_search(pwl, exp, first_deriv_exp, n_segments, l_bound, u_bound, threshold, negative);
|
||||
break;
|
||||
case kActLog:
|
||||
err = pivot_search(pwl, log, first_deriv_log, n_segments, l_bound, u_bound, threshold, negative);
|
||||
break;
|
||||
case kActNegHalfLog:
|
||||
negative = true; // make function convex
|
||||
err = pivot_search(pwl, neghalflog, first_deriv_neghalflog, n_segments, l_bound, u_bound, threshold, negative);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -308,6 +477,15 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
|
||||
case kActTanh:
|
||||
err = pivot_search(pwl, tanh, first_deriv_tanh, n_segments, l_bound, u_bound, threshold, negative);
|
||||
break;
|
||||
case kActExp:
|
||||
err = pivot_search(pwl, exp, first_deriv_exp, n_segments, l_bound, u_bound, threshold, negative);
|
||||
break;
|
||||
case kActLog:
|
||||
err = pivot_search(pwl, log, first_deriv_log, n_segments, l_bound, u_bound, threshold, negative);
|
||||
break;
|
||||
case kActNegHalfLog:
|
||||
err = pivot_search(pwl, neghalflog, first_deriv_neghalflog, n_segments, l_bound, u_bound, threshold, negative);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -326,30 +504,80 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
|
||||
void PwlDesignOpt16(const DnnActivation activation_type,
|
||||
std::vector<intel_pwl_segment_t> &ptr_segment,
|
||||
const float scale_in,
|
||||
const float scale_out) {
|
||||
const float scale_out,
|
||||
const uint32_t n) {
|
||||
std::vector<pwl_t> pwl;
|
||||
double err_pct = 0.0;
|
||||
switch (activation_type) {
|
||||
case kActSigmoid:
|
||||
pwl = pwl_search(kActSigmoid, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
|
||||
make_gna_pwl(activation_type, pwl, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, scale_in, scale_out, ptr_segment);
|
||||
if ( pwl_search_map.find("sigmoid") == pwl_search_map.end() ) {
|
||||
pwl = pwl_search(kActSigmoid, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
|
||||
} else {
|
||||
pwl = pwl_search_map["sigmoid"];
|
||||
}
|
||||
|
||||
make_gna_pwl(activation_type, pwl, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, scale_in, scale_out, ptr_segment, n);
|
||||
break;
|
||||
case kActTanh:
|
||||
pwl = pwl_search(kActTanh, -TANH_DOMAIN, TANH_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
|
||||
make_gna_pwl(activation_type, pwl, -TANH_DOMAIN, TANH_DOMAIN, scale_in, scale_out, ptr_segment);
|
||||
if ( pwl_search_map.find("tanh") == pwl_search_map.end() ) {
|
||||
pwl = pwl_search(kActTanh, -TANH_DOMAIN, TANH_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
|
||||
} else {
|
||||
pwl = pwl_search_map["tanh"];
|
||||
}
|
||||
make_gna_pwl(activation_type, pwl, -TANH_DOMAIN, TANH_DOMAIN, scale_in, scale_out, ptr_segment, n);
|
||||
break;
|
||||
case kActRelu:
|
||||
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment);
|
||||
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment, n);
|
||||
break;
|
||||
case kActLeakyRelu:
|
||||
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment);
|
||||
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment, n);
|
||||
break;
|
||||
case kActIdentity:
|
||||
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment);
|
||||
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment, n);
|
||||
break;
|
||||
case kActKaldiLstmClipping:
|
||||
make_gna_pwl(activation_type, pwl, KALDI_LSTM_CLIP_LOWER, KALDI_LSTM_CLIP_UPPER, scale_in, scale_out, ptr_segment);
|
||||
make_gna_pwl(activation_type, pwl, KALDI_LSTM_CLIP_LOWER, KALDI_LSTM_CLIP_UPPER, scale_in, scale_out, ptr_segment, n);
|
||||
break;
|
||||
case kActDivByN: {
|
||||
int32_t n_4 = n/4;
|
||||
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment, n_4);
|
||||
break;
|
||||
}
|
||||
case kActLog: {
|
||||
double x_min = (1 + ~XBASEMASK) / scale_in;
|
||||
double x_max = ((INT32_MAX / scale_in) < LOG_DOMAIN) ? (INT32_MAX / scale_in) : LOG_DOMAIN;
|
||||
if ( pwl_search_map.find("log") == pwl_search_map.end() ) {
|
||||
pwl = pwl_search(kActLog, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.066*PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
|
||||
} else {
|
||||
pwl = pwl_search_map["log"];
|
||||
}
|
||||
make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment, n);
|
||||
break;
|
||||
}
|
||||
case kActExp: {
|
||||
double x_min = -log(scale_out);
|
||||
double x_max = x_min + log(INT16_MAX);
|
||||
if ( pwl_search_map.find("exp") == pwl_search_map.end() ) {
|
||||
pwl = pwl_search(kActExp, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.5*PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
|
||||
} else {
|
||||
pwl = pwl_search_map["exp"];
|
||||
}
|
||||
make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment, n);
|
||||
break;
|
||||
}
|
||||
case kActNegHalfLog: {
|
||||
double x_min = (1 + ~XBASEMASK) / scale_in;
|
||||
double x_max = ((INT32_MAX / scale_in) < LOG_DOMAIN) ? (INT32_MAX / scale_in) : LOG_DOMAIN;
|
||||
if ( pwl_search_map.find("NegHalfLog") == pwl_search_map.end() ) {
|
||||
pwl = pwl_search(kActNegHalfLog, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.066*PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
|
||||
pwl = negative_pwl(pwl);
|
||||
} else {
|
||||
pwl = pwl_search_map["NegHalfLog"];
|
||||
}
|
||||
|
||||
make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment, n);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -359,7 +587,8 @@ void PwlDesign16(const DnnActivation activation_type,
|
||||
intel_pwl_segment_t *ptr_segment,
|
||||
const uint32_t num_segments,
|
||||
const float scale_in,
|
||||
const float scale_out) {
|
||||
const float scale_out,
|
||||
const uint32_t n) {
|
||||
switch (activation_type) {
|
||||
case kActSigmoid:
|
||||
{
|
||||
@@ -651,6 +880,27 @@ void PwlApply32(intel_dnn_component_t *component,
|
||||
}
|
||||
}
|
||||
break;
|
||||
case kActDivByN:
|
||||
for (uint32_t i = num_row_start; i <= num_row_end; i++) {
|
||||
for (uint32_t j = num_col_start; j <= num_col_end; j++) {
|
||||
ptr_out[i * num_columns + j] = ptr_in[i * num_columns + j]/(float)(num_row_end-num_row_start+1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case kActExp:
|
||||
for (uint32_t i = num_row_start; i <= num_row_end; i++) {
|
||||
for (uint32_t j = num_col_start; j <= num_col_end; j++) {
|
||||
ptr_out[i * num_columns + j] = exp(ptr_in[i * num_columns + j]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case kActLog:
|
||||
for (uint32_t i = num_row_start; i <= num_row_end; i++) {
|
||||
for (uint32_t j = num_col_start; j <= num_col_end; j++) {
|
||||
ptr_out[i * num_columns + j] = log(ptr_in[i * num_columns + j]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case kActCustom:
|
||||
// break;
|
||||
default:fprintf(stderr, "Unknown piecewise linear function type!\n");
|
||||
|
||||
@@ -27,6 +27,8 @@
|
||||
#define XBASEMASK 0xFFFFFFFC // only top 30 bits are used
|
||||
#define KALDI_LSTM_CLIP_LOWER (-50.0)
|
||||
#define KALDI_LSTM_CLIP_UPPER (50.0)
|
||||
#define LOG_DOMAIN (2981.0)
|
||||
#define EXP_DOMAIN (8.0)
|
||||
|
||||
typedef struct {
|
||||
double t;
|
||||
@@ -88,8 +90,10 @@ void PwlDesign16(const DnnActivation activation_type,
|
||||
intel_pwl_segment_t *ptr_segment,
|
||||
const uint32_t num_segments,
|
||||
const float scale_in,
|
||||
const float scale_out);
|
||||
const float scale_out,
|
||||
const uint32_t n);
|
||||
void PwlDesignOpt16(const DnnActivation activation_type,
|
||||
std::vector<intel_pwl_segment_t> &ptr_segment,
|
||||
const float scale_in,
|
||||
const float scale_out);
|
||||
const float scale_out,
|
||||
const uint32_t n);
|
||||
|
||||
@@ -14,17 +14,13 @@ set(builder_files_src ${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_layer_decorator.cp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_const_layer.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_split_layer.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_pooling_layer.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_divbyn_layer.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_network_builder.cpp)
|
||||
|
||||
file (GLOB LIBRARY_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/low_precision_transformations/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/low_precision_transformations/common/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/transform/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/transform/transformations/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/transform/transformations/fusion/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/transform/transformations/utils/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/ngraph_ops/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/low_precision_transformations/common/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cpp_interfaces/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/built-in/*.cpp
|
||||
@@ -55,6 +51,7 @@ file (GLOB LIBRARY_HEADERS
|
||||
)
|
||||
|
||||
if (NOT ENABLE_NGRAPH)
|
||||
# Informational only: configuration continues and the nGraph-dependent
# sources are dropped from LIBRARY_SRC below. An explicit STATUS mode is used
# because lowercase `error` is not a message() mode keyword and was printed
# as part of the text.
message(STATUS "ENABLE_NGRAPH is OFF: removing nGraph-dependent sources")
|
||||
list(REMOVE_ITEM LIBRARY_SRC "${CMAKE_CURRENT_SOURCE_DIR}/ie_cnn_layer_builder_ngraph.cpp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/cnn_network_ngraph_impl.cpp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/ie_ir_parser.cpp"
|
||||
@@ -318,7 +315,7 @@ install(TARGETS ${TARGET_NAME} ${TARGET_NAME}_nn_builder
|
||||
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH}
|
||||
COMPONENT core)
|
||||
install(FILES "${OpenVINO_BINARY_DIR}/share/ie_parallel.cmake"
|
||||
"${OpenVINO_BINARY_DIR}/share/ie_rh_decoder.cmake"
|
||||
#"${OpenVINO_BINARY_DIR}/share/ie_rh_decoder.cmake"
|
||||
"${OpenVINO_BINARY_DIR}/share/InferenceEngineConfig.cmake"
|
||||
"${OpenVINO_BINARY_DIR}/share/InferenceEngineConfig-version.cmake"
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/share
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
// Copyright (C) 2018-2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <builders/ie_divbyn_layer.hpp>
|
||||
|
||||
#include <string>
|
||||
|
||||
using namespace InferenceEngine;
|
||||
|
||||
// Builds a decorator for a new layer: forwards the type string "DivByN" and
// `name` to LayerDecorator, then sizes the output and input port lists to one
// entry each.
Builder::DivByNLayer::DivByNLayer(const std::string& name)
    : LayerDecorator("DivByN", name) {
    auto&& layer = getLayer();
    layer->getOutputPorts().resize(1);
    layer->getInputPorts().resize(1);
}
|
||||
|
||||
// Wraps an existing mutable layer and runs checkType("DivByN") on it
// (checkType is defined elsewhere; presumably it rejects other layer types).
Builder::DivByNLayer::DivByNLayer(const Layer::Ptr& layer)
    : LayerDecorator(layer) {
    checkType("DivByN");
}
|
||||
|
||||
// Wraps an existing read-only layer and runs checkType("DivByN") on it
// (checkType is defined elsewhere; presumably it rejects other layer types).
Builder::DivByNLayer::DivByNLayer(const Layer::CPtr& layer)
    : LayerDecorator(layer) {
    checkType("DivByN");
}
|
||||
|
||||
// Renames the wrapped layer and returns *this so calls can be chained.
Builder::DivByNLayer& Builder::DivByNLayer::setName(const std::string& name) {
    auto&& layer = getLayer();
    layer->setName(name);
    return *this;
}
|
||||
|
||||
// Returns the first (index 0) output port of the wrapped layer.
const Port& Builder::DivByNLayer::getPort() const {
    return getLayer()->getOutputPorts().front();
}
|
||||
|
||||
// Assigns `port` to both output port 0 and input port 0 of the wrapped layer
// and returns *this so calls can be chained.
Builder::DivByNLayer& Builder::DivByNLayer::setPort(const Port &port) {
    auto&& layer = getLayer();
    layer->getOutputPorts()[0] = port;
    layer->getInputPorts()[0] = port;
    return *this;
}
|
||||
@@ -0,0 +1,37 @@
|
||||
// Copyright (C) 2018-2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <builders/ie_exp_layer.hpp>
|
||||
|
||||
#include <string>
|
||||
|
||||
using namespace InferenceEngine;
|
||||
|
||||
// Builds a decorator for a new layer: forwards the type string "Exp" and
// `name` to LayerDecorator, then sizes the output and input port lists to one
// entry each.
Builder::ExpLayer::ExpLayer(const std::string& name)
    : LayerDecorator("Exp", name) {
    auto&& layer = getLayer();
    layer->getOutputPorts().resize(1);
    layer->getInputPorts().resize(1);
}
|
||||
|
||||
// Wraps an existing mutable layer and runs checkType("Exp") on it
// (checkType is defined elsewhere; presumably it rejects other layer types).
Builder::ExpLayer::ExpLayer(const Layer::Ptr& layer)
    : LayerDecorator(layer) {
    checkType("Exp");
}
|
||||
|
||||
// Wraps an existing read-only layer and runs checkType("Exp") on it
// (checkType is defined elsewhere; presumably it rejects other layer types).
Builder::ExpLayer::ExpLayer(const Layer::CPtr& layer)
    : LayerDecorator(layer) {
    checkType("Exp");
}
|
||||
|
||||
// Renames the wrapped layer and returns *this so calls can be chained.
Builder::ExpLayer& Builder::ExpLayer::setName(const std::string& name) {
    auto&& layer = getLayer();
    layer->setName(name);
    return *this;
}
|
||||
|
||||
// Returns the first (index 0) output port of the wrapped layer.
const Port& Builder::ExpLayer::getPort() const {
    return getLayer()->getOutputPorts().front();
}
|
||||
|
||||
// Assigns `port` to both output port 0 and input port 0 of the wrapped layer
// and returns *this so calls can be chained.
Builder::ExpLayer& Builder::ExpLayer::setPort(const Port &port) {
    auto&& layer = getLayer();
    layer->getOutputPorts()[0] = port;
    layer->getInputPorts()[0] = port;
    return *this;
}
|
||||
@@ -0,0 +1,37 @@
|
||||
// Copyright (C) 2018-2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <builders/ie_identity_layer.hpp>
|
||||
|
||||
#include <string>
|
||||
|
||||
using namespace InferenceEngine;
|
||||
|
||||
// Builds a decorator for a new layer: forwards the type string "Identity" and
// `name` to LayerDecorator, then sizes the output and input port lists to one
// entry each.
Builder::IdentityLayer::IdentityLayer(const std::string& name)
    : LayerDecorator("Identity", name) {
    auto&& layer = getLayer();
    layer->getOutputPorts().resize(1);
    layer->getInputPorts().resize(1);
}
|
||||
|
||||
// Wraps an existing mutable layer and runs checkType("Identity") on it
// (checkType is defined elsewhere; presumably it rejects other layer types).
Builder::IdentityLayer::IdentityLayer(const Layer::Ptr& layer)
    : LayerDecorator(layer) {
    checkType("Identity");
}
|
||||
|
||||
// Wraps an existing read-only layer and runs checkType("Identity") on it
// (checkType is defined elsewhere; presumably it rejects other layer types).
Builder::IdentityLayer::IdentityLayer(const Layer::CPtr& layer)
    : LayerDecorator(layer) {
    checkType("Identity");
}
|
||||
|
||||
// Renames the wrapped layer and returns *this so calls can be chained.
Builder::IdentityLayer& Builder::IdentityLayer::setName(const std::string& name) {
    auto&& layer = getLayer();
    layer->setName(name);
    return *this;
}
|
||||
|
||||
// Returns the first (index 0) output port of the wrapped layer.
const Port& Builder::IdentityLayer::getPort() const {
    return getLayer()->getOutputPorts().front();
}
|
||||
|
||||
// Assigns `port` to both output port 0 and input port 0 of the wrapped layer
// and returns *this so calls can be chained.
Builder::IdentityLayer& Builder::IdentityLayer::setPort(const Port &port) {
    auto&& layer = getLayer();
    layer->getOutputPorts()[0] = port;
    layer->getInputPorts()[0] = port;
    return *this;
}
|
||||
@@ -0,0 +1,37 @@
|
||||
// Copyright (C) 2018-2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <builders/ie_log_layer.hpp>
|
||||
|
||||
#include <string>
|
||||
|
||||
using namespace InferenceEngine;
|
||||
|
||||
// Builds a decorator for a new layer: forwards the type string "Log" and
// `name` to LayerDecorator, then sizes the output and input port lists to one
// entry each.
Builder::LogLayer::LogLayer(const std::string& name)
    : LayerDecorator("Log", name) {
    auto&& layer = getLayer();
    layer->getOutputPorts().resize(1);
    layer->getInputPorts().resize(1);
}
|
||||
|
||||
// Wraps an existing mutable layer and runs checkType("Log") on it
// (checkType is defined elsewhere; presumably it rejects other layer types).
Builder::LogLayer::LogLayer(const Layer::Ptr& layer)
    : LayerDecorator(layer) {
    checkType("Log");
}
|
||||
|
||||
// Wraps an existing read-only layer and runs checkType("Log") on it
// (checkType is defined elsewhere; presumably it rejects other layer types).
Builder::LogLayer::LogLayer(const Layer::CPtr& layer)
    : LayerDecorator(layer) {
    checkType("Log");
}
|
||||
|
||||
// Renames the wrapped layer and returns *this so calls can be chained.
Builder::LogLayer& Builder::LogLayer::setName(const std::string& name) {
    auto&& layer = getLayer();
    layer->setName(name);
    return *this;
}
|
||||
|
||||
// Returns the first (index 0) output port of the wrapped layer.
const Port& Builder::LogLayer::getPort() const {
    return getLayer()->getOutputPorts().front();
}
|
||||
|
||||
// Assigns `port` to both output port 0 and input port 0 of the wrapped layer
// and returns *this so calls can be chained.
Builder::LogLayer& Builder::LogLayer::setPort(const Port &port) {
    auto&& layer = getLayer();
    layer->getOutputPorts()[0] = port;
    layer->getInputPorts()[0] = port;
    return *this;
}
|
||||
@@ -0,0 +1,37 @@
|
||||
// Copyright (C) 2018-2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <builders/ie_neghalf_log_layer.hpp>
|
||||
|
||||
#include <string>
|
||||
|
||||
using namespace InferenceEngine;
|
||||
|
||||
// Builds a decorator for a new layer: forwards the type string "NegHalfLog"
// and `name` to LayerDecorator, then sizes the output and input port lists to
// one entry each.
Builder::NegHalfLogLayer::NegHalfLogLayer(const std::string& name)
    : LayerDecorator("NegHalfLog", name) {
    auto&& layer = getLayer();
    layer->getOutputPorts().resize(1);
    layer->getInputPorts().resize(1);
}
|
||||
|
||||
// Wraps an existing mutable layer and runs checkType("NegHalfLog") on it
// (checkType is defined elsewhere; presumably it rejects other layer types).
Builder::NegHalfLogLayer::NegHalfLogLayer(const Layer::Ptr& layer)
    : LayerDecorator(layer) {
    checkType("NegHalfLog");
}
|
||||
|
||||
// Wraps an existing read-only layer and runs checkType("NegHalfLog") on it
// (checkType is defined elsewhere; presumably it rejects other layer types).
Builder::NegHalfLogLayer::NegHalfLogLayer(const Layer::CPtr& layer)
    : LayerDecorator(layer) {
    checkType("NegHalfLog");
}
|
||||
|
||||
// Renames the wrapped layer and returns *this so calls can be chained.
Builder::NegHalfLogLayer& Builder::NegHalfLogLayer::setName(const std::string& name) {
    auto&& layer = getLayer();
    layer->setName(name);
    return *this;
}
|
||||
|
||||
// Returns the first (index 0) output port of the wrapped layer.
const Port& Builder::NegHalfLogLayer::getPort() const {
    return getLayer()->getOutputPorts().front();
}
|
||||
|
||||
// Assigns `port` to both output port 0 and input port 0 of the wrapped layer
// and returns *this so calls can be chained.
Builder::NegHalfLogLayer& Builder::NegHalfLogLayer::setPort(const Port &port) {
    auto&& layer = getLayer();
    layer->getOutputPorts()[0] = port;
    layer->getInputPorts()[0] = port;
    return *this;
}
|
||||
@@ -269,13 +269,14 @@ idx_t Builder::Network::addLayer(const std::vector<PortInfo>& inputs, const Laye
|
||||
}
|
||||
|
||||
idx_t Builder::Network::addLayer(const Layer& layer) {
|
||||
auto &layerParam = parameters["layers"].as<std::vector<Layer::Ptr>>();
|
||||
auto getAvailableId = [&](idx_t defaultId) {
|
||||
if (defaultId == (std::numeric_limits<idx_t>::max)()) defaultId = 0;
|
||||
|
||||
auto it = parameters["layers"].as<std::vector<Layer::Ptr>>().begin();
|
||||
while (it != parameters["layers"].as<std::vector<Layer::Ptr>>().end()) {
|
||||
for (it = parameters["layers"].as<std::vector<Layer::Ptr>>().begin();
|
||||
it != parameters["layers"].as<std::vector<Layer::Ptr>>().end(); it++) {
|
||||
auto it = layerParam.begin();
|
||||
while (it != layerParam.end()) {
|
||||
for (it = layerParam.begin();
|
||||
it != layerParam.end(); it++) {
|
||||
if ((*it)->getId() == defaultId) {
|
||||
defaultId++;
|
||||
break;
|
||||
@@ -302,10 +303,8 @@ idx_t Builder::Network::addLayer(const Layer& layer) {
|
||||
};
|
||||
idx_t generatedId = getAvailableId(layer.getId());
|
||||
const auto name = generateAvailableName(layer.getName(), generatedId);
|
||||
parameters["layers"].as<std::vector<Layer::Ptr>>().emplace_back(std::make_shared<Layer>(generatedId, layer));
|
||||
parameters["layers"]
|
||||
.as<std::vector<Layer::Ptr>>()[parameters["layers"].as<std::vector<Layer::Ptr>>().size() - 1]
|
||||
->setName(name);
|
||||
layerParam.emplace_back(std::make_shared<Layer>(generatedId, layer));
|
||||
layerParam[layerParam.size() - 1]->setName(name);
|
||||
return generatedId;
|
||||
}
|
||||
|
||||
|
||||
@@ -81,6 +81,11 @@ public:
|
||||
{"elu", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("ELU")},
|
||||
{"sigmoid", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("Sigmoid")},
|
||||
{"tanh", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("TanH")},
|
||||
{"neghalflog", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("NegHalfLog")},
|
||||
{"log", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("Log")},
|
||||
{"divbyn", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("DivByN")},
|
||||
{"exp", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("exp")},
|
||||
{"identity", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("identity")},
|
||||
};
|
||||
|
||||
auto typeIt = layer->getParameters().find("type");
|
||||
|
||||
@@ -13,7 +13,9 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#if defined(ENABLE_NGRAPH)
|
||||
#include "cnn_network_ngraph_impl.hpp"
|
||||
#endif
|
||||
#include "debug.h"
|
||||
#include "details/os/os_filesystem.hpp"
|
||||
#include "ie_format_parser.h"
|
||||
|
||||
@@ -15,7 +15,9 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#if defined(ENABLE_NGRAPH)
|
||||
#include <ngraph/opsets/opset.hpp>
|
||||
#endif
|
||||
#include "cpp_interfaces/base/ie_plugin_base.hpp"
|
||||
#include "details/caseless.hpp"
|
||||
#include "details/ie_exception_conversion.hpp"
|
||||
@@ -232,7 +234,6 @@ public:
|
||||
*/
|
||||
InferencePlugin GetCPPPluginByName(const std::string& deviceName) const {
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
|
||||
auto it = pluginRegistry.find(deviceName);
|
||||
if (it == pluginRegistry.end()) {
|
||||
THROW_IE_EXCEPTION << "Device with \"" << deviceName << "\" name is not registered in the InferenceEngine";
|
||||
@@ -368,6 +369,7 @@ public:
|
||||
}
|
||||
|
||||
void addExtension(const IExtensionPtr& extension) {
|
||||
#if defined(ENABLE_NGRAPH)
|
||||
std::map<std::string, ngraph::OpSet> opsets;
|
||||
try {
|
||||
opsets = extension->getOpSets();
|
||||
@@ -377,6 +379,7 @@ public:
|
||||
THROW_IE_EXCEPTION << "Cannot add opset with name: " << it.first << ". Opset with the same name already exists.";
|
||||
opsetNames.insert(it.first);
|
||||
}
|
||||
#endif
|
||||
extensions.emplace_back(extension);
|
||||
}
|
||||
|
||||
|
||||
@@ -38,6 +38,11 @@ CNNLayer::Ptr ActivationLayerCreator::CreateLayer(pugi::xml_node& node, LayerPar
|
||||
{"clamp", std::make_shared<LayerCreator<ClampLayer>>("Clamp")},
|
||||
{"elu", std::make_shared<LayerCreator<CNNLayer>>("ELU")},
|
||||
{"sigmoid", std::make_shared<LayerCreator<CNNLayer>>("Sigmoid")},
|
||||
{"log", std::make_shared<LayerCreator<CNNLayer>>("Log")},
|
||||
{"neghalflog", std::make_shared<LayerCreator<CNNLayer>>("NegHalfLog")},
|
||||
{"divbyn", std::make_shared<LayerCreator<CNNLayer>>("DivByN")},
|
||||
{"identity", std::make_shared<LayerCreator<CNNLayer>>("Identity")},
|
||||
{"exp", std::make_shared<LayerCreator<CNNLayer>>("Exp")},
|
||||
{"tanh", std::make_shared<LayerCreator<CNNLayer>>("TanH")},
|
||||
};
|
||||
|
||||
|
||||
@@ -70,6 +70,13 @@ FillLayer::~FillLayer() {}
|
||||
SelectLayer::~SelectLayer() {}
|
||||
BroadcastLayer::~BroadcastLayer() {}
|
||||
QuantizeLayer::~QuantizeLayer() {}
|
||||
SigmoidLayer::~SigmoidLayer() {}
|
||||
DivByNLayer::~DivByNLayer() {}
|
||||
LogLayer::~LogLayer() {}
|
||||
IdentityLayer::~IdentityLayer() {}
|
||||
NegHalfLogLayer::~NegHalfLogLayer() {}
|
||||
ExpLayer::~ExpLayer() {}
|
||||
TanHLayer::~TanHLayer() {}
|
||||
MathLayer::~MathLayer() {}
|
||||
ReduceLayer::~ReduceLayer() {}
|
||||
TopKLayer::~TopKLayer() {}
|
||||
|
||||
@@ -134,6 +134,12 @@ CNNLayerPtr clonelayer(const CNNLayer& source) {
|
||||
&layerCloneImpl<QuantizeLayer>,
|
||||
&layerCloneImpl<BinaryConvolutionLayer>,
|
||||
&layerCloneImpl<WeightableLayer>,
|
||||
&layerCloneImpl<TanHLayer>,
|
||||
&layerCloneImpl<LogLayer>,
|
||||
&layerCloneImpl<NegHalfLogLayer>,
|
||||
&layerCloneImpl<IdentityLayer>,
|
||||
&layerCloneImpl<DivByNLayer>,
|
||||
&layerCloneImpl<SigmoidLayer>,
|
||||
&layerCloneImpl<OneHotLayer>,
|
||||
&layerCloneImpl<CNNLayer>,
|
||||
&layerCloneImpl<UniqueLayer>};
|
||||
|
||||
@@ -31,7 +31,7 @@ using AllLayers =
|
||||
ReshapeLayer*, TileLayer*, ScaleShiftLayer*, PReLULayer*, PowerLayer*, BatchNormalizationLayer*,
|
||||
ClampLayer*, TensorIterator*, LSTMCell*, GRUCell*, RNNCell*, RNNSequenceLayer*, QuantizeLayer*,
|
||||
BinaryConvolutionLayer*, WeightableLayer*, OneHotLayer*, MathLayer*, ReduceLayer*, UniqueLayer*,
|
||||
NonMaxSuppressionLayer*, ScatterLayer*, CNNLayer*>;
|
||||
NonMaxSuppressionLayer*, ScatterLayer*, TanHLayer*, SigmoidLayer*, LogLayer*, NegHalfLogLayer*,DivByNLayer*, IdentityLayer*, ExpLayer*, CNNLayer*>;
|
||||
|
||||
/**
|
||||
* @brief checks whether the type indexed as P has a parent among the elements in range I..N
|
||||
|
||||
@@ -125,6 +125,10 @@ REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, ELU);
|
||||
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, TanH);
|
||||
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Logistic);
|
||||
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Sigmoid);
|
||||
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, DivByN);
|
||||
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Identity);
|
||||
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Log);
|
||||
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, NegHalfLog);
|
||||
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, PReLU);
|
||||
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, SoftMax);
|
||||
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, LogSoftMax);
|
||||
@@ -219,7 +223,6 @@ REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Cosh);
|
||||
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Erf);
|
||||
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Floor);
|
||||
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, HardSigmoid);
|
||||
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Log);
|
||||
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Exp);
|
||||
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Neg);
|
||||
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Reciprocal);
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "other/add_output.hpp"
|
||||
|
||||
const auto addOutputParams =
|
||||
::testing::Combine(::testing::Values("Memory_1"), ::testing::Values(CommonTestUtils::DEVICE_CPU));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputTestsCommonClass, addOutputParams,
|
||||
AddOutputTestsCommonClass::getTestCaseName);
|
||||
|
||||
// The whole scenario lives in the fixture's run_test(); this case only invokes it.
TEST_P(AddOutputTestsCommonClass, basic) {
    this->run_test();
}
|
||||
@@ -0,0 +1,15 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "other/add_output.hpp"
|
||||
|
||||
const auto addOutputParams =
|
||||
::testing::Combine(::testing::Values("Memory_1"), ::testing::Values(CommonTestUtils::DEVICE_GNA));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputTestsCommonClass, addOutputParams,
|
||||
AddOutputTestsCommonClass::getTestCaseName);
|
||||
|
||||
// The whole scenario lives in the fixture's run_test(); this case only invokes it.
TEST_P(AddOutputTestsCommonClass, basic) {
    this->run_test();
}
|
||||
@@ -0,0 +1,26 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "common_test_utils/common_layers_params.hpp"
|
||||
#include "common_test_utils/common_utils.hpp"
|
||||
#include "common_test_utils/test_common.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
#include "common_test_utils/xml_net_builder/ir_net.hpp"
|
||||
#include "common_test_utils/xml_net_builder/xml_filler.hpp"
|
||||
#include "ie_core.hpp"
|
||||
|
||||
class AddOutputTestsCommonClass : public CommonTestUtils::TestsCommon,
|
||||
public testing::WithParamInterface<std::tuple<std::string, std::string>> {
|
||||
private:
|
||||
static std::string generate_model();
|
||||
|
||||
public:
|
||||
static std::string getTestCaseName(testing::TestParamInfo<std::tuple<std::string, std::string>> obj);
|
||||
void run_test();
|
||||
};
|
||||
@@ -0,0 +1,76 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "other/add_output.hpp"
|
||||
|
||||
// TODO: Replace IRBuilder with NGraph when it supports Memory Layer
|
||||
std::string AddOutputTestsCommonClass::generate_model() {
|
||||
CommonTestUtils::IRBuilder_v6 test_model_builder("model");
|
||||
|
||||
auto precision = InferenceEngine::Precision::FP32;
|
||||
|
||||
auto Memory_1_layer =
|
||||
test_model_builder.AddLayer("Memory_1", "Memory", precision, {{"id", "r_1-3"}, {"index", "1"}, {"size", "2"}})
|
||||
.AddOutPort({1, 200})
|
||||
.getLayer();
|
||||
auto Input_2_layer = test_model_builder.AddLayer("Input_2", "input", precision).AddOutPort({1, 200}).getLayer();
|
||||
auto Eltwise_3_layer = test_model_builder.AddLayer("Eltwise_3", "Eltwise", precision, {{"operation", "mul"}})
|
||||
.AddInPort({1, 200})
|
||||
.AddInPort({1, 200})
|
||||
.AddOutPort({1, 200})
|
||||
.getLayer();
|
||||
|
||||
auto Activation_4_layer =
|
||||
test_model_builder.AddLayer("Activation_4", "Activation", precision, {{"type", "sigmoid"}})
|
||||
.AddInPort({1, 200})
|
||||
.AddOutPort({1, 200})
|
||||
.getLayer();
|
||||
auto Memory_5_layer =
|
||||
test_model_builder.AddLayer("Memory_5", "Memory", precision, {{"id", "r_1-3"}, {"index", "0"}, {"size", "2"}})
|
||||
.AddInPort({1, 200})
|
||||
.getLayer();
|
||||
|
||||
test_model_builder.AddEdge(Memory_1_layer.out(0), Eltwise_3_layer.in(0));
|
||||
test_model_builder.AddEdge(Input_2_layer.out(0), Eltwise_3_layer.in(1));
|
||||
test_model_builder.AddEdge(Eltwise_3_layer.out(0), Activation_4_layer.in(0));
|
||||
test_model_builder.AddEdge(Activation_4_layer.out(0), Memory_5_layer.in(0));
|
||||
|
||||
auto serial = test_model_builder.serialize();
|
||||
|
||||
return serial;
|
||||
}
|
||||
|
||||
// Names a parameter set as "<first tuple element>_<second tuple element>".
std::string AddOutputTestsCommonClass::getTestCaseName(
    testing::TestParamInfo<std::tuple<std::string, std::string>> obj) {
    const std::string& layer = std::get<0>(obj.param);
    const std::string& engine = std::get<1>(obj.param);
    return layer + "_" + engine;
}
|
||||
|
||||
void AddOutputTestsCommonClass::run_test() {
|
||||
std::string layer_name;
|
||||
std::string engine_type;
|
||||
|
||||
std::tie(layer_name, engine_type) = this->GetParam();
|
||||
|
||||
auto model = this->generate_model();
|
||||
|
||||
InferenceEngine::Core ie;
|
||||
InferenceEngine::CNNNetwork network;
|
||||
InferenceEngine::ExecutableNetwork executableNet;
|
||||
|
||||
auto null_blob = CommonTestUtils::getWeightsBlob(0);
|
||||
network = ie.ReadNetwork(model, null_blob);
|
||||
network.addOutput(layer_name);
|
||||
executableNet = ie.LoadNetwork(network, engine_type);
|
||||
|
||||
auto outputs = executableNet.GetOutputsInfo();
|
||||
|
||||
auto layer_output = outputs[layer_name];
|
||||
|
||||
ASSERT_EQ(true, layer_output && "layer not found in outputs");
|
||||
}
|
||||
1
inference-engine/thirdparty/CMakeLists.txt
vendored
1
inference-engine/thirdparty/CMakeLists.txt
vendored
@@ -36,7 +36,6 @@ function(build_with_lto)
|
||||
endif()
|
||||
|
||||
add_subdirectory(pugixml)
|
||||
add_subdirectory(stb_lib)
|
||||
add_subdirectory(ade)
|
||||
add_subdirectory(fluid/modules/gapi)
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ if(DEFINED BUILD_DEFINES)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
if(NOT BUILD_SHARED_LIBS)
|
||||
add_library(pugixml SHARED ${SOURCES})
|
||||
else()
|
||||
add_library(pugixml STATIC ${SOURCES})
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
#===============================================================================
|
||||
# Copyright (C) 2018-2019 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#===============================================================================
|
||||
|
||||
# Static library that compiles the stb single-header implementations
# (stb_image.cpp) and exposes this directory as a public include path.
set(TARGET stb_image)

add_library(
    ${TARGET}
    STATIC
    stb_image.cpp
)

target_include_directories(
    ${TARGET}
    PUBLIC
        "${CMAKE_CURRENT_SOURCE_DIR}"
)
|
||||
@@ -1,11 +0,0 @@
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#include "stb_image.h"
|
||||
|
||||
#define STB_IMAGE_RESIZE_IMPLEMENTATION
|
||||
#define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_BOX
|
||||
#define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_BOX
|
||||
#include "stb_image_resize.h"
|
||||
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||
#include "stb_image_write.h"
|
||||
7187
inference-engine/thirdparty/stb_lib/stb_image.h
vendored
7187
inference-engine/thirdparty/stb_lib/stb_image.h
vendored
File diff suppressed because it is too large
Load Diff
2627
inference-engine/thirdparty/stb_lib/stb_image_resize.h
vendored
2627
inference-engine/thirdparty/stb_lib/stb_image_resize.h
vendored
File diff suppressed because it is too large
Load Diff
1458
inference-engine/thirdparty/stb_lib/stb_image_write.h
vendored
1458
inference-engine/thirdparty/stb_lib/stb_image_write.h
vendored
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user