Compare commits

...

2 Commits

Author SHA1 Message Date
anikulk
a4a1bff1cc Enable intel-openvino ChromeOS build with intel-gnalib library (#4761)
We use the header files from /usr/include.
Enable necessary build changes for the same.
- Remove unused stb_lib libraries.
- Set SSE4.2 implementation as default
- Add div_by_n to the CMake builder rules
2021-03-12 20:01:22 +03:00
anikulk
83964338b0 Chromeos enabling (#4704)
* ChromeOS enabling changes in DLDT/OPENVINO

Changes for new activations
Enable Debug messages
Add exp activation files
Enable FP32 functions for newly added activations
Enable activation names in debug files
Modify Scale Factor calculation

* Add support for SetState in GNAMemory

* Use rounding when changing the scale + switch to Elevoc's identity

* Merge branch 'private/kmagiers/GNAPlugin_Memory_layer_as_output' into 'master'

Input memory layer support in addOutput();

See merge request inference-engine/dldt!7016

* Porting Dldt to ChromeOS

Signed-off-by: Anisha Kulkarni <anisha.dattatraya.kulkarni@intel.com>

* Optimizations for builder & GNAPlugin

 - Optimize graph building addLayer
 - Cache Activation functions pwl
 - Use AVX/SSE intrinsics for Quantization
 - Add config to add Identity layer Scale Factor
 - Port Permute related changes, Permute
   operation is needed for Layer Normalization
   batching.
 - Additionally, enable Negative Half Log
   Activation Layer

* Work Around for DivByN and permute

- Scale Factors need further tuning before
  the DivByN operation in GNA Layer Norm
- Work around for Permute

* BACKPORT:Enable CoreThreadingTestsWithIterations tests for GNA

Co-authored-by: Raviraj P Sitaram <raviraj.p.sitaram@intel.com>
Co-authored-by: Denis Orlov <denis.orlov@intel.com>
Co-authored-by: Smirnov, Eugene <eugene.smirnov@intel.com>
Co-authored-by: Anisha Kulkarni <anisha.dattatraya.kulkarni@intel.corp-partner.google.com>
2021-03-11 12:10:53 +03:00
66 changed files with 1978 additions and 11510 deletions

View File

@@ -139,7 +139,7 @@ if(WIN32)
else()
# TODO: enable for C sources as well
# ie_add_compiler_flags(-Werror)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error")
ie_add_compiler_flags(-ffunction-sections -fdata-sections)
ie_add_compiler_flags(-fvisibility=hidden)
ie_add_compiler_flags(-fdiagnostics-show-option)

View File

@@ -85,11 +85,15 @@ add_cpplint_report_target()
ie_cpack_add_component(cpp_samples REQUIRED DEPENDS core)
if(UNIX)
install(DIRECTORY samples/
DESTINATION ${IE_CPACK_IE_DIR}/samples/cpp
COMPONENT cpp_samples
USE_SOURCE_PERMISSIONS
PATTERN *.bat EXCLUDE)
if (${TARGET_OS} STREQUAL "CHROMEOS")
message(STATUS " Skipping cpp samples")
else()
install(DIRECTORY samples/
DESTINATION ${IE_CPACK_IE_DIR}/samples/cpp
COMPONENT cpp_samples
USE_SOURCE_PERMISSIONS
PATTERN *.bat EXCLUDE)
endif()
elseif(WIN32)
install(DIRECTORY samples
DESTINATION ${IE_CPACK_IE_DIR}/samples/cpp
@@ -103,31 +107,42 @@ endif()
ie_cpack_add_component(c_samples REQUIRED DEPENDS core)
if(UNIX)
install(PROGRAMS samples/build_samples.sh
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples)
if (${TARGET_OS} STREQUAL "CHROMEOS")
message(STATUS " Skipping cpp samples")
else()
install(PROGRAMS samples/build_samples.sh
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples)
endif()
elseif(WIN32)
install(PROGRAMS samples/build_samples_msvc.bat
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples)
endif()
install(DIRECTORY ie_bridges/c/samples/
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples
PATTERN ie_bridges/c/samples/CMakeLists.txt EXCLUDE)
if (${TARGET_OS} STREQUAL "CHROMEOS")
message(STATUS " Skipping ie_bridges c/cpp samples")
else()
install(DIRECTORY ie_bridges/c/samples/
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples
PATTERN ie_bridges/c/samples/CMakeLists.txt EXCLUDE)
install(FILES samples/CMakeLists.txt
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples)
install(FILES samples/CMakeLists.txt
DESTINATION ${IE_CPACK_IE_DIR}/samples/c
COMPONENT c_samples)
endif()
# install Python samples
if (${TARGET_OS} STREQUAL "CHROMEOS")
message(STATUS "Skipping python samples")
else()
ie_cpack_add_component(python_samples REQUIRED DEPENDS core)
ie_cpack_add_component(python_samples REQUIRED DEPENDS core)
install(DIRECTORY ${ie_python_api_SOURCE_DIR}/sample/
DESTINATION ${IE_CPACK_IE_DIR}/samples/python
COMPONENT python_samples)
install(DIRECTORY ${ie_python_api_SOURCE_DIR}/sample/
DESTINATION ${IE_CPACK_IE_DIR}/samples/python
COMPONENT python_samples)
endif()
# Custom target to build only Inference Engine Developer Package targets

View File

@@ -35,7 +35,9 @@ else()
set(GNA_LIB_DIR x64 CACHE STRING "" FORCE)
set(libGNA_INCLUDE_DIRS "${GNA}/include" CACHE STRING "" FORCE)
endif()
set(libGNA_LIBRARIES_BASE_PATH ${GNA}/${GNA_PLATFORM_DIR}/${GNA_LIB_DIR} CACHE STRING "" FORCE)
#set(libGNA_LIBRARIES_BASE_PATH ${GNA}/${GNA_PLATFORM_DIR}/${GNA_LIB_DIR} CACHE STRING "" FORCE)
# Ebuild fails to copy x64 named folder during build process
set(libGNA_LIBRARIES_BASE_PATH ${GNA}/${GNA_PLATFORM_DIR} CACHE STRING "" FORCE)
add_library(libGNA::KERNEL SHARED IMPORTED)
find_library(GNA_KERNEL_LIBRARY

View File

@@ -49,26 +49,31 @@ endif ()
## Intel OMP package
if (THREADING STREQUAL "OMP")
if (WIN32)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_WIN "iomp.zip"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
elseif(LINUX)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_LIN "iomp.tgz"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
else(APPLE)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_MAC "iomp_20190130_mac.tgz"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
endif()
log_rpath_from_dir(OMP "${OMP}/lib")
if (NOT DEFINED OMP_DIR)
if (WIN32)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_WIN "iomp.zip"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
elseif(LINUX)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_LIN "iomp.tgz"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
else(APPLE)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_MAC "iomp_20190130_mac.tgz"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
endif()
log_rpath_from_dir(OMP "${OMP}/lib")
else()
set(OMP ${IE_MAIN_SOURCE_DIR}/${OMP_DIR})
log_rpath_from_dir(OMP "${OMP}/lib")
endif()
debug_message(STATUS "intel_omp=" ${OMP})
endif ()
@@ -183,22 +188,26 @@ if (ENABLE_GNA)
GNA_LIB_DIR
libGNA_INCLUDE_DIRS
libGNA_LIBRARIES_BASE_PATH)
if (GNA_LIBRARY_VERSION STREQUAL "GNA1")
RESOLVE_DEPENDENCY(GNA
ARCHIVE_UNIFIED "gna_20181120.zip"
TARGET_PATH "${TEMP}/gna")
else()
if(GNA_LIBRARY_VERSION STREQUAL "GNA1_1401")
set(GNA_VERSION "01.00.00.1401")
endif()
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
set(GNA_VERSION "02.00.00.0587")
endif()
RESOLVE_DEPENDENCY(GNA
ARCHIVE_UNIFIED "GNA_${GNA_VERSION}.zip"
TARGET_PATH "${TEMP}/gna_${GNA_VERSION}"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*")
endif()
if (NOT DEFINED GNA_DIR)
if (GNA_LIBRARY_VERSION STREQUAL "GNA1")
RESOLVE_DEPENDENCY(GNA
ARCHIVE_UNIFIED "gna_20181120.zip"
TARGET_PATH "${TEMP}/gna")
else()
if(GNA_LIBRARY_VERSION STREQUAL "GNA1_1401")
set(GNA_VERSION "01.00.00.1401")
endif()
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
set(GNA_VERSION "02.00.00.0587")
endif()
RESOLVE_DEPENDENCY(GNA
ARCHIVE_UNIFIED "GNA_${GNA_VERSION}.zip"
TARGET_PATH "${TEMP}/gna_${GNA_VERSION}"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*")
endif()
else()
set(GNA ${IE_MAIN_SOURCE_DIR}/${GNA_DIR})
endif()
debug_message(STATUS "gna=" ${GNA})
endif()

View File

@@ -4,29 +4,33 @@
if (LINUX)
function(get_linux_name res_var)
if (NOT EXISTS "/etc/lsb-release")
execute_process(COMMAND find -L /etc/ -maxdepth 1 -type f -name *-release -exec cat {} \;
OUTPUT_VARIABLE release_data RESULT_VARIABLE result)
set(name_regex "NAME=\"([^ \"\n]*).*\"\n")
set(version_regex "VERSION=\"([0-9]+(\\.[0-9]+)?)[^\n]*\"")
else ()
#linux version detection using cat /etc/lsb-release
file(READ "/etc/lsb-release" release_data)
set(name_regex "DISTRIB_ID=([^ \n]*)\n")
set(version_regex "DISTRIB_RELEASE=([0-9]+(\\.[0-9]+)?)")
endif ()
if (NOT DEFINED TARGET_OS)
if (NOT EXISTS "/etc/lsb-release")
execute_process(COMMAND find -L /etc/ -maxdepth 1 -type f -name *-release -exec cat {} \;
OUTPUT_VARIABLE release_data RESULT_VARIABLE result)
set(name_regex "NAME=\"([^ \"\n]*).*\"\n")
set(version_regex "VERSION=\"([0-9]+(\\.[0-9]+)?)[^\n]*\"")
else ()
#linux version detection using cat /etc/lsb-release
file(READ "/etc/lsb-release" release_data)
set(name_regex "DISTRIB_ID=([^ \n]*)\n")
set(version_regex "DISTRIB_RELEASE=([0-9]+(\\.[0-9]+)?)")
endif ()
string(REGEX MATCH ${name_regex} name ${release_data})
set(os_name ${CMAKE_MATCH_1})
string(REGEX MATCH ${name_regex} name ${release_data})
set(os_name ${CMAKE_MATCH_1})
string(REGEX MATCH ${version_regex} version ${release_data})
set(os_name "${os_name} ${CMAKE_MATCH_1}")
if (os_name)
set(${res_var} ${os_name} PARENT_SCOPE)
else ()
set(${res_var} NOTFOUND PARENT_SCOPE)
endif ()
string(REGEX MATCH ${version_regex} version ${release_data})
set(os_name "${os_name} ${CMAKE_MATCH_1}")
if (os_name)
set(${res_var} ${os_name} PARENT_SCOPE)
else ()
set(${res_var} NOTFOUND PARENT_SCOPE)
endif ()
else()
set(os_name ${TARGET_OS})
set(${res_var} ${os_name} PARENT_SCOPE)
endif()
endfunction()
endif ()

View File

@@ -0,0 +1,55 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <builders/ie_layer_decorator.hpp>
#include <ie_network.hpp>
#include <string>
namespace InferenceEngine {
namespace Builder {
/**
* @brief The class represents a builder for DivByN layer
*/
class INFERENCE_ENGINE_API_CLASS(DivByNLayer): public LayerDecorator {
public:
/**
* @brief The constructor creates a builder with the name
* @param name Layer name
*/
explicit DivByNLayer(const std::string& name = "");
/**
* @brief The constructor creates a builder from generic builder
* @param layer pointer to generic builder
*/
explicit DivByNLayer(const Layer::Ptr& layer);
/**
* @brief The constructor creates a builder from generic builder
* @param layer constant pointer to generic builder
*/
explicit DivByNLayer(const Layer::CPtr& layer);
/**
* @brief Sets the name for the layer
* @param name Layer name
* @return reference to layer builder
*/
DivByNLayer& setName(const std::string& name);
/**
* @brief Returns port with shapes for the layer
* @return Port with shapes
*/
const Port& getPort() const;
/**
* @brief Sets port shapes for the layer
* @param port Port with shapes
* @return reference to layer builder
*/
DivByNLayer& setPort(const Port& port);
};
} // namespace Builder
} // namespace InferenceEngine

View File

@@ -0,0 +1,55 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <builders/ie_layer_decorator.hpp>
#include <ie_network.hpp>
#include <string>
namespace InferenceEngine {
namespace Builder {
/**
* @brief The class represents a builder for Exp layer
*/
class INFERENCE_ENGINE_API_CLASS(ExpLayer): public LayerDecorator {
public:
/**
* @brief The constructor creates a builder with the name
* @param name Layer name
*/
explicit ExpLayer(const std::string& name = "");
/**
* @brief The constructor creates a builder from generic builder
* @param layer pointer to generic builder
*/
explicit ExpLayer(const Layer::Ptr& layer);
/**
* @brief The constructor creates a builder from generic builder
* @param layer constant pointer to generic builder
*/
explicit ExpLayer(const Layer::CPtr& layer);
/**
* @brief Sets the name for the layer
* @param name Layer name
* @return reference to layer builder
*/
ExpLayer& setName(const std::string& name);
/**
* @brief Returns port with shapes for the layer
* @return Port with shapes
*/
const Port& getPort() const;
/**
* @brief Sets port shapes for the layer
* @param port Port with shapes
* @return reference to layer builder
*/
ExpLayer& setPort(const Port& port);
};
} // namespace Builder
} // namespace InferenceEngine

View File

@@ -0,0 +1,55 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <builders/ie_layer_decorator.hpp>
#include <ie_network.hpp>
#include <string>
namespace InferenceEngine {
namespace Builder {
/**
* @brief The class represents a builder for Identity layer
*/
class INFERENCE_ENGINE_API_CLASS(IdentityLayer): public LayerDecorator {
public:
/**
* @brief The constructor creates a builder with the name
* @param name Layer name
*/
explicit IdentityLayer(const std::string& name = "");
/**
* @brief The constructor creates a builder from generic builder
* @param layer pointer to generic builder
*/
explicit IdentityLayer(const Layer::Ptr& layer);
/**
* @brief The constructor creates a builder from generic builder
* @param layer constant pointer to generic builder
*/
explicit IdentityLayer(const Layer::CPtr& layer);
/**
* @brief Sets the name for the layer
* @param name Layer name
* @return reference to layer builder
*/
IdentityLayer& setName(const std::string& name);
/**
* @brief Returns port with shapes for the layer
* @return Port with shapes
*/
const Port& getPort() const;
/**
* @brief Sets port shapes for the layer
* @param port Port with shapes
* @return reference to layer builder
*/
IdentityLayer& setPort(const Port& port);
};
} // namespace Builder
} // namespace InferenceEngine

View File

@@ -0,0 +1,55 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <builders/ie_layer_decorator.hpp>
#include <ie_network.hpp>
#include <string>
namespace InferenceEngine {
namespace Builder {
/**
* @brief The class represents a builder for Log layer
*/
class INFERENCE_ENGINE_API_CLASS(LogLayer): public LayerDecorator {
public:
/**
* @brief The constructor creates a builder with the name
* @param name Layer name
*/
explicit LogLayer(const std::string& name = "");
/**
* @brief The constructor creates a builder from generic builder
* @param layer pointer to generic builder
*/
explicit LogLayer(const Layer::Ptr& layer);
/**
* @brief The constructor creates a builder from generic builder
* @param layer constant pointer to generic builder
*/
explicit LogLayer(const Layer::CPtr& layer);
/**
* @brief Sets the name for the layer
* @param name Layer name
* @return reference to layer builder
*/
LogLayer& setName(const std::string& name);
/**
* @brief Returns port with shapes for the layer
* @return Port with shapes
*/
const Port& getPort() const;
/**
* @brief Sets port shapes for the layer
* @param port Port with shapes
* @return reference to layer builder
*/
LogLayer& setPort(const Port& port);
};
} // namespace Builder
} // namespace InferenceEngine

View File

@@ -0,0 +1,55 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <builders/ie_layer_decorator.hpp>
#include <ie_network.hpp>
#include <string>
namespace InferenceEngine {
namespace Builder {
/**
* @brief The class represents a builder for NegHalfLog layer
*/
class INFERENCE_ENGINE_API_CLASS(NegHalfLogLayer): public LayerDecorator {
public:
/**
* @brief The constructor creates a builder with the name
* @param name Layer name
*/
explicit NegHalfLogLayer(const std::string& name = "");
/**
* @brief The constructor creates a builder from generic builder
* @param layer pointer to generic builder
*/
explicit NegHalfLogLayer(const Layer::Ptr& layer);
/**
* @brief The constructor creates a builder from generic builder
* @param layer constant pointer to generic builder
*/
explicit NegHalfLogLayer(const Layer::CPtr& layer);
/**
* @brief Sets the name for the layer
* @param name Layer name
* @return reference to layer builder
*/
NegHalfLogLayer& setName(const std::string& name);
/**
* @brief Returns port with shapes for the layer
* @return Port with shapes
*/
const Port& getPort() const;
/**
* @brief Sets port shapes for the layer
* @param port Port with shapes
* @return reference to layer builder
*/
NegHalfLogLayer& setPort(const Port& port);
};
} // namespace Builder
} // namespace InferenceEngine

View File

@@ -56,7 +56,7 @@ DECLARE_GNA_CONFIG_KEY(SCALE_FACTOR);
* currently supported values are I16, I8
*/
DECLARE_GNA_CONFIG_KEY(PRECISION);
DECLARE_GNA_CONFIG_KEY(IDENTITY_SCALE_FACTOR);
/**
* @brief if turned on, dump GNA firmware model into specified file

View File

@@ -50,6 +50,11 @@
#include <builders/ie_roi_pooling_layer.hpp>
#include <builders/ie_scale_shift_layer.hpp>
#include <builders/ie_sigmoid_layer.hpp>
#include <builders/ie_log_layer.hpp>
#include <builders/ie_neghalf_log_layer.hpp>
#include <builders/ie_exp_layer.hpp>
#include <builders/ie_divbyn_layer.hpp>
#include <builders/ie_identity_layer.hpp>
#include <builders/ie_simpler_nms_layer.hpp>
#include <builders/ie_softmax_layer.hpp>
#include <builders/ie_split_layer.hpp>

View File

@@ -2079,4 +2079,127 @@ public:
virtual ~ScatterLayer();
};
class INFERENCE_ENGINE_API_CLASS(TanHLayer): public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new TanHLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
//explicit TanHLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~TanHLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by TanH — confirm
*/
float negative_slope;
};
class INFERENCE_ENGINE_API_CLASS(SigmoidLayer): public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new SigmoidLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
//explicit SigmoidLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~SigmoidLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by Sigmoid — confirm
*/
float negative_slope;
};
class INFERENCE_ENGINE_API_CLASS(DivByNLayer) : public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new DivByNLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
// explicit DivByNLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~DivByNLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by DivByN — confirm
*/
float negative_slope;
};
class INFERENCE_ENGINE_API_CLASS(IdentityLayer) : public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new IdentityLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
// explicit IdentityLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~IdentityLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by Identity — confirm
*/
float negative_slope;
};
class INFERENCE_ENGINE_API_CLASS(ExpLayer) : public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new ExpLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
// explicit ExpLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~ExpLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by Exp — confirm
*/
float negative_slope;
};
class INFERENCE_ENGINE_API_CLASS(NegHalfLogLayer) : public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new NegHalfLogLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
// explicit NegHalfLogLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~NegHalfLogLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by NegHalfLog — confirm
*/
float negative_slope;
};
class INFERENCE_ENGINE_API_CLASS(LogLayer) : public CNNLayer {
public:
/**
* @brief A default constructor. Creates a new LogLayer instance and initializes layer parameters with the given values.
* @param prms Initial layer parameters
*/
// explicit LogLayer(const LayerParams &prms) : CNNLayer(prms), negative_slope(0.0f) {}
using CNNLayer::CNNLayer;
virtual ~LogLayer();
/**
* @brief Negative slope is used to tackle negative inputs instead of setting them to 0
* NOTE(review): comment and field copied from ReLULayer; likely unused by Log — confirm
*/
float negative_slope;
};
} // namespace InferenceEngine

View File

@@ -235,6 +235,7 @@ DECLARE_CONFIG_KEY(GPU_THROUGHPUT_STREAMS);
* PluginConfigParams::YES or PluginConfigParams::NO
*/
DECLARE_CONFIG_KEY(PERF_COUNT);
DECLARE_CONFIG_KEY(IDENTITY_SCALE_FACTOR);
/**
* @brief The key defines dynamic limit of batch processing.

View File

@@ -23,14 +23,14 @@ class BitMap : public Reader {
private:
static Register<BitMap> reg;
typedef struct {
typedef struct BmpHeader{
unsigned short type = 0u; /* Magic identifier */
unsigned int size = 0u; /* File size in bytes */
unsigned int reserved = 0u;
unsigned int offset = 0u; /* Offset to image data, bytes */
} BmpHeader;
typedef struct {
typedef struct BmpInfoHeader{
unsigned int size = 0u; /* Header size in bytes */
int width = 0, height = 0; /* Width and height of image */
unsigned short planes = 0u; /* Number of colour planes */

View File

@@ -26,9 +26,13 @@ else()
endif()
#saving rpath to GNA shared library be used by CI
log_rpath_from_dir(GNA ${libGNA_LIBRARIES_BASE_PATH})
#log_rpath_from_dir(GNA ${libGNA_LIBRARIES_BASE_PATH})
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ${INTEL_ITT_LIBS} Threads::Threads libGNA)
if(TARGET_OS STREQUAL "CHROMEOS")
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ${INTEL_ITT_LIBS} Threads::Threads gna)
else()
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ${INTEL_ITT_LIBS} Threads::Threads libGNA)
endif()
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_compile_definitions(${TARGET_NAME}
PRIVATE
@@ -45,7 +49,11 @@ target_compile_definitions(${TARGET_NAME}_test_static
GNA_LIB_VER=${GNA_LIBRARY_VERSION_NUMBER}
INTEGER_LOW_P
USE_STATIC_IE)
target_link_libraries(${TARGET_NAME}_test_static PUBLIC inference_engine_preproc_s libGNA::API)
if(TARGET_OS STREQUAL "CHROMEOS")
target_link_libraries(${TARGET_NAME}_test_static PUBLIC inference_engine_preproc_s gna)
else()
target_link_libraries(${TARGET_NAME}_test_static PUBLIC inference_engine_preproc_s libGNA::API)
endif()
target_include_directories(${TARGET_NAME}_test_static PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
set_target_properties(${TARGET_NAME}_test_static PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_test_static)

View File

@@ -309,6 +309,65 @@ void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel
}
}
// Initializes an interleave (transpose) component descriptor.
// Output dimensions are the transpose of the input (rows_out = columns_in,
// columns_out = rows_in); orientation flips from non-interleaved to interleaved.
// When postInitMem is false the caller's buffers are stored into the component;
// otherwise the caller's pointer references are redirected to the component's
// own pointer slots so memory can be bound later.
void GNAPluginNS::backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem) {
comp.num_rows_in = num_rows_in;
comp.num_columns_in = num_columns_in;
comp.num_rows_out = num_columns_in;
comp.num_columns_out = num_rows_in;
comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnInterleaveOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnNonInterleavedOrientation;
comp.orientation_out = kDnnInterleavedOrientation;
comp.output_scale_factor = output_scale_factor;
// NOTE(review): input scale factor is forced equal to the output scale factor here.
comp.input_scale_factor = output_scale_factor;
if (!postInitMem) {
comp.ptr_inputs = ptr_inputs;
comp.ptr_outputs = ptr_outputs;
} else {
ptr_inputs = &comp.ptr_inputs;
ptr_outputs = &comp.ptr_outputs;
}
}
// Initializes a deinterleave (inverse transpose) component descriptor.
// Output dimensions are the transpose of the input (rows_out = columns_in,
// columns_out = rows_in).
// When postInitMem is false the caller's buffers are stored into the component;
// otherwise the caller's pointer references are redirected to the component's
// own pointer slots so memory can be bound later.
void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem) {
comp.num_rows_in = num_rows_in;
comp.num_columns_in = num_columns_in;
comp.num_rows_out = num_columns_in;
comp.num_columns_out = num_rows_in;
comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnDeinterleaveOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnInterleavedOrientation;
// NOTE(review): orientation_out is kDnnInterleavedOrientation, same as the input;
// the interleave counterpart flips orientation — confirm this is intentional.
comp.orientation_out = kDnnInterleavedOrientation;
comp.output_scale_factor = output_scale_factor;
// NOTE(review): input scale factor is forced equal to the output scale factor here.
comp.input_scale_factor = output_scale_factor;
if (!postInitMem) {
comp.ptr_inputs = ptr_inputs;
comp.ptr_outputs = ptr_outputs;
} else {
ptr_inputs = &comp.ptr_inputs;
ptr_outputs = &comp.ptr_outputs;
}
}
void GNAPluginNS::backend::AMIntelDNN::Propagate() {
for (uint32_t i = 0; i < component.size(); i++) {

View File

@@ -192,6 +192,46 @@ public:
ptr_segments,
true);
}
template<class A, class B>
static void InitDeinterleaveComponent(intel_dnn_component_t &cmp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
A *&ptr_inputs,
B *&ptr_outputs) {
InitDeinterleaveComponentPrivate(cmp,
num_rows_in,
num_columns_in,
num_bytes_per_input,
num_bytes_per_output,
output_scale_factor,
(void *&) ptr_inputs,
(void *&) ptr_outputs,
true);
}
template<class A, class B>
static void InitInterleaveComponent(intel_dnn_component_t &cmp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
A *&ptr_inputs,
B *&ptr_outputs) {
InitInterleaveComponentPrivate(cmp,
num_rows_in,
num_columns_in,
num_bytes_per_input,
num_bytes_per_output,
output_scale_factor,
(void *&) ptr_inputs,
(void *&) ptr_outputs,
true);
}
template<class A, class B>
@@ -342,6 +382,26 @@ private:
void *&ptr_outputs,
intel_pwl_segment_t *ptr_segments,
bool postInitMem);
static void InitInterleaveComponentPrivate(intel_dnn_component_t &cmp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem);
static void InitDeinterleaveComponentPrivate(intel_dnn_component_t &cmp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem);
static void InitConvolutional1DComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in,

View File

@@ -19,6 +19,10 @@ enum DnnActivationType : uint8_t {
kActIdentity,
kActKaldiLstmClipping,
kActCustom,
kActExp,
kActLog,
kActNegHalfLog,
kActDivByN,
kActNumType
};
@@ -47,7 +51,11 @@ static const char *intel_dnn_activation_name[kActNumType] = {
"kActLeakyRelu",
"kActIdentity",
"kActKaldiLstmClipping",
"kActCustom"
"kActCustom",
"kActExp",
"kActLog",
"kActNegHalfLog",
"kActDivByN"
};
typedef enum DnnSoftmaxType {

View File

@@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#define PWL_FROM_FILE
#include <vector>
#include <iostream>
@@ -16,9 +18,13 @@ void make_gna_pwl(const DnnActivation fun,
const double u_bound,
const double in_scale,
const double out_scale,
std::vector<intel_pwl_segment_t> &gna_pwl) {
std::vector<intel_pwl_segment_t> &gna_pwl,
const uint32_t n) {
pwl_gna_slope_scale_t s;
uint32_t pwl_size = static_cast<int32_t>(pwl.size());
gnalog() << "make_gna_pwl\n";
gnalog() << " in_scale " << in_scale << "\n";
gnalog() << " out_scale " << out_scale << "\n";
switch (fun) {
case kActSigmoid:
case kActTanh: {
@@ -46,7 +52,7 @@ void make_gna_pwl(const DnnActivation fun,
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
gnalog() << (gna_pwl[1].xBase/in_scale)
gnalog() << ((int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale)
<< " " << (gna_pwl[1].yBase) / out_scale
<< " " << pwl[0].m
<< "\n";
@@ -75,6 +81,130 @@ void make_gna_pwl(const DnnActivation fun,
<< "\n";
break;
}
case kActExp: {
auto n_segments = static_cast<int32_t> (pwl_size) + 1;
gna_pwl.resize(n_segments);
// insert extra segment for x values < l_bound
gna_pwl[0].xBase = static_cast<int32_t> (INT32_MIN & XBASEMASK); // zero out the 2 lsb
gnalog() << "=========================== Exp Segments ===========================\n";
gna_pwl[0].yBase = gna_pwl[1].yBase = 0;
gna_pwl[1].xBase = (static_cast<int32_t> (in_scale * (-pwl[0].b / pwl[0].m))) & XBASEMASK;
gna_pwl[0].slope = 0;
gnalog() << (gna_pwl[0].xBase) / in_scale
<< " " << (gna_pwl[0].yBase) / out_scale
<< " " << 0.0
<< "\n";
s = gna_slope(pwl[0].m, in_scale, out_scale);
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
gnalog() << (gna_pwl[1].xBase / in_scale)
<< " " << (gna_pwl[1].yBase) / out_scale
<< " " << pwl[0].m
<< "\n";
for (uint32_t i = 1; i < pwl_size - 1; ++i) {
s = gna_slope(pwl[i].m, in_scale, out_scale);
gna_pwl[i + 1].xBase = (static_cast<int32_t> (in_scale * pwl[i].alpha)) & XBASEMASK;
gna_pwl[i + 1].yBase = FLOAT_TO_INT16(pwl[i].beta * out_scale);
gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index;
gnalog() << (pwl[i].alpha)
<< " " << pwl[i].beta
<< " " << pwl[i].m
<< "\n";
}
// insert extra segment for xvalues > u_bound
gna_pwl[n_segments - 1].xBase =
((uint32_t)(in_scale * (INT16_MAX/out_scale - pwl[pwl_size - 2].b) / pwl[pwl_size - 2].m)) & XBASEMASK;
gna_pwl[n_segments - 1].yBase = INT16_MAX;
gna_pwl[n_segments - 1].slope = 0;
gnalog() << (gna_pwl[n_segments - 1].xBase / in_scale)
<< " " << 1.0
<< " " << 0.0
<< "\n";
break;
}
case kActLog: {
auto n_segments = static_cast<int32_t> (pwl_size);
gna_pwl.resize(n_segments);
// insert extra segment for x values < l_bound
gna_pwl[0].xBase = static_cast<int32_t> (INT32_MIN & XBASEMASK); // zero out the 2 lsb
gnalog() << "=========================== Exp Segments ===========================\n";
gna_pwl[0].yBase = gna_pwl[1].yBase = INT16_MIN;
gna_pwl[1].xBase = (static_cast<int32_t> (1 + ~XBASEMASK)); // smallest representable value
gna_pwl[0].slope = 0;
gnalog() << (gna_pwl[0].xBase) / in_scale
<< " " << (gna_pwl[0].yBase) / out_scale
<< " " << 0.0
<< "\n";
s = gna_slope(pwl[0].m, in_scale, out_scale);
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
gnalog() << (gna_pwl[1].xBase / in_scale)
<< " " << (gna_pwl[1].yBase) / out_scale
<< " " << pwl[0].m
<< "\n";
for (uint32_t i = 1; i < pwl_size - 1; ++i) {
s = gna_slope(pwl[i].m, in_scale, out_scale);
gna_pwl[i + 1].xBase = (static_cast<int32_t> (in_scale * pwl[i].alpha)) & XBASEMASK;
gna_pwl[i + 1].yBase = FLOAT_TO_INT16(pwl[i].beta * out_scale);
gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index;
gnalog() << (pwl[i].alpha)
<< " " << pwl[i].beta
<< " " << pwl[i].m
<< "\n";
}
break;
}
case kActNegHalfLog: {
auto n_segments = static_cast<int32_t> (pwl_size);
gna_pwl.resize(n_segments);
// insert extra segment for x values < l_bound
gna_pwl[0].xBase = static_cast<int32_t> (INT32_MIN & XBASEMASK); // zero out the 2 lsb
gnalog() << "=========================== NegHalfLog Segments ===========================\n";
gna_pwl[0].yBase = gna_pwl[1].yBase = INT16_MAX;
gna_pwl[1].xBase = (static_cast<int32_t> (1 + ~XBASEMASK)); // smallest representable value
gna_pwl[0].slope = 0;
gnalog() << gna_pwl[0].xBase / in_scale
<< " " << (gna_pwl[0].yBase) / out_scale
<< " " << 0.0
<< "\n";
s = gna_slope(pwl[0].m, in_scale, out_scale);
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
gnalog() << ((gna_pwl[1].xBase & XBASEMASK) / in_scale)
<< " " << (gna_pwl[1].yBase) / out_scale
<< " " << pwl[0].m
<< "\n";
for (uint32_t i = 1; i < pwl_size - 1; ++i) {
s = gna_slope(pwl[i].m, in_scale, out_scale);
gna_pwl[i + 1].xBase = (static_cast<int32_t> (in_scale * pwl[i].alpha)) & XBASEMASK;
gna_pwl[i + 1].yBase = FLOAT_TO_INT16(pwl[i].beta * out_scale);
gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index;
gnalog() << (pwl[i].alpha)
<< " " << pwl[i].beta
<< " " << pwl[i].m
<< "\n";
}
break;
}
case kActRelu:
case kActLeakyRelu: {
auto n_segments = 2;
@@ -106,7 +236,8 @@ void make_gna_pwl(const DnnActivation fun,
break;
}
case kActIdentity:
case kActKaldiLstmClipping: {
case kActKaldiLstmClipping:
case kActDivByN: {
int32_t x_lower = INT32_MIN;
int32_t x_upper = INT32_MAX;
int16_t y_lower = INT16_MIN;
@@ -130,14 +261,20 @@ void make_gna_pwl(const DnnActivation fun,
x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale);
}
}
} else {
} else if (fun == kActIdentity) {
gnalog() << "=========================== Identity Segments ===========================\n";
if (x_lower < y_lower * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale);
if (x_upper > y_upper * in_scale / out_scale) x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale);
if (y_lower < x_lower * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale);
if (y_upper > x_upper * out_scale / in_scale) y_upper = FLOAT_TO_INT16(x_upper * out_scale / in_scale);
} else {
gnalog() << "=========================== DivByN Segments ===========================\n";
if (x_lower < y_lower * (float)n * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * (float)n * in_scale / out_scale);
if (x_upper > y_upper * (float)n * in_scale / out_scale) x_upper = FLOAT_TO_INT32(y_upper * (float)n * in_scale / out_scale);
if (y_lower < x_lower * (1.0 / n) * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * (1.0 / n) * out_scale / in_scale);
if (y_upper > x_upper* (1.0 / n) * out_scale / in_scale) y_upper = FLOAT_TO_INT16(x_upper * (1.0 / n) * out_scale / in_scale);
}
gna_pwl.resize(n_segments);
gna_pwl.resize(n_segments);
gna_pwl[0].xBase = INT32_MIN & XBASEMASK; // zero out the 2 lsb
gna_pwl[0].yBase = y_lower;
gna_pwl[0].slope = 0;
@@ -148,13 +285,19 @@ void make_gna_pwl(const DnnActivation fun,
gna_pwl[1].xBase = x_lower & XBASEMASK; // zero out the 2 lsb
gna_pwl[1].yBase = y_lower;
s = gna_slope(1.0, in_scale, out_scale);
if (fun == kActDivByN) {
s = gna_slope(1.0 / n, in_scale, out_scale);
} else {
s = gna_slope(1.0, in_scale, out_scale);
}
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
gnalog() << gna_pwl[1].xBase / in_scale
<< " " << gna_pwl[1].yBase / out_scale
<< " " << 1.0
<< "\n";
int32_t round_scale = FLOAT_TO_INT16(0.5f / s.slope) & XBASEMASK;
gna_pwl[1].xBase = (gna_pwl[1].xBase - round_scale) | s.slope_scale_index;
gnalog() << (int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale
<< " " << gna_pwl[1].yBase / out_scale
<< " " << 1.0
<< "\n";
if (INT32_MAX > x_upper) { // need a right segment
gna_pwl.push_back({
@@ -162,10 +305,10 @@ void make_gna_pwl(const DnnActivation fun,
y_upper,
0 });
gnalog() << gna_pwl[n_segments].xBase / in_scale
<< " " << gna_pwl[n_segments].yBase / out_scale
<< " " << 0
<< "\n";
gnalog() << (x_upper & XBASEMASK) / in_scale
<< " " << gna_pwl[n_segments].yBase / out_scale
<< " " << 0
<< "\n";
}
break;
}

View File

@@ -14,4 +14,5 @@ void make_gna_pwl(const DnnActivation fun,
const double u_bound,
const double in_scale,
const double out_scale,
std::vector<intel_pwl_segment_t> &gna_pwl);
std::vector<intel_pwl_segment_t> &gna_pwl,
const uint32_t n);

View File

@@ -6,6 +6,8 @@
#include <iostream>
#include <details/ie_exception.hpp>
#include "quantization.h"
#include <xmmintrin.h>
#include <smmintrin.h>
void QuantizeAffine16(float *ptr_float_weights,
float *ptr_float_biases,
@@ -20,7 +22,7 @@ void QuantizeAffine16(float *ptr_float_weights,
uint32_t num_columns_padded) {
uint32_t num_saturate = 0;
if (*ptr_weight_scale_factor == 1.0) {
/* if (*ptr_weight_scale_factor == 1.0) {
// scale factor for weights is not calculated yet
float mean_weight = 0.0;
float mean_weight_squared = 0.0;
@@ -48,7 +50,7 @@ void QuantizeAffine16(float *ptr_float_weights,
*ptr_weight_scale_factor = static_cast<float>(MAX_VAL_2B_WEIGHT) / max_weight;
}
*ptr_output_scale_factor = input_scale_factor * *ptr_weight_scale_factor;
}
}*/
for (uint32_t row = 0; row < num_rows; row++) {
for (uint32_t col = 0; col < num_columns; col++) {
@@ -104,26 +106,156 @@ void QuantizeAffine16(float *ptr_float_weights,
}
}
float ScaleFactorForQuantization(void *ptr_float_memory, float target_max, size_t num_elements) {
float *ptr_float_feat = reinterpret_cast<float *>(ptr_float_memory);
float max = 0.0;
__attribute__ ((target ("default")))
float ScaleFactorForQuantization(void *ptr_float_memory, float target_max, size_t num_elements)
{
float *ptr_float_feat = (float*)ptr_float_memory;
float min = 0.0;
float buf[4];
float scale_factor;
float abs_f;
__m128 zero = _mm_setzero_ps();
__m128 total_abs = _mm_setzero_ps();
char not_aligned_buffer[32];
__m128 v, neg_v, abs;
__m128 v2, neg_v2, abs2;
for (size_t i = 0; i < num_elements; i++) {
if (fabs(ptr_float_feat[i]) > max) {
max = fabs(ptr_float_feat[i]);
uint32_t moves = num_elements >>3;
uint32_t mod = num_elements % 8;
uint32_t i;
v = _mm_load_ps(ptr_float_feat);
neg_v = _mm_sub_ps(zero, v);
abs = _mm_max_ps(v, neg_v);
total_abs = _mm_max_ps(total_abs, abs);
for (i = 0; i<moves; i++, ptr_float_feat +=8)
{
v = _mm_load_ps(ptr_float_feat);
v2 = _mm_load_ps(ptr_float_feat+4);
neg_v = _mm_sub_ps(zero, v);
abs = _mm_max_ps(v, neg_v);
neg_v2 = _mm_sub_ps(zero, v2);
abs2 = _mm_max_ps(v2, neg_v2);
total_abs = _mm_min_ps(total_abs, abs);
total_abs = _mm_min_ps(total_abs, abs2);
}
_mm_storeu_ps(buf, total_abs);
float single_min_lo = buf[0] < buf[1] ? buf[0] : buf[1];
float single_min_hi = buf[2] < buf[3] ? buf[2] : buf[3];
float single_min = single_min_lo < single_min_hi ? single_min_lo : single_min_hi;
for (i = 0; i < mod; i++)
{
abs_f = fabs(ptr_float_feat[i]);
if (abs_f < min) {
min = abs_f;
}
}
if (max == 0) {
scale_factor = -1.0f; // need to handle all zeros as a special case
} else {
scale_factor = target_max / max;
return(single_min != 0 ? (single_min < 1.0 ? 1 / single_min : 1.0f) : 1.0f);
}
// Extract the float lane at position `index` (valid range 0..3) from an
// SSE vector.
// The previous implementation read an inactive union member; that is
// well-defined in C but undefined behaviour in C++, so spill the vector
// to a local array with _mm_storeu_ps instead - same result, portable.
float accessmember(__m128 v, int index)
{
    float lanes[4];
    _mm_storeu_ps(lanes, v);
    return lanes[index];
}
__attribute__ ((target ("default")))
void QuantizeBias8(float *ptr_float_biases, intel_compound_bias_t *ptr_int_biases, float *ptr_output_scale_factor, uint32_t num_rows)
{
float *ptr_float_feat = (float*)ptr_float_biases;
intel_compound_bias_t *ptr_int = (intel_compound_bias_t*)ptr_int_biases;
uint32_t moves = num_rows / 4;
uint32_t mod = num_rows % 4;
uint32_t i, j;
__m128 v, zero, half, neg_half, scale_factores, mask, rounding_values, min, max, values;
#ifdef ROUND_AND_CAST
__m128i tmp;
#endif
zero = _mm_setzero_ps();
half = _mm_set1_ps(0.5f);
neg_half = _mm_set1_ps(-0.5f);
max = _mm_set1_ps(2147483647.0f);
min = _mm_set1_ps(-2147483648.0f);
scale_factores = _mm_set1_ps(*ptr_output_scale_factor);
for (i = 0; i < moves; i++, ptr_float_feat += 4, ptr_int += 4) {
v = _mm_load_ps(ptr_float_feat);
//rounding_values = (v>0) ? 0.5f : -0.5f;
mask = _mm_min_ps(v, zero);
rounding_values = _mm_blendv_ps(half, neg_half, mask);
// values = v * scale_factores + rounding_values
values = _mm_mul_ps(v, scale_factores);
values = _mm_add_ps(values, rounding_values);
// shrink to <-2147483648.0f, 2147483647.0f>
values = _mm_min_ps(values, max);
values = _mm_max_ps(values, min);
#ifdef ROUND_AND_CAST
// round and cast float to int16 ... much faster than "only cast" in MS compiler ??
tmp = _mm_cvtps_epi32(values);
ptr_int[0].bias = tmp.m128i_i32[0];
ptr_int[1].bias = tmp.m128i_i32[1];
ptr_int[2].bias = tmp.m128i_i32[2];
ptr_int[3].bias = tmp.m128i_i32[3];
#else
// only cast float to int16
for (j = 0; j < 4; j++)
ptr_int[j].bias = (int32_t)accessmember(values, j);
#endif
}
return (scale_factor);
for (i = 0; i < mod; i++) {
float rounding_value = (ptr_float_feat[i]>0) ? 0.5f : -0.5f;
float value = ptr_float_feat[i] * *ptr_output_scale_factor + rounding_value;
if (value > 2147483647.0) {
ptr_int[i].bias = 2147483647L;
}
else if (value < -2147483648.0) {
ptr_int[i].bias = -2147483648LL;
}
else {
ptr_int[i].bias = (int32_t)value;
}
}
}
/*__attribute__ ((target ("default")))
void QuantizeBias8(float *ptr_float_biases, intel_compound_bias_t *ptr_int_biases, float *ptr_output_scale_factor, uint32_t num_rows)
{
uint32_t num_saturate = 0;
for (uint32_t j = 0; j < num_rows; j++) {
float rounding_value = (ptr_float_biases[j] > 0) ? 0.5f : -0.5f;
float value = ptr_float_biases[j] * *ptr_output_scale_factor + rounding_value;
if (value > 2147483647.0) {
ptr_int_biases[j].bias = 2147483647L;
num_saturate++;
} else if (value < -2147483648.0) {
ptr_int_biases[j].bias = -2147483648LL;
num_saturate++;
} else {
ptr_int_biases[j].bias = (int32_t) value;
}
}
}*/
void QuantizeVector16(float *ptr_float_memory, int16_t *ptr_int_memory, uint32_t num_elements, float scale_factor) {
float *ptr_float_feat = reinterpret_cast<float *>(ptr_float_memory);
uint32_t num_saturate = 0;
@@ -158,7 +290,7 @@ void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases,
}
uint32_t num_saturate = 0;
if (*ptr_weight_scale_factor == 1.0) {
/*if (*ptr_weight_scale_factor == 1.0) {
// scale factor for weights is not calculated yet
float mean_weight = 0.0;
float mean_weight_squared = 0.0;
@@ -191,7 +323,7 @@ void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases,
// 4. quantize and store scaled row
*ptr_weight_scale_factor = MAX_OUT_MULTIPLIER * *ptr_weight_scale_factor; // increase dynamic range by max multiplier
*ptr_output_scale_factor = input_scale_factor * *ptr_weight_scale_factor;
}
}*/
float valueAcc = 0.0;
for (uint32_t row = 0; row < num_rows; row++) {
float scaled_row_max = 0;
@@ -237,19 +369,7 @@ void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases,
// bias value of the bas will be only used when input bias provided
if (ptr_float_biases != nullptr) {
for (uint32_t j = 0; j < num_rows; j++) {
float rounding_value = (ptr_float_biases[j] > 0) ? 0.5f : -0.5f;
float value = ptr_float_biases[j] * *ptr_output_scale_factor + rounding_value;
if (value > 2147483647.0) {
ptr_int_biases[j].bias = 2147483647L;
num_saturate++;
} else if (value < -2147483648.0) {
ptr_int_biases[j].bias = -2147483648LL;
num_saturate++;
} else {
ptr_int_biases[j].bias = (int32_t) value;
}
}
QuantizeBias8(ptr_float_biases, ptr_int_biases, ptr_output_scale_factor, num_rows);
}
if (num_saturate > 0) {

View File

@@ -18,6 +18,7 @@
#include "gna_slope_scale.h"
namespace GNAPluginNS {
extern float identity_SF;
namespace frontend {
struct ScaleFactorUpdateResult {
InferenceEngine::CNNLayer *restartLayer = nullptr;
@@ -53,7 +54,7 @@ template<>
class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
private :
const float activation_scale_factor = 2048.f;
const float identity_scale_factor = 2049.0f;
const float identity_scale_factor = identity_SF;
const float k = 5;
const float k_identity = 6;
@@ -472,7 +473,7 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
quant->_weights_quant.scale = 1.0f;
}
if (wl->_biases) {
/*if (wl->_biases) {
quant->_bias_quant.scale = ScaleFactorForQuantization(wl->_biases->buffer().as<float *>(),
MAX_VAL_4B_BIAS,
wl->_biases->size());
@@ -480,7 +481,7 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
quant->_bias_quant.scale = std::min(quant->_weights_quant.scale * quant->_src_quant.scale, quant->_bias_quant.scale);
quant->_weights_quant.scale = quant->_bias_quant.scale / quant->_src_quant.scale;
}
}
}*/
// TODO: find out why this is needed
if (weightsSize == 1) {
@@ -552,7 +553,7 @@ class ScaleFactorPerLayer<InferenceEngine::ConvolutionLayer*> : public ScaleFact
*/
class ScaleFactorCalculator {
using Cnt = std::vector<InferenceEngine::CNNLayerPtr>;
Cnt net;
Cnt net;
mutable Cnt::const_iterator idx;
mutable bool needRestart = false;
int weightsBytesSize;

View File

@@ -59,8 +59,10 @@ void ExportLdForNoMmu(uint32_t modelId, std::ostream & outStream) {
status = Gna2ModelExportConfigSetSource(exportConfig, 0, modelId);
GNADeviceHelper::checkGna2Status(status);
#if GNA_LIB_VER != 2 && GNA_LIB_VER != 1
status = Gna2ModelExportConfigSetTarget(exportConfig, Gna2DeviceVersionEmbedded3_0);
GNADeviceHelper::checkGna2Status(status);
#endif
void * ldNoMmu;
uint32_t ldNoMmuSize;

View File

@@ -24,7 +24,10 @@
#include "details/ie_exception.hpp"
#include "gna_plugin_log.hpp"
std::mutex GNADeviceHelper::acrossPluginsSync{};
uint8_t* GNADeviceHelper::alloc(uint32_t size_requested, uint32_t *size_granted) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
void * memPtr;
#if GNA_LIB_VER == 1
memPtr = GNAAlloc(nGNAHandle, size_requested, size_granted);
@@ -41,6 +44,7 @@ uint8_t* GNADeviceHelper::alloc(uint32_t size_requested, uint32_t *size_granted)
}
void GNADeviceHelper::free(void * ptr) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
#if GNA_LIB_VER == 1
GNAFree(nGNAHandle);
#else
@@ -53,6 +57,7 @@ void GNADeviceHelper::free(void * ptr) {
uint32_t GNADeviceHelper::propagate(const intel_nnet_type_t *pNeuralNetwork,
const uint32_t *pActiveIndices,
uint32_t nActiveIndices) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t reqId;
nGNAStatus = GNAPropagateForward(nGNAHandle, pNeuralNetwork,
@@ -62,14 +67,17 @@ uint32_t GNADeviceHelper::propagate(const intel_nnet_type_t *pNeuralNetwork,
}
#else
void GNADeviceHelper::setUpActiveList(const uint32_t requestConfigId, uint32_t layerIndex, uint32_t* ptr_active_indices, uint32_t num_active_indices) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
const auto status = Gna2RequestConfigEnableActiveList(requestConfigId, layerIndex, num_active_indices, ptr_active_indices);
checkGna2Status(status);
}
void GNADeviceHelper::propagateSync(const uint32_t requestConfigId) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
wait(propagate(requestConfigId));
}
uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t reqId;
const auto status = Gna2RequestEnqueue(requestConfigId, &reqId);
checkGna2Status(status);
@@ -77,6 +85,7 @@ uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId) {
}
uint32_t GNADeviceHelper::createModel(const Gna2Model& gnaModel) const {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t modelId;
const auto status = Gna2ModelCreate(nGnaDeviceIndex, &gnaModel, &modelId);
checkGna2Status(status);
@@ -84,11 +93,13 @@ uint32_t GNADeviceHelper::createModel(const Gna2Model& gnaModel) const {
}
void GNADeviceHelper::releseModel(const uint32_t model_id) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
const auto status = Gna2ModelRelease(model_id);
checkGna2Status(status);
}
uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t reqConfId;
auto status = Gna2RequestConfigCreate(model_id, &reqConfId);
checkGna2Status(status);
@@ -121,6 +132,7 @@ void GNADeviceHelper::checkGna2Status(Gna2Status status) {
#endif
void GNADeviceHelper::wait(uint32_t reqId) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
#if GNA_LIB_VER == 2
const auto status = Gna2RequestWait(reqId, GNA_TIMEOUT);
checkGna2Status(status);
@@ -220,9 +232,11 @@ void GNADeviceHelper::open(uint8_t n_threads) {
void GNADeviceHelper::close() {
#if GNA_LIB_VER == 1
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
GNADeviceClose(nGNAHandle);
nGNAHandle = 0;
#else
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
const auto status = Gna2DeviceClose(nGnaDeviceIndex);
checkGna2Status(status);
#endif

View File

@@ -30,7 +30,10 @@
/**
* holds gna - style handle in RAII way
*/
typedef uint32_t gna_device_id; // TODO:3:API redesign: remove and use uint32_t instead.
class GNADeviceHelper {
static std::mutex acrossPluginsSync;
#if GNA_LIB_VER == 1
intel_gna_status_t nGNAStatus = GNA_NOERROR;
intel_gna_handle_t nGNAHandle = 0;
@@ -159,6 +162,7 @@ public:
void setOMPThreads(uint8_t const n_threads);
void initGnaPerfCounters() {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
#if GNA_LIB_VER == 1
nGNAPerfResults = {{0, 0, 0, 0, 0, 0, 0}, {0, 0}, {0, 0, 0}, {0, 0}};
nGNAPerfResultsTotal = {{0, 0, 0, 0, 0, 0, 0}, {0, 0}, {0, 0, 0}, {0, 0}};

View File

@@ -803,6 +803,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
uint32_t num_rows_out = isDiag ? num_rows_in : FROM_IR_DIM(outputs, 1);
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
uint32_t num_padding_out = isDiag ? num_padding : 0;
@@ -1190,6 +1191,7 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
num_rows = FROM_IR_DIM(inputs, 1);
}
uint32_t non_batch_dim = (orientation == kDnnNonInterleavedOrientation) ? num_columns : num_rows;
size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
* outputs->getPrecision().size();
@@ -1198,6 +1200,10 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
static InferenceEngine::details::caseless_unordered_map<std::string, DnnActivationType> supportedActivations = {
{"sigmoid", kActSigmoid},
{"divbyn", kActDivByN},
{"log", kActLog},
{"neghalflog", kActNegHalfLog},
{"exp", kActExp},
{"tanh", kActTanh},
{"relu", kActRelu},
{"leakyrelu", kActLeakyRelu},
@@ -1262,12 +1268,14 @@ case name:\
&*ptr_pwl_segments.begin(),
static_cast<uint32_t>(ptr_pwl_segments.size()),
input_pwl_scale_factor,
output_pwl_scale_factor);
output_pwl_scale_factor,
non_batch_dim);
} else {
PwlDesignOpt16(activation_type,
ptr_pwl_segments,
input_pwl_scale_factor,
output_pwl_scale_factor);
output_pwl_scale_factor,
non_batch_dim);
}
ptr_pwl_segments_target = reinterpret_cast<intel_pwl_segment_t*>(&ptr_pwl_segments_target);
}
@@ -1298,30 +1306,85 @@ case name:\
}
void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
static int count = 0;
count++;
if (LayerInfo(layer).isTrivialPermute()) {
return;
}
auto layerOrder = layer->GetParamAsInts("order");
string dimMessage;
if (layerOrder == vector<int>({0, 3, 2, 1})) {
return; // supported case
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
if (layer->insData.empty()) {
THROW_GNA_EXCEPTION << "Input layer pointer is unexpectedly absent";
}
auto inputs = layer->insData.begin()->lock();
auto inputsOrder = inputs->getTensorDesc().getDims();
auto outputs = layer->outData.front();
// squeeze order vector
SizeVector squeezedInputOrder;
for (auto input_shape : inputsOrder) {
if (input_shape != 1) squeezedInputOrder.push_back(input_shape);
}
SizeVector squeezedOutputOrder;
for (auto output_shape : layerOrder) {
if (output_shape != 0) squeezedOutputOrder.push_back(output_shape);
}
if (layerOrder == vector<int>({1, 0, 2})) {
IE_ASSERT(!layer->insData.empty());
auto inputs = layer->insData.begin()->lock();
auto inputs_size = inputs->getTensorDesc().getDims().size();
if (inputs_size != layerOrder.size()) {
THROW_IE_EXCEPTION << "[GNA plugin] Invalid input tensor size for permute layer " <<
layer->GetParamAsString("order");
}
auto permuteDim0 = FROM_IR_DIM(inputs, inputs_size);
auto permuteDim1 = FROM_IR_DIM(inputs, inputs_size - 1);
if (permuteDim0 == 1 || permuteDim1 == 1) {
return; // supported case
}
dimMessage = " (with first dim = " + to_string(permuteDim0) + ", second dim = " + to_string(permuteDim1) + ")";
void* ptr_inputs = nullptr;
void* ptr_outputs = nullptr;
if (squeezedInputOrder.size() > 2) {
THROW_GNA_EXCEPTION << "unsupported permute (requested transpose is not 2D)";
}
THROW_IE_EXCEPTION << "[GNA plugin] Unsupported permute order: was " << layer->GetParamAsString("order") <<
dimMessage << ", but only support 1,0,2 (with first or second dim = 1) and 0,3,2,1";
if (count%2 == 0) {
auto temp = squeezedInputOrder[0];
squeezedInputOrder[0] = squeezedInputOrder[1];
squeezedInputOrder[1] = temp;
}
if (std::min(squeezedInputOrder[0], squeezedInputOrder[1]) > 8) {
THROW_GNA_EXCEPTION << "unsupported permute (minor dimension="
<< std::min(squeezedInputOrder[0], squeezedInputOrder[1]) << " > 8)";
}
// now this can be run on GNA
if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case
if (ALIGN(squeezedInputOrder[1], 8) != squeezedInputOrder[1]) {
THROW_GNA_EXCEPTION << "unsupported permute (row size not a multiple of 8)";
} else {
auto& currentComponent = dnnComponents.addComponent(layer->name, "interleave");
dnn->InitInterleaveComponent(currentComponent,
squeezedInputOrder[0],
squeezedInputOrder[1],
inputs->getPrecision().size(),
outputs->getPrecision().size(),
(quantized == nullptr) ? 1.0f : quantized->_dst_quant.scale,
ptr_inputs,
ptr_outputs);
}
} else { // deinterleave case
if (ALIGN(squeezedInputOrder[0], 8) != squeezedInputOrder[0]) {
THROW_GNA_EXCEPTION << "[GNA plugin] unsupported permute (column size not a multiple of 8)";
} else {
auto& currentComponent = dnnComponents.addComponent(layer->name, "deinterleave");
dnn->InitDeinterleaveComponent(currentComponent,
squeezedInputOrder[0],
squeezedInputOrder[1],
inputs->getPrecision().size(),
outputs->getPrecision().size(),
quantized == nullptr ? 1 : quantized->_dst_quant.scale,
ptr_inputs,
ptr_outputs);
}
}
size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(
begin(outputs->getDims()), end(outputs->getDims())), 8)
* outputs->getPrecision().size();
size_t num_data_bytes_in = squeezedInputOrder[0] * squeezedInputOrder[1] * inputs->getPrecision().size();
connectInput(layer, ptr_inputs, num_data_bytes_in);
connectOutput(layer, ptr_outputs, num_data_bytes_out);
}
void SKIP(GNAGraphCompiler*, CNNLayerPtr) {}
@@ -1338,7 +1401,7 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
{{"Split"}, SKIP}, // skip information about which part of prev layer need to consume handle during layer creation
{{"Slice"}, SKIP},
{{"link"}, SKIP},
{{"clamp", "sigmoid", "relu", "tanh", "identity"}, CREATE(PWLPrimitive)},
{{"clamp", "sigmoid", "relu", "tanh", "log", "neghalflog", "divbyn", "exp", "identity"}, CREATE(PWLPrimitive)},
{{"Convolution"}, CREATE(ConvolutionPrimitive)},
{{"Permute"}, CREATE(PermutePrimitive)}, // permute of certain form (2D transpose) can be assimilated in followed FC layer
{{"Pooling"}, CREATE(PoolingPrimitive)},
@@ -1644,11 +1707,14 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
}
if (LayerInfo(prevLayer).isPermute()) {
gnalog() << "Skipping permute layer: " << prevLayer->name << "\n";
return {connectInput(prevLayer, ptr, num_data_bytes_in, offset, 0).input, true, prevLayer};
if (!LayerInfo(prevLayer).isTrivialPermute()) {
// we should have GNA primitive for it
THROW_GNA_EXCEPTION << "missed gna primitive for permute: " << prevLayer->name;
}
gnalog() << "Skipping trivial permute layer: " << prevLayer->name << "\n";
return connectInput(prevLayer, ptr, num_data_bytes_in, offset, 0);
}
THROW_GNA_EXCEPTION << "Cannot connect input for: " << layer->name;
}

View File

@@ -14,6 +14,7 @@
#include "descriptions/gna_input_desc.hpp"
#include "descriptions/gna_flags.hpp"
#include "cpp_interfaces/base/ie_plugin_base.hpp"
#include "cpp_interfaces/impl/ie_memory_state_internal.hpp"
#include "connection_details.hpp"
#include "backend/dnn.hpp"
#include "memory/polymorph_allocator.hpp"
@@ -52,6 +53,7 @@ public:
GNAPluginNS::backend::DnnComponents dnnComponents;
MemoryConnection memory_connection;
ConcatConnection concat_connection;
std::vector<InferenceEngine::IMemoryStateInternal::Ptr> memoryStates;
void setGNAMemoryPtr(std::shared_ptr<GNAPluginNS::gna_memory_type> gnaMemPtr);
void setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr);

View File

@@ -18,8 +18,15 @@
#include "gna_lib_ver_selector.hpp"
// Plain descriptor of a GNA neural network: the number of layers, the
// input grouping level, and a pointer to the per-layer configurations.
typedef struct _nnet_type_t
{
uint32_t nLayers; // The number of layers in the network.
uint32_t nGroup; // Input vector grouping level.
intel_nnet_layer_t *pLayers; // Layer configurations.
} intel_nnet_type_t;
#ifndef WIN32
#include <profiler.h>
void clearTimeB(timeb & tb) {
tb.time = 0;

View File

@@ -57,6 +57,7 @@ uint32_t ToByteSize(const Gna2DataType type) {
}
}
float GNAPluginNS::identity_SF = 256.0f;
constexpr uint32_t GNAPluginNS::GNAPlugin::FAKE_REQUEST_CONFIG_ID;
#endif
using namespace InferenceEngine;
@@ -501,11 +502,11 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
// auto idx = std::distance(outputsDataMap.begin(), outputPort);
auto & desc = outputsDesc[idx];
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
desc.ptrs.resize(gnaFlags->gna_lib_async_threads_num);
desc.orientation = component.orientation_out;
desc.num_bytes_per_element = component.num_bytes_per_output;
desc.scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
// TODO: this need to be fixed
desc.num_elements = component.num_rows_out;
@@ -518,6 +519,18 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
// gets output layer pointer in original topology not in cloned
auto outLayer = outPort.second->getCreatorLayer().lock();
// Memory layers are not dnnComponents hence we need to make switch with identity layer
if (outLayer->type == "Memory") {
// traverse memory connection to find corresponding output_memory
for (auto && memConnection : graphCompiler.memory_connection) {
if (memConnection.second.getInput()->name == outLayer->name) {
// if connection is found, replace memory input layer with memory output layer
outLayer = memConnection.second.getOutput();
break;
}
}
}
// searching for outData represented in GNA blob
// using ufs - upper first search
gnalog() << "[UFS] searching for : "<< outPort.first << " representation in GNA\n";
@@ -693,10 +706,25 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
num_rotate_rows = dnn->num_rotate_rows;
num_rotate_columns = dnn->num_rotate_columns;
for (auto& gnaMemoryConn : graphCompiler.memory_connection) {
std::string name = gnaMemoryConn.first;
GNAMemoryLayer memLayer = gnaMemoryConn.second;
InferenceEngine::CNNLayerPtr layer = memLayer.getInput();
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
auto ptr = make_blob_with_precision(TensorDesc(InferenceEngine::Precision::I16,
memLayer.getDims(),
memLayer.getDims().size() == 2 ? NC : NCHW),
memLayer.gna_ptr);
graphCompiler.memoryStates.emplace_back(std::make_shared<memory::GNAMemoryState>(name, ptr, scale_factor));
}
DumpXNNToFile();
#ifdef PLOT
dnn->WriteGraphWizModel("gna-blob.dot");
dnn->WriteGraphWizModel("/data/local/tmp/gna-blob.dot");
#endif
#if GNA_LIB_VER == 2
createRequestConfigsForGnaModels();
@@ -1047,7 +1075,7 @@ std::vector<InferenceEngine::MemoryStateInternal::Ptr> GNAPlugin::QueryState()
return {};
}
return {std::make_shared<memory::GNAMemoryState>(shared_from_this())};
return graphCompiler.memoryStates;
}
std::string GNAPlugin::GetName() const noexcept {
@@ -1400,6 +1428,14 @@ void GNAPlugin::SetConfig(const std::map<std::string, std::string> &config) {
}
});
if_set(CONFIG_KEY(IDENTITY_SCALE_FACTOR), [&] {
auto idScaleFactor = InferenceEngine::CNNLayer::ie_parse_float(value);
if (fp32eq(idScaleFactor, 0.0f)) {
THROW_GNA_EXCEPTION << "identity scale factor of 0.0f not supported";
}
identity_SF = idScaleFactor;
});
if_set(GNA_CONFIG_KEY(LIB_N_THREADS), [&] {
uint64_t lib_threads = std::stoul(value, NULL, 10);
if (lib_threads == 0 || lib_threads > std::numeric_limits<uint8_t>::max()/2-1) {

View File

@@ -28,6 +28,7 @@
#endif
namespace GNAPluginNS {
extern float identity_SF;
class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::enable_shared_from_this<GNAPlugin> {
protected:
std::string _pluginName = "GNA";

View File

@@ -6,8 +6,28 @@
#include <ostream>
#include <details/ie_exception.hpp>
#include "sys/timeb.h"
// #define GNA_DEBUG
typedef unsigned long long time_tsc;
typedef struct
{
time_tsc start; // time value on profiler start
time_tsc stop; // time value on profiler stop
time_tsc passed; // time passed between start and stop
} intel_gna_profiler_tsc;
typedef struct timeb time_rtc;
typedef struct
{
time_rtc start; // time value on profiler start
time_rtc stop; // time value on profiler stop
time_rtc passed; // time passed between start and stop
} intel_gna_profiler_rtc;
//#define GNA_DEBUG
#ifdef GNA_DEBUG
#include <iostream>
/**
@@ -58,7 +78,7 @@ inline GnaLog & gnawarn() {
#ifdef __PRETTY_FUNCTION__
#undef __PRETTY_FUNCTION__
#endif
#ifdef _WIN32
#if defined _WIN32
# define __PRETTY_FUNCTION__ __FUNCSIG__
#else
# define __PRETTY_FUNCTION__ __FUNCTION__

View File

@@ -111,6 +111,7 @@ std::map<std::string, std::string> GNAPlugin::supportedConfigKeysWithDefaults()
{GNA_CONFIG_KEY(PRECISION), Precision(Precision::I8).name()},
{GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), CONFIG_VALUE(YES)},
{CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(NO)},
{CONFIG_KEY(IDENTITY_SCALE_FACTOR), "2048.0"},
{GNA_CONFIG_KEY(LIB_N_THREADS), "1"},
{CONFIG_KEY(SINGLE_THREAD), CONFIG_VALUE(YES)}
};

View File

@@ -6,7 +6,7 @@
#include <cstdint>
typedef struct {
typedef struct pwl_gna_slope_scale_t{
double slope;
uint64_t slope_scale = 0;
uint32_t slope_scale_index;

View File

@@ -10,7 +10,7 @@
#include "details/caseless.hpp"
#include "ie_algorithm.hpp"
#include "gna-api.h"
#include "gna_permute.hpp"
namespace GNAPluginNS {
@@ -68,7 +68,7 @@ class LayerInfo {
IS_VALID();
static InferenceEngine::details::caseless_set<std::string> activations =
{ "clamp", "sigmoid", "identity", "relu",
"leakyrelu", "tanh", "prelu", "exp", "log", "sign", "abs", "neghalflog"};
"leakyrelu", "tanh", "prelu", "exp", "log", "sign", "abs", "neghalflog", "divbyn"};
return activations.find(layer->type) != activations.end();
}
@@ -155,6 +155,43 @@ class LayerInfo {
bool isPermute() const noexcept {
return isOfType("permute");
}
// @brief true when this permute is a no-op on the underlying memory layout;
// not a purely mathematical check - the 0,3,2,1 order is whitelisted as a
// workaround (WA) for the Kaldi case regardless of the input dims.
bool isTrivialPermute() {
if (!isPermute()) return false;
auto layerOrder = layer->GetParamAsInts("order");
if (layerOrder == std::vector<int>({ 0, 3, 2, 1 })) {
return true; // supported case (Kaldi WA), accepted without inspecting dims
}
IE_ASSERT(!layer->insData.empty());
auto inputs = layer->insData.begin()->lock();
auto inputsOrder = inputs->getTensorDesc().getDims();
// otherwise trivial only when every elementary swap exchanges a size-1 dim
// with another dim and no dim of size > 1 lies between the swapped positions
auto permuteSequence = genPermutations(layerOrder.begin(), layerOrder.end());
auto inputsOrderTransformed = inputsOrder;
for (auto && permute : permuteSequence) {
// both swapped dims have size 1: treated as trivial immediately
if (inputsOrderTransformed[permute.first] == 1 &&
inputsOrderTransformed[permute.second] == 1) {
return true;
}
// swapping two real (size > 1) dims reorders data - not trivial
if (inputsOrderTransformed[permute.first] != 1 &&
inputsOrderTransformed[permute.second] != 1) {
return false;
}
// any size > 1 dim sitting between the swapped positions also breaks triviality
// NOTE(review): loop assumes permute.first < permute.second - TODO confirm
for (int j = permute.first + 1; j != permute.second; j++) {
if (inputsOrderTransformed[j] != 1) {
return false;
}
}
// apply the swap to the tracked shape before evaluating the next one
std::swap(inputsOrderTransformed[permute.first], inputsOrderTransformed[permute.second]);
}
return true;
}
bool isPooling() const noexcept {
return isOfType("pooling");
}

View File

@@ -38,6 +38,11 @@ enum LayerType {
Memory,
Power,
Crop,
Exp,
Log,
NegHalfLog,
Identity,
DivByN,
LSTMCell,
TensorIterator,
NO_TYPE
@@ -66,6 +71,11 @@ static const InferenceEngine::details::caseless_map<std::string, GNAPluginNS::La
{ "Power" , Power},
{ "Memory" , Memory },
{ "Crop" , Crop },
{ "Log", Log },
{ "NegHalfLog" , NegHalfLog },
{ "DivByN", DivByN },
{ "Identity", Identity },
{ "Exp", Exp },
{ "LSTMCell", LSTMCell },
{ "TensorIterator", TensorIterator }
};

View File

@@ -0,0 +1,104 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <list>
#include <utility>
#include "gna_plugin_log.hpp"
namespace GNAPluginNS {
// Decomposes a permutation (given as an "order" vector) into a sequence of
// pairwise swaps that realises it; cnt() exposes that swap list. An empty
// list means the order is the identity permutation.
template <class T>
class PermuteSequence {
public:
// sequence of (position, position) elementary swaps
using cnt_type = std::vector<std::pair<T, T>>;
private:
std::vector<T> orderVec; // validated copy of the requested order
cnt_type permutes; // flattened swap sequence covering every cycle
public:
// Validates orderVecIn (each element must be in [0, size) and unique),
// then walks the permutation's cycles and records k-1 swaps per k-cycle.
// Throws via THROW_GNA_EXCEPTION on an invalid order vector.
explicit PermuteSequence(std::vector<T> && orderVecIn) : orderVec(std::move(orderVecIn)) {
// first pass: counter[] acts as a "seen" set to reject out-of-range
// or duplicated elements
std::vector<bool> counter(orderVec.size());
for (auto && x : this->orderVec) {
if (x < 0) {
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be >= 0";
}
if (x >= counter.size()) {
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be < "<< counter.size();
}
if (counter[x]) {
THROW_GNA_EXCEPTION << "invalid order: element " << x << " present more than once";
}
counter[x] = true;
}
// second pass: counter[] is reused as the visited flags for the cycle walk
std::fill(counter.begin(), counter.end(), false);
// each entry of permuteCycles holds one cycle as its (i, orderVec[i]) hops
std::list<cnt_type> permuteCycles;
bool newSeq = false;
for (int i = 0; i != orderVec.size();) {
// already visited: the current cycle (if any) is finished
if (counter[i]) {
newSeq = false;
i++;
continue;
}
counter[i] = true;
// i is moved by the permutation: extend (or start) the current cycle
if (orderVec[i] != i) {
if (!newSeq) {
newSeq = true;
permuteCycles.push_back({});
}
permuteCycles.back().push_back({i, orderVec[i]});
counter[i] = true;
i = orderVec[i]; // follow the cycle instead of advancing linearly
continue;
}
// fixed point: this dim is not permuted
i++;
}
// a k-cycle is realised by k-1 transpositions; the last recorded hop of
// each cycle closes it and is therefore dropped
for (auto && cycle : permuteCycles) {
for (int i = 0; i + 1 < cycle.size(); i++) {
permutes.push_back(cycle[i]);
}
}
}
// Returns the computed swap sequence.
const cnt_type & cnt() const noexcept {
return permutes;
}
};
/**
* @brief generates permutations sequence in order to reach given order
* @tparam Iterator
* @return
*/
/**
 * @brief generates permutations sequence in order to reach given order
 * @tparam Iterator random access iterator over the order values
 * @return list of pairwise swaps realizing the permutation
 */
template <class Iterator>
inline typename PermuteSequence<typename std::iterator_traits<Iterator>::value_type>::cnt_type genPermutations(
    Iterator beg, Iterator en) {
    static_assert(
        std::is_same<std::random_access_iterator_tag,
            typename std::iterator_traits<Iterator>::iterator_category>::value,
        "The genPermutations() function only accepts random access iterators or raw pointers to an array.\n");
    using value_type = typename std::iterator_traits<Iterator>::value_type;
    // materialize the range, then delegate to PermuteSequence
    std::vector<value_type> order(beg, en);
    PermuteSequence<value_type> seq(std::move(order));
    return seq.cnt();
}
/**
 * @brief initializer-list convenience overload of genPermutations()
 */
template <class T>
inline typename PermuteSequence<T>::cnt_type genPermutations(const std::initializer_list<T> & lst) {
    auto first = lst.begin();
    auto last = lst.end();
    return genPermutations(first, last);
}
} // namespace GNAPluginNS

View File

@@ -7,19 +7,54 @@
#include <memory>
#include <utility>
#include <cpp_interfaces/impl/ie_memory_state_internal.hpp>
#include <ie_blob.h>
#include "gna_plugin.hpp"
#include "preprocessing.hpp"
namespace GNAPluginNS {
namespace memory {
// Holds a GNA memory (state) blob between inference calls.
// NOTE(review): this class carries two construction variants - a legacy one
// built around a GNAPlugin pointer and a newer one built around a named blob;
// only the members of the variant actually constructed are initialized.
class GNAMemoryState : public InferenceEngine::MemoryStateInternal {
    std::shared_ptr<GNAPlugin> plg;        // set by the legacy ctor only
    std::string stateName;                 // set by the blob-backed ctor only
    InferenceEngine::Blob::Ptr memState;   // backing blob for the state
    float scalefactor;                     // FP32 -> I16 quantization scale
public:
    using Ptr = InferenceEngine::MemoryStateInternal::Ptr;

    // Legacy constructor: reset-only state tied to the whole plugin.
    explicit GNAMemoryState(std::shared_ptr<GNAPlugin> plg)
        : InferenceEngine::MemoryStateInternal("GNAResetState"), plg(plg) {}

    // Blob-backed named state with a quantization scale factor.
    explicit GNAMemoryState(std::string name,
        InferenceEngine::Blob::Ptr state,
        float scale_factor)
        : InferenceEngine::MemoryStateInternal(name), stateName(name),
        memState(state), scalefactor(scale_factor) {}

    // Resets the plugin and zeroes the state blob.
    // NOTE(review): dereferences both plg and memState, but each constructor
    // leaves one of them unset - confirm which variant callers Reset() on.
    void Reset() override {
        plg->Reset();
        std::memset(memState->buffer().as<int16_t*>(), 0, memState->byteSize());
    }

    // Overwrites the stored state from newState. Accepts FP32 (converted to
    // I16 using scalefactor) or I16 (raw byte copy); anything else throws.
    // NOTE(review): the error text says "> 2" but the check rejects any
    // rank != 2.
    void SetState(InferenceEngine::Blob::Ptr newState) override {
        if (newState->getTensorDesc().getDims().size() != 2) {
            THROW_GNA_EXCEPTION << "SetState failed for blob dimensions > 2";
        }
        // FP32 path: 4-byte floats vs 2-byte ints, so matching element count
        // means newState holds exactly twice the bytes of memState
        if ((newState->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) &&
            (newState->byteSize()/2 == memState->byteSize())) {
            ConvertToInt16(memState->buffer().as<int16_t*>(),
                newState->buffer().as<float*>(),
                newState->getTensorDesc().getDims()[0],
                newState->getTensorDesc().getDims()[1],
                scalefactor);
        } else if ((newState->getTensorDesc().getPrecision() == InferenceEngine::Precision::I16) &&
            (newState->byteSize() == memState->byteSize())) {
            // I16 path: identical size, plain byte copy
            std::memcpy(memState->buffer().as<uint8_t*>(),
                newState->buffer().as<uint8_t*>(),
                newState->byteSize());
        } else {
            THROW_GNA_EXCEPTION << "SetState call failed. Invalid precision / size";
        }
    }

    // Returns the current state blob without copying.
    InferenceEngine::Blob::CPtr GetLastState() const override {
        return memState;
    }
};
} // namespace memory

View File

@@ -8,6 +8,7 @@
#include <iostream>
#include <limits>
#include <cstdint>
#include <map>
#ifdef _NO_MKL_
#include <cmath>
@@ -30,6 +31,140 @@
#include "round_float_define.hpp"
// d/dx tanh(x) = 1 - tanh^2(x)
double first_deriv_tanh(const double x) { return 1.0 - tanh(x) * tanh(x); }
// d/dx exp(x) = exp(x)
double first_deriv_exp(const double x) { return exp(x); }
// d/dx ln(x) = 1/x
double first_deriv_log(const double x) { return 1.0 / x; }
// -0.5 * ln(x)
double neghalflog(const double x) { return -0.5 * log(x); }
// d/dx (-0.5 * ln(x)) = -0.5/x
double first_deriv_neghalflog(const double x) { return -0.5 / x; }
std::map<std::string, std::vector<pwl_t>> pwl_search_map {
{"log", {{1.0769533473860933e-05 , 8.4918474385631271e-06 , -11.662751279293021 , 92854.532875275778 , -12.451257806448908},
{1.7021658371797054e-05 , 1.3421901942456181e-05 , -11.204973371284382 , 58748.682305649265 , -11.993492424439317},
{2.6901160981803783e-05 , 2.121301943569138e-05 , -10.747255484868321 , 37173.116828541606 , -11.5358095346374},
{4.2508975575310986e-05 , 3.3523097254749539e-05 , -10.289651523932033 , 23524.443637281987 , -11.078263735848511},
{6.7159638615065504e-05 , 5.2968285045173431e-05 , -9.8322142997398423 , 14889.895488146327 , -10.620906528248819},
{0.00010607938536412906 , 8.3674876369534097e-05 , -9.3749963641228913 , 9426.9022823556988 , -10.163791247146683},
{0.00016750484167401942 , 0.0001321476299142904 , -8.9180484530997628 , 5969.97668847147 , -9.7069667231248324},
{0.00026440839561966089 , 0.00020863498397988411 , -8.4614207323653048 , 3782.0281676624659 , -9.2504841185370346},
{0.00041721037390891224 , 0.00032927394464933357 , -8.0051607849959225 , 2396.8723275762272 , -8.794388391117776},
{0.00065803047888043399 , 0.00051945736028058084 , -7.5493154189054579 , 1519.6864462895246 , -8.3387277287491912},
{0.001037362756270747 , 0.00081911613772069059 , -7.0939280363180339 , 963.98294035052527 , -7.8835420192465913},
{0.0016345178691889663 , 0.0012909987974520291 , -6.6390412024897918 , 611.8012037985186 , -7.4288758208733832},
{0.0025739995666866798 , 0.0020336387647946299 , -6.1846931764806961 , 388.50045390148483 , -6.9747627596750643},
{0.004051067401148134 , 0.0032016468460507675 , -5.7309215067520842 , 246.84852187761314 , -6.5212432982738378},
{0.0063717878399278975 , 0.0050374410331472524 , -5.2777584251958025 , 156.94182309926316 , -6.0683436046929682},
{0.010015399744147635 , 0.0079208190427630248 , -4.8252358236823785 , 99.846239345996764 , -5.6160998176424242},
{0.015731959416888035 , 0.012446344656992419 , -4.3733791100375914 , 63.564872848992557 , -5.1645294255940524},
{0.024693977573234764 , 0.019544055268788288 , -3.9222140374798418 , 40.495703741299124 , -4.7136643095482684},
{0.038733765155746253 , 0.030667594605217476 , -3.4717584839571183 , 25.817268111660667 , -4.2635119962197363},
{0.060710894498842323 , 0.048087203788268418 , -3.0220317632779459 , 16.471508256546421 , -3.8141005375106394},
{0.095087507492990303 , 0.07534567638130947 , -2.5730436069008258 , 10.516628591549917 , -3.3654261013821722},
{0.14881589164648179 , 0.1179673976691515 , -2.1248067941840345 , 6.7197124509762807 , -2.9175137851107023},
{0.23272951343232598 , 0.18456019070518948 , -1.6773223736744836 , 4.2968336299589422 , -2.4703468078481774},
{0.36368021928596278 , 0.28852643303350278 , -1.2305967272577258 , 2.7496683816440872 , -2.0239487374384986},
{0.56789454936972605 , 0.45071871177964989 , -0.78462174664264084 , 1.7608902939988476 , -1.5782879515390904},
{0.88610468810159015 , 0.70355895583540895 , -0.33939781495255494 , 1.1285348259949077 , -1.1333885987534271},
{1.3816335789411809 , 1.0974241494088073 , 0.10509277274225079 , 0.72378090344789758 , -0.68920186958239626},
{2.1526786672071778 , 1.7105408556958117 , 0.54885493633765814 , 0.46453751562343976 , -0.24575546313966701},
{3.3517682459038256 , 2.6643098700159529 , 0.99191642472855346 , 0.29834998324305195 , 0.19701961965499604},
{5.2150907594192395 , 4.1470315870077039 , 1.4342864241471516 , 0.19175121702222525 , 0.63908807030881409},
{8.1092990368633888 , 6.4506248753490381 , 1.8760032407108322 , 0.12331522064412515 , 1.0805430109146834},
{12.601356824411839 , 10.02743923506598 , 2.3170788926824022 , 0.079356533898219603 , 1.5213360711125516},
{19.571181442374591 , 15.578135417317394 , 2.7575629024279547 , 0.051095535695910903 , 1.9615897281366799},
{30.377546048984698 , 24.187602070756132 , 3.1974682131414882 , 0.032919051406834175 , 2.4012352971662185},
{47.129543514739666 , 37.535139255177242 , 3.6368564758700774 , 0.02121811342575923 , 2.840431633702059},
{73.079549140200285 , 58.219475920898645 , 4.0757390773799447 , 0.013683718793633217 , 3.2790801405656671},
{113.27901633813272 , 90.261352592829454 , 4.5141911073789212 , 0.0088277602712848978 , 3.7173855249275034},
{175.50631439356027 , 139.88074429844423 , 4.9522192021630707 , 0.0056978006942677408 , 4.155206600184707},
{271.85671771564853 , 216.69890827860715 , 5.3899137902216161 , 0.0036784082747808367 , 4.5928067328736137},
{420.92464268160882 , 335.59785527230628 , 5.8272726607059671 , 0.0023757221568907016 , 5.02998540013055},
{651.67787232666603 , 519.59734639612975 , 6.2644043286254485 , 0.0015345004678918 , 5.4670819574652496},
{1008.5639944354145 , 804.30768425774158 , 6.7012924752877234 , 0.00099150872479816517 , 5.9038143889239647},
{1561.0124734004803 , 1244.8233916434529 , 7.1380676425712917 , 0.00064060987150321626 , 6.3406214896063817},
{2415.300015438163 , 1926.4004262204676 , 7.5746926191112163 , 0.00041402724034620179 , 6.7771103668414092},
{0 , 2981 , 8.0113255703134367 , 0 , 0}}},
{"exp", {{-5.2905549738656035 , -7.6246190071105957 , -0.0029375872840971921 , 0.0050389629907875762 , 0.035482585711588618},
{-3.2765565204702316 , -3.966387017312524 , 0.015496108324210485 , 0.037758052285013388 , 0.16525915670649505},
{-2.304345998844584 , -2.7128986917228044 , 0.062825386060480992 , 0.099824064363453618 , 0.33363795967454735},
{-1.6505759560844804 , -1.9420942111377082 , 0.1397702221420461 , 0.19193932815516293 , 0.512534480241849},
{-1.1551903296891044 , -1.3825157257724856 , 0.24717534067313054 , 0.31499757780654358 , 0.6826644455709191},
{-0.75535692384923692 , -0.94198675918566444 , 0.38594089810159893 , 0.46984288666759971 , 0.82852667624004861},
{-0.41999557300201118 , -0.57832149976517178 , 0.55680643336844471 , 0.65704972856644539 , 0.93679241781329048},
{-0.13153483933686491 , -0.2688406706913587 , 0.76015072810791207 , 0.87674872926137593 , 0.99585644451033684},
{0.4235904473306159 , 0 , 0.94308787650971637 , 1.5274359002153208 , 0.94308787650971637},
{1.0937368353927797 , 0.79581116380659767 , 2.1586384179000495 , 2.9854092373666563 , -0.2171835817276766},
{ 1.5979006351086957 , 1.3669113028909896 , 3.8636060485839758 , 4.9426451092152783 , -2.8925514173812577},
{ 2.0041923346353099 , 1.8147648658250355 , 6.0771872710645756 , 7.420098519377718 , -7.3885468228624731},
{ 2.3452256111159864 , 2.1843822125126793 , 8.8197843979578803 , 10.435626855401082 , -13.975613281399871},
{ 2.6387547172826245 , 2.4991598196640292 , 12.1046860486254 , 13.99576406829804 , -22.872965156362628},
{ 0 , 2.7725581832447883 , 15.931105041960471 , -0 , -0}}},
{"sigmoid", {{-6.0269768546940687 , -10 , -0.0033685324745532531 , 0.0024011761556240077 , 0.020643229081686823},
{-3.4572777895083773 , -4.2646607997060624 , 0.010403027257608216 , 0.029619100828046807 , 0.13671844548152082},
{-2.302945392313446 , -2.7960754970003254 , 0.053901203413037058 , 0.082620267964448268 , 0.28491371022403178},
{-1.4431692770391085 , -1.8482809500056467 , 0.13220824286098024 , 0.15455301637191463 , 0.41786563878710092},
{-0.58709153507881506 , -1.0390898867848257 , 0.25727116250295479 , 0.22963741468060328 , 0.49588507772498291},
{0.58709153507881362 , 0 , 0.50411492227501709 , 0.22963741468060325 , 0.50411492227501709},
{1.4431692770391091 , 1.0390898867848253 , 0.7427288374970451 , 0.15455301637191451 , 0.58213436121289919},
{2.3029453923134513 , 1.8482809500056492 , 0.86779175713902001 , 0.082620267964447991 , 0.71508628977596878},
{3.4572777895083746 , 2.7960754970003263 , 0.94609879658696283 , 0.029619100828046918 , 0.86328155451847877},
{6.0269768546940705 , 4.2646607997060606 , 0.98959697274239178 , 0.0024011761556240298 , 0.97935677091831308},
{ 0 , 10 , 1.0033685324745534 , 0 , 0}}},
{"tanh", {{-3.0134884273470361 , -5 , -1.0067370649491065 , 0.0096047046224959371 , -0.95871354183662683},
{-1.7286388947541886 , -2.1323303998530339 , -0.979193945484784 , 0.11847640331218724 , -0.72656310903695842},
{-1.1514726961567241 , -1.3980377485001632 , -0.892197593173926 , 0.3304810718577928 , -0.43017257955193672},
{-0.72158463851955434 , -0.92414047500282348 , -0.73558351427803959 , 0.61821206548765828 , -0.16426872242579849},
{-0.29354576753940709 , -0.51954494339241275 , -0.48545767499409032 , 0.91854965872241312 , -0.0082298445500341155},
{0.29354576753940703 , 0 , 0.0082298445500341155 , 0.91854965872241323 , 0.0082298445500341155},
{0.72158463851955434 , 0.51954494339241275 , 0.48545767499409037 , 0.6182120654876585 , 0.16426872242579826},
{1.1514726961567245 , 0.92414047500282415 , 0.73558351427804003 , 0.33048107185779213 , 0.43017257955193755},
{1.7286388947541889 , 1.3980377485001632 , 0.89219759317392588 , 0.11847640331218723 , 0.72656310903695842},
{3.0134884273470322 , 2.1323303998530312 , 0.97919394548478356 , 0.0096047046224960447 , 0.95871354183662627},
{ 0 , 5 , 1.0067370649491065 , 0 , 0}}},
{"NegHalfLog", {{1.0769533473860933e-05 ,8.4918474385631271e-06 ,5.8313756396465104 ,-46427.266437637889 ,6.2256289032244538 },
{1.7021658371797054e-05 ,1.3421901942456181e-05 ,5.6024866856421909 ,-29374.341152824632 ,5.9967462122196586 },
{2.6901160981803783e-05 ,2.121301943569138e-05 ,5.3736277424341603 ,-18586.558414270803 ,5.7679047673186998 },
{4.2508975575310986e-05 ,3.3523097254749539e-05 ,5.1448257619660165 ,-11762.221818640994 ,5.5391318679242554 },
{6.7159638615065504e-05 ,5.2968285045173431e-05 ,4.9161071498699211 ,-7444.9477440731634 ,5.3104532641244093 },
{0.00010607938536412906 ,8.3674876369534097e-05 ,4.6874981820614456 ,-4713.4511411778494 ,5.0818956235733417 },
{0.00016750484167401942 ,0.0001321476299142904 ,4.4590242265498814 ,-2984.988344235735 ,4.8534833615624162 },
{0.00026440839561966089 ,0.00020863498397988411 ,4.2307103661826524 ,-1891.0140838312329 ,4.6252420592685173 },
{0.00041721037390891224 ,0.00032927394464933357 ,4.0025803924979613 ,-1198.4361637881136 ,4.397194195558888 },
{0.00065803047888043399 ,0.00051945736028058084 ,3.7746577094527289 ,-759.84322314476231 ,4.1693638643745956 },
{0.001037362756270747 ,0.00081911613772069059 ,3.546964018159017 ,-481.99147017526263 ,3.9417710096232956 },
{0.0016345178691889663 ,0.0012909987974520291 ,3.3195206012448959 ,-305.9006018992593 ,3.7144379104366916 },
{0.0025739995666866798 ,0.0020336387647946299 ,3.092346588240348 ,-194.25022695074242 ,3.4873813798375322 },
{0.004051067401148134 ,0.0032016468460507675 ,2.8654607533760421 ,-123.42426093880657 ,3.2606216491369189 },
{0.0063717878399278975 ,0.0050374410331472524 ,2.6388792125979013 ,-78.47091154963158 ,3.0341718023464841 },
{0.010015399744147635 ,0.0079208190427630248 ,2.4126179118411892 ,-49.923119672998382 ,2.8080499088212121 },
{0.015731959416888035 ,0.012446344656992419 ,2.1866895550187957 ,-31.782436424496279 ,2.5822647127970262 },
{0.024693977573234764 ,0.019544055268788288 ,1.9611070187399209 ,-20.247851870649562 ,2.3568321547741342 },
{0.038733765155746253 ,0.030667594605217476 ,1.7358792419785591 ,-12.908634055830333 ,2.1317559981098682 },
{0.060710894498842323 ,0.048087203788268418 ,1.5110158816389729 ,-8.2357541282732107 ,1.9070502687553197 },
{0.095087507492990303 ,0.07534567638130947 ,1.2865218034504129 ,-5.2583142957749587 ,1.6827130506910861 },
{0.14881589164648179 ,0.1179673976691515 ,1.0624033970920173 ,-3.3598562254881403 ,1.4587568925553511 },
{0.23272951343232598 ,0.18456019070518948 ,0.83866118683724178 ,-2.1484168149794711 ,1.2351734039240887 },
{0.36368021928596278 ,0.28852643303350278 ,0.61529836362886292 ,-1.3748341908220436 ,1.0119743687192493 },
{0.56789454936972605 ,0.45071871177964989 ,0.39231087332132042 ,-0.88044514699942378 ,0.7891439757695452 },
{0.88610468810159015 ,0.70355895583540895 ,0.16969890747627747 ,-0.56426741299745387 ,0.56669429937671356 },
{1.3816335789411809 ,1.0974241494088073 ,-0.052546386371125395 ,-0.36189045172394879 ,0.34460093479119813 },
{2.1526786672071778 ,1.7105408556958117 ,-0.27442746816882907 ,-0.23226875781171988 ,0.1228777315698335 },
{3.3517682459038256 ,2.6643098700159529 ,-0.49595821236427673 ,-0.14917499162152598 ,-0.098509809827498018 },
{5.2150907594192395 ,4.1470315870077039 ,-0.71714321207357579 ,-0.095875608511112625 ,-0.31954403515440705 },
{8.1092990368633888 ,6.4506248753490381 ,-0.93800162035541612 ,-0.061657610322062573 ,-0.54027150545734171 },
{12.601356824411839 ,10.02743923506598 ,-1.1585394463412011 ,-0.039678266949109801 ,-0.76066803555627582 },
{19.571181442374591 ,15.578135417317394 ,-1.3787814512139773 ,-0.025547767847955451 ,-0.98079486406833993 },
{30.377546048984698 ,24.187602070756132 ,-1.5987341065707441 ,-0.016459525703417088 ,-1.2006176485831093 },
{47.129543514739666 ,37.535139255177242 ,-1.8184282379350387 ,-0.010609056712879615 ,-1.4202158168510295 },
{73.079549140200285 ,58.219475920898645 ,-2.0378695386899723 ,-0.0068418593968166087 ,-1.6395400702828336 },
{113.27901633813272 ,90.261352592829454 ,-2.2570955536894606 ,-0.0044138801356424489 ,-1.8586927624637517 },
{175.50631439356027 ,139.88074429844423 ,-2.4761096010815353 ,-0.0028489003471338704 ,-2.0776033000923535 },
{271.85671771564853 ,216.69890827860715 ,-2.694956895110808 ,-0.0018392041373904184 ,-2.2964033664368069 },
{420.92464268160882 ,335.59785527230628 ,-2.9136363303529835 ,-0.0011878610784453508 ,-2.514992700065275 },
{651.67787232666603 ,519.59734639612975 ,-3.1322021643127242 ,-0.00076725023394590001 ,-2.7335409787326248 },
{1008.5639944354145 ,804.30768425774158 ,-3.3506462376438617 ,-0.00049575436239908258 ,-2.9519071944619824 },
{1561.0124734004803 ,1244.8233916434529 ,-3.5690338212856458 ,-0.00032030493575160813 ,-3.1703107448031909 },
{2415.300015438163 ,1926.4004262204676 ,-3.7873463095556081 ,-0.00020701362017310089 ,-3.3885551834207046 },
{0 ,2981 ,-4.0056627851567184 ,-0 ,-0 }}}
};
// logistic sigmoid via the tanh identity: 1/(1+e^-x) == 0.5*(1 + tanh(x/2))
double sigmoid(const double x) { return 0.5 * (1.0 + tanh(x / 2)); }
// d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x))
double first_deriv_sigmoid(const double x) { return sigmoid(x) * (1.0 - sigmoid(x)); }
@@ -174,6 +309,15 @@ double calculate_error_pct(const DnnActivationType fun,
min_val = max_val = sigmoid(l_bound); break;
case kActTanh:
min_val = max_val = tanh(l_bound); break;\
case kActExp:
min_val = max_val = exp(l_bound);
break;
case kActLog:
min_val = max_val = log(l_bound);
break;
case kActNegHalfLog:
min_val = max_val = neghalflog(l_bound);
break;
default:
break;
}
@@ -188,6 +332,15 @@ double calculate_error_pct(const DnnActivationType fun,
case kActTanh:
val = tanh(arg);
break;
case kActExp:
val = exp(arg);
break;
case kActLog:
val = log(arg);
break;
case kActNegHalfLog:
val = neghalflog(arg);
break;
default:
break;
}
@@ -209,6 +362,7 @@ bool split_search(const DnnActivationType fun,
switch (fun) {
case kActSigmoid:
case kActTanh:
case kActExp:
if ((l_bound < 0.0) && (u_bound > 0.0)) {
is_split = true;
}
@@ -254,7 +408,9 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
pwl = pwl_search(fun, l_bound, 0.0, threshold, allowed_err_pct, samples, err_pct1);
pwl = negative_pwl(pwl);
pwl2 = pwl_search(fun, 0.0, u_bound, threshold, allowed_err_pct, samples, err_pct2);
if (fun == kActExp) {
pwl2 = negative_pwl(pwl2); // both regions of exp are concave
}
// merge
pwl.pop_back(); // remove final alpha and beta from first half
pwl.insert(pwl.end(), pwl2.begin(), pwl2.end()); // concatenate the two halves
@@ -274,10 +430,12 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
pwl[0].alpha = pwl[0].t = pwl[0].beta = -std::numeric_limits<float>::infinity();
pwl[0].m = 0.0;
pwl[0].b = pwl[0].beta = KALDI_LSTM_CLIP_LOWER;
pwl[1].alpha = pwl[0].t = pwl[1].beta = KALDI_LSTM_CLIP_LOWER;
//pwl[1].alpha = pwl[0].t = pwl[1].beta = KALDI_LSTM_CLIP_LOWER;
pwl[1].alpha = pwl[1].t = pwl[1].beta = KALDI_LSTM_CLIP_LOWER;
pwl[1].m = 1.0;
pwl[1].b = 0.0;
pwl[2].alpha = pwl[0].t = pwl[1].beta = KALDI_LSTM_CLIP_UPPER;
//pwl[2].alpha = pwl[0].t = pwl[1].beta = KALDI_LSTM_CLIP_UPPER;
pwl[2].alpha = pwl[2].t = pwl[2].beta = KALDI_LSTM_CLIP_UPPER;
pwl[2].m = 0.0;
pwl[2].b = KALDI_LSTM_CLIP_UPPER;
pwl[3].alpha = pwl[3].beta = std::numeric_limits<float>::infinity();
@@ -294,6 +452,17 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
if (u_bound == 0) negative = true; // make left half convex
err = pivot_search(pwl, tanh, first_deriv_tanh, n_segments, l_bound, u_bound, threshold, negative);
break;
case kActExp:
negative = true; // make function convex
err = pivot_search(pwl, exp, first_deriv_exp, n_segments, l_bound, u_bound, threshold, negative);
break;
case kActLog:
err = pivot_search(pwl, log, first_deriv_log, n_segments, l_bound, u_bound, threshold, negative);
break;
case kActNegHalfLog:
negative = true; // make function convex
err = pivot_search(pwl, neghalflog, first_deriv_neghalflog, n_segments, l_bound, u_bound, threshold, negative);
break;
default:
break;
}
@@ -308,6 +477,15 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
case kActTanh:
err = pivot_search(pwl, tanh, first_deriv_tanh, n_segments, l_bound, u_bound, threshold, negative);
break;
case kActExp:
err = pivot_search(pwl, exp, first_deriv_exp, n_segments, l_bound, u_bound, threshold, negative);
break;
case kActLog:
err = pivot_search(pwl, log, first_deriv_log, n_segments, l_bound, u_bound, threshold, negative);
break;
case kActNegHalfLog:
err = pivot_search(pwl, neghalflog, first_deriv_neghalflog, n_segments, l_bound, u_bound, threshold, negative);
break;
default:
break;
}
@@ -326,30 +504,80 @@ std::vector<pwl_t> pwl_search(const DnnActivationType fun,
void PwlDesignOpt16(const DnnActivation activation_type,
std::vector<intel_pwl_segment_t> &ptr_segment,
const float scale_in,
const float scale_out) {
const float scale_out,
const uint32_t n) {
std::vector<pwl_t> pwl;
double err_pct = 0.0;
switch (activation_type) {
case kActSigmoid:
pwl = pwl_search(kActSigmoid, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
make_gna_pwl(activation_type, pwl, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, scale_in, scale_out, ptr_segment);
if ( pwl_search_map.find("sigmoid") == pwl_search_map.end() ) {
pwl = pwl_search(kActSigmoid, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
} else {
pwl = pwl_search_map["sigmoid"];
}
make_gna_pwl(activation_type, pwl, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, scale_in, scale_out, ptr_segment, n);
break;
case kActTanh:
pwl = pwl_search(kActTanh, -TANH_DOMAIN, TANH_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
make_gna_pwl(activation_type, pwl, -TANH_DOMAIN, TANH_DOMAIN, scale_in, scale_out, ptr_segment);
if ( pwl_search_map.find("tanh") == pwl_search_map.end() ) {
pwl = pwl_search(kActTanh, -TANH_DOMAIN, TANH_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
} else {
pwl = pwl_search_map["tanh"];
}
make_gna_pwl(activation_type, pwl, -TANH_DOMAIN, TANH_DOMAIN, scale_in, scale_out, ptr_segment, n);
break;
case kActRelu:
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment);
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment, n);
break;
case kActLeakyRelu:
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment);
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment, n);
break;
case kActIdentity:
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment);
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment, n);
break;
case kActKaldiLstmClipping:
make_gna_pwl(activation_type, pwl, KALDI_LSTM_CLIP_LOWER, KALDI_LSTM_CLIP_UPPER, scale_in, scale_out, ptr_segment);
make_gna_pwl(activation_type, pwl, KALDI_LSTM_CLIP_LOWER, KALDI_LSTM_CLIP_UPPER, scale_in, scale_out, ptr_segment, n);
break;
case kActDivByN: {
int32_t n_4 = n/4;
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment, n_4);
break;
}
case kActLog: {
double x_min = (1 + ~XBASEMASK) / scale_in;
double x_max = ((INT32_MAX / scale_in) < LOG_DOMAIN) ? (INT32_MAX / scale_in) : LOG_DOMAIN;
if ( pwl_search_map.find("log") == pwl_search_map.end() ) {
pwl = pwl_search(kActLog, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.066*PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
} else {
pwl = pwl_search_map["log"];
}
make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment, n);
break;
}
case kActExp: {
double x_min = -log(scale_out);
double x_max = x_min + log(INT16_MAX);
if ( pwl_search_map.find("exp") == pwl_search_map.end() ) {
pwl = pwl_search(kActExp, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.5*PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
} else {
pwl = pwl_search_map["exp"];
}
make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment, n);
break;
}
case kActNegHalfLog: {
double x_min = (1 + ~XBASEMASK) / scale_in;
double x_max = ((INT32_MAX / scale_in) < LOG_DOMAIN) ? (INT32_MAX / scale_in) : LOG_DOMAIN;
if ( pwl_search_map.find("NegHalfLog") == pwl_search_map.end() ) {
pwl = pwl_search(kActNegHalfLog, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.066*PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct);
pwl = negative_pwl(pwl);
} else {
pwl = pwl_search_map["NegHalfLog"];
}
make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment, n);
break;
}
default:
break;
}
@@ -359,7 +587,8 @@ void PwlDesign16(const DnnActivation activation_type,
intel_pwl_segment_t *ptr_segment,
const uint32_t num_segments,
const float scale_in,
const float scale_out) {
const float scale_out,
const uint32_t n) {
switch (activation_type) {
case kActSigmoid:
{
@@ -651,6 +880,27 @@ void PwlApply32(intel_dnn_component_t *component,
}
}
break;
case kActDivByN:
for (uint32_t i = num_row_start; i <= num_row_end; i++) {
for (uint32_t j = num_col_start; j <= num_col_end; j++) {
ptr_out[i * num_columns + j] = ptr_in[i * num_columns + j]/(float)(num_row_end-num_row_start+1);
}
}
break;
case kActExp:
for (uint32_t i = num_row_start; i <= num_row_end; i++) {
for (uint32_t j = num_col_start; j <= num_col_end; j++) {
ptr_out[i * num_columns + j] = exp(ptr_in[i * num_columns + j]);
}
}
break;
case kActLog:
for (uint32_t i = num_row_start; i <= num_row_end; i++) {
for (uint32_t j = num_col_start; j <= num_col_end; j++) {
ptr_out[i * num_columns + j] = log(ptr_in[i * num_columns + j]);
}
}
break;
case kActCustom:
// break;
default:fprintf(stderr, "Unknown piecewise linear function type!\n");

View File

@@ -27,6 +27,8 @@
#define XBASEMASK 0xFFFFFFFC // only top 30 bits are used
#define KALDI_LSTM_CLIP_LOWER (-50.0)
#define KALDI_LSTM_CLIP_UPPER (50.0)
#define LOG_DOMAIN (2981.0)
#define EXP_DOMAIN (8.0)
typedef struct {
double t;
@@ -88,8 +90,10 @@ void PwlDesign16(const DnnActivation activation_type,
intel_pwl_segment_t *ptr_segment,
const uint32_t num_segments,
const float scale_in,
const float scale_out);
const float scale_out,
const uint32_t n);
void PwlDesignOpt16(const DnnActivation activation_type,
std::vector<intel_pwl_segment_t> &ptr_segment,
const float scale_in,
const float scale_out);
const float scale_out,
const uint32_t n);

View File

@@ -14,17 +14,13 @@ set(builder_files_src ${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_layer_decorator.cp
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_const_layer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_split_layer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_pooling_layer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_divbyn_layer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/builders/ie_network_builder.cpp)
file (GLOB LIBRARY_SRC
${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/low_precision_transformations/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/low_precision_transformations/common/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/transform/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/transform/transformations/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/transform/transformations/fusion/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/transform/transformations/utils/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ngraph_ops/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/low_precision_transformations/common/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp_interfaces/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/built-in/*.cpp
@@ -55,6 +51,7 @@ file (GLOB LIBRARY_HEADERS
)
if (NOT ENABLE_NGRAPH)
message(error " removing enable ngraph")
list(REMOVE_ITEM LIBRARY_SRC "${CMAKE_CURRENT_SOURCE_DIR}/ie_cnn_layer_builder_ngraph.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/cnn_network_ngraph_impl.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ie_ir_parser.cpp"
@@ -318,7 +315,7 @@ install(TARGETS ${TARGET_NAME} ${TARGET_NAME}_nn_builder
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH}
COMPONENT core)
install(FILES "${OpenVINO_BINARY_DIR}/share/ie_parallel.cmake"
"${OpenVINO_BINARY_DIR}/share/ie_rh_decoder.cmake"
#"${OpenVINO_BINARY_DIR}/share/ie_rh_decoder.cmake"
"${OpenVINO_BINARY_DIR}/share/InferenceEngineConfig.cmake"
"${OpenVINO_BINARY_DIR}/share/InferenceEngineConfig-version.cmake"
DESTINATION ${IE_CPACK_IE_DIR}/share

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <builders/ie_divbyn_layer.hpp>
#include <string>
using namespace InferenceEngine;
// Builder decorator for the "DivByN" layer; exposes one input and one
// output port.
Builder::DivByNLayer::DivByNLayer(const std::string& name): LayerDecorator("DivByN", name) {
    getLayer()->getOutputPorts().resize(1);
    getLayer()->getInputPorts().resize(1);
}

// Wraps an existing mutable layer; validates that it is of type "DivByN".
Builder::DivByNLayer::DivByNLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
    checkType("DivByN");
}

// Wraps an existing const layer; validates that it is of type "DivByN".
Builder::DivByNLayer::DivByNLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
    checkType("DivByN");
}

// Renames the layer; returns *this for call chaining.
Builder::DivByNLayer& Builder::DivByNLayer::setName(const std::string& name) {
    getLayer()->setName(name);
    return *this;
}

// Returns the single output port of the layer.
const Port& Builder::DivByNLayer::getPort() const {
    return getLayer()->getOutputPorts()[0];
}

// Sets the same port descriptor on both input and output; returns *this
// for call chaining.
Builder::DivByNLayer& Builder::DivByNLayer::setPort(const Port &port) {
    getLayer()->getOutputPorts()[0] = port;
    getLayer()->getInputPorts()[0] = port;
    return *this;
}

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <builders/ie_exp_layer.hpp>
#include <string>
using namespace InferenceEngine;
// Builder decorator for the "Exp" (exponential activation) layer; exposes
// one input and one output port.
Builder::ExpLayer::ExpLayer(const std::string& name): LayerDecorator("Exp", name) {
    getLayer()->getOutputPorts().resize(1);
    getLayer()->getInputPorts().resize(1);
}

// Wraps an existing mutable layer; validates that it is of type "Exp".
Builder::ExpLayer::ExpLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
    checkType("Exp");
}

// Wraps an existing const layer; validates that it is of type "Exp".
Builder::ExpLayer::ExpLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
    checkType("Exp");
}

// Renames the layer; returns *this for call chaining.
Builder::ExpLayer& Builder::ExpLayer::setName(const std::string& name) {
    getLayer()->setName(name);
    return *this;
}

// Returns the single output port of the layer.
const Port& Builder::ExpLayer::getPort() const {
    return getLayer()->getOutputPorts()[0];
}

// Sets the same port descriptor on both input and output; returns *this
// for call chaining.
Builder::ExpLayer& Builder::ExpLayer::setPort(const Port &port) {
    getLayer()->getOutputPorts()[0] = port;
    getLayer()->getInputPorts()[0] = port;
    return *this;
}

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <builders/ie_identity_layer.hpp>
#include <string>
using namespace InferenceEngine;
// Builder decorator for the "Identity" layer; exposes one input and one
// output port.
Builder::IdentityLayer::IdentityLayer(const std::string& name): LayerDecorator("Identity", name) {
    getLayer()->getOutputPorts().resize(1);
    getLayer()->getInputPorts().resize(1);
}

// Wraps an existing mutable layer; validates that it is of type "Identity".
Builder::IdentityLayer::IdentityLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
    checkType("Identity");
}

// Wraps an existing const layer; validates that it is of type "Identity".
Builder::IdentityLayer::IdentityLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
    checkType("Identity");
}

// Renames the layer; returns *this for call chaining.
Builder::IdentityLayer& Builder::IdentityLayer::setName(const std::string& name) {
    getLayer()->setName(name);
    return *this;
}

// Returns the single output port of the layer.
const Port& Builder::IdentityLayer::getPort() const {
    return getLayer()->getOutputPorts()[0];
}

// Sets the same port descriptor on both input and output; returns *this
// for call chaining.
Builder::IdentityLayer& Builder::IdentityLayer::setPort(const Port &port) {
    getLayer()->getOutputPorts()[0] = port;
    getLayer()->getInputPorts()[0] = port;
    return *this;
}

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <builders/ie_log_layer.hpp>
#include <string>
using namespace InferenceEngine;
Builder::LogLayer::LogLayer(const std::string& name): LayerDecorator("Log", name) {
getLayer()->getOutputPorts().resize(1);
getLayer()->getInputPorts().resize(1);
}
Builder::LogLayer::LogLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
checkType("Log");
}
Builder::LogLayer::LogLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
checkType("Log");
}
Builder::LogLayer& Builder::LogLayer::setName(const std::string& name) {
getLayer()->setName(name);
return *this;
}
const Port& Builder::LogLayer::getPort() const {
return getLayer()->getOutputPorts()[0];
}
Builder::LogLayer& Builder::LogLayer::setPort(const Port &port) {
getLayer()->getOutputPorts()[0] = port;
getLayer()->getInputPorts()[0] = port;
return *this;
}

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <builders/ie_neghalf_log_layer.hpp>
#include <string>
using namespace InferenceEngine;
Builder::NegHalfLogLayer::NegHalfLogLayer(const std::string& name): LayerDecorator("NegHalfLog", name) {
getLayer()->getOutputPorts().resize(1);
getLayer()->getInputPorts().resize(1);
}
Builder::NegHalfLogLayer::NegHalfLogLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
checkType("NegHalfLog");
}
Builder::NegHalfLogLayer::NegHalfLogLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
checkType("NegHalfLog");
}
Builder::NegHalfLogLayer& Builder::NegHalfLogLayer::setName(const std::string& name) {
getLayer()->setName(name);
return *this;
}
const Port& Builder::NegHalfLogLayer::getPort() const {
return getLayer()->getOutputPorts()[0];
}
Builder::NegHalfLogLayer& Builder::NegHalfLogLayer::setPort(const Port &port) {
getLayer()->getOutputPorts()[0] = port;
getLayer()->getInputPorts()[0] = port;
return *this;
}

View File

@@ -269,13 +269,14 @@ idx_t Builder::Network::addLayer(const std::vector<PortInfo>& inputs, const Laye
}
idx_t Builder::Network::addLayer(const Layer& layer) {
auto &layerParam = parameters["layers"].as<std::vector<Layer::Ptr>>();
auto getAvailableId = [&](idx_t defaultId) {
if (defaultId == (std::numeric_limits<idx_t>::max)()) defaultId = 0;
auto it = parameters["layers"].as<std::vector<Layer::Ptr>>().begin();
while (it != parameters["layers"].as<std::vector<Layer::Ptr>>().end()) {
for (it = parameters["layers"].as<std::vector<Layer::Ptr>>().begin();
it != parameters["layers"].as<std::vector<Layer::Ptr>>().end(); it++) {
auto it = layerParam.begin();
while (it != layerParam.end()) {
for (it = layerParam.begin();
it != layerParam.end(); it++) {
if ((*it)->getId() == defaultId) {
defaultId++;
break;
@@ -302,10 +303,8 @@ idx_t Builder::Network::addLayer(const Layer& layer) {
};
idx_t generatedId = getAvailableId(layer.getId());
const auto name = generateAvailableName(layer.getName(), generatedId);
parameters["layers"].as<std::vector<Layer::Ptr>>().emplace_back(std::make_shared<Layer>(generatedId, layer));
parameters["layers"]
.as<std::vector<Layer::Ptr>>()[parameters["layers"].as<std::vector<Layer::Ptr>>().size() - 1]
->setName(name);
layerParam.emplace_back(std::make_shared<Layer>(generatedId, layer));
layerParam[layerParam.size() - 1]->setName(name);
return generatedId;
}

View File

@@ -81,6 +81,11 @@ public:
{"elu", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("ELU")},
{"sigmoid", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("Sigmoid")},
{"tanh", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("TanH")},
{"neghalflog", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("NegHalfLog")},
{"log", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("Log")},
{"divbyn", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("DivByN")},
{"exp", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("exp")},
{"identity", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("identity")},
};
auto typeIt = layer->getParameters().find("type");

View File

@@ -13,7 +13,9 @@
#include <utility>
#include <vector>
#if defined(ENABLE_NGRAPH)
#include "cnn_network_ngraph_impl.hpp"
#endif
#include "debug.h"
#include "details/os/os_filesystem.hpp"
#include "ie_format_parser.h"

View File

@@ -15,7 +15,9 @@
#include <utility>
#include <vector>
#if defined(ENABLE_NGRAPH)
#include <ngraph/opsets/opset.hpp>
#endif
#include "cpp_interfaces/base/ie_plugin_base.hpp"
#include "details/caseless.hpp"
#include "details/ie_exception_conversion.hpp"
@@ -232,7 +234,6 @@ public:
*/
InferencePlugin GetCPPPluginByName(const std::string& deviceName) const {
IE_SUPPRESS_DEPRECATED_START
auto it = pluginRegistry.find(deviceName);
if (it == pluginRegistry.end()) {
THROW_IE_EXCEPTION << "Device with \"" << deviceName << "\" name is not registered in the InferenceEngine";
@@ -368,6 +369,7 @@ public:
}
void addExtension(const IExtensionPtr& extension) {
#if defined(ENABLE_NGRAPH)
std::map<std::string, ngraph::OpSet> opsets;
try {
opsets = extension->getOpSets();
@@ -377,6 +379,7 @@ public:
THROW_IE_EXCEPTION << "Cannot add opset with name: " << it.first << ". Opset with the same name already exists.";
opsetNames.insert(it.first);
}
#endif
extensions.emplace_back(extension);
}

View File

@@ -38,6 +38,11 @@ CNNLayer::Ptr ActivationLayerCreator::CreateLayer(pugi::xml_node& node, LayerPar
{"clamp", std::make_shared<LayerCreator<ClampLayer>>("Clamp")},
{"elu", std::make_shared<LayerCreator<CNNLayer>>("ELU")},
{"sigmoid", std::make_shared<LayerCreator<CNNLayer>>("Sigmoid")},
{"log", std::make_shared<LayerCreator<CNNLayer>>("Log")},
{"neghalflog", std::make_shared<LayerCreator<CNNLayer>>("NegHalfLog")},
{"divbyn", std::make_shared<LayerCreator<CNNLayer>>("DivByN")},
{"identity", std::make_shared<LayerCreator<CNNLayer>>("Identity")},
{"exp", std::make_shared<LayerCreator<CNNLayer>>("Exp")},
{"tanh", std::make_shared<LayerCreator<CNNLayer>>("TanH")},
};

View File

@@ -70,6 +70,13 @@ FillLayer::~FillLayer() {}
SelectLayer::~SelectLayer() {}
BroadcastLayer::~BroadcastLayer() {}
QuantizeLayer::~QuantizeLayer() {}
SigmoidLayer::~SigmoidLayer() {}
DivByNLayer::~DivByNLayer() {}
LogLayer::~LogLayer() {}
IdentityLayer::~IdentityLayer() {}
NegHalfLogLayer::~NegHalfLogLayer() {}
ExpLayer::~ExpLayer() {}
TanHLayer::~TanHLayer() {}
MathLayer::~MathLayer() {}
ReduceLayer::~ReduceLayer() {}
TopKLayer::~TopKLayer() {}

View File

@@ -134,6 +134,12 @@ CNNLayerPtr clonelayer(const CNNLayer& source) {
&layerCloneImpl<QuantizeLayer>,
&layerCloneImpl<BinaryConvolutionLayer>,
&layerCloneImpl<WeightableLayer>,
&layerCloneImpl<TanHLayer>,
&layerCloneImpl<LogLayer>,
&layerCloneImpl<NegHalfLogLayer>,
&layerCloneImpl<IdentityLayer>,
&layerCloneImpl<DivByNLayer>,
&layerCloneImpl<SigmoidLayer>,
&layerCloneImpl<OneHotLayer>,
&layerCloneImpl<CNNLayer>,
&layerCloneImpl<UniqueLayer>};

View File

@@ -31,7 +31,7 @@ using AllLayers =
ReshapeLayer*, TileLayer*, ScaleShiftLayer*, PReLULayer*, PowerLayer*, BatchNormalizationLayer*,
ClampLayer*, TensorIterator*, LSTMCell*, GRUCell*, RNNCell*, RNNSequenceLayer*, QuantizeLayer*,
BinaryConvolutionLayer*, WeightableLayer*, OneHotLayer*, MathLayer*, ReduceLayer*, UniqueLayer*,
NonMaxSuppressionLayer*, ScatterLayer*, CNNLayer*>;
NonMaxSuppressionLayer*, ScatterLayer*, TanHLayer*, SigmoidLayer*, LogLayer*, NegHalfLogLayer*,DivByNLayer*, IdentityLayer*, ExpLayer*, CNNLayer*>;
/**
* @brief checks whether type inxed as P has a parent among element in range I..N

View File

@@ -125,6 +125,10 @@ REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, ELU);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, TanH);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Logistic);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Sigmoid);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, DivByN);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Identity);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, Log);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, NegHalfLog);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, PReLU);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, SoftMax);
REG_SHAPE_INFER_FOR_TYPE(EqualShapeProp, LogSoftMax);
@@ -219,7 +223,6 @@ REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Cosh);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Erf);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Floor);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, HardSigmoid);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Log);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Exp);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Neg);
REG_SHAPE_INFER_FOR_TYPE(MathShapeProp, Reciprocal);

View File

@@ -0,0 +1,16 @@
// Copyright (C) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
#include "other/add_output.hpp"
const auto addOutputParams =
::testing::Combine(::testing::Values("Memory_1"), ::testing::Values(CommonTestUtils::DEVICE_CPU));
INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputTestsCommonClass, addOutputParams,
AddOutputTestsCommonClass::getTestCaseName);
TEST_P(AddOutputTestsCommonClass, basic) {
run_test();
}

View File

@@ -0,0 +1,15 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "other/add_output.hpp"
const auto addOutputParams =
::testing::Combine(::testing::Values("Memory_1"), ::testing::Values(CommonTestUtils::DEVICE_GNA));
INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputTestsCommonClass, addOutputParams,
AddOutputTestsCommonClass::getTestCaseName);
TEST_P(AddOutputTestsCommonClass, basic) {
run_test();
}

View File

@@ -0,0 +1,26 @@
// Copyright (C) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <map>
#include "common_test_utils/common_layers_params.hpp"
#include "common_test_utils/common_utils.hpp"
#include "common_test_utils/test_common.hpp"
#include "common_test_utils/test_constants.hpp"
#include "common_test_utils/xml_net_builder/ir_net.hpp"
#include "common_test_utils/xml_net_builder/xml_filler.hpp"
#include "ie_core.hpp"
class AddOutputTestsCommonClass : public CommonTestUtils::TestsCommon,
public testing::WithParamInterface<std::tuple<std::string, std::string>> {
private:
static std::string generate_model();
public:
static std::string getTestCaseName(testing::TestParamInfo<std::tuple<std::string, std::string>> obj);
void run_test();
};

View File

@@ -0,0 +1,76 @@
// Copyright (C) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
#include "other/add_output.hpp"
// TODO: Replace IRBuilder with NGraph when it supports Memory Layer
std::string AddOutputTestsCommonClass::generate_model() {
CommonTestUtils::IRBuilder_v6 test_model_builder("model");
auto precision = InferenceEngine::Precision::FP32;
auto Memory_1_layer =
test_model_builder.AddLayer("Memory_1", "Memory", precision, {{"id", "r_1-3"}, {"index", "1"}, {"size", "2"}})
.AddOutPort({1, 200})
.getLayer();
auto Input_2_layer = test_model_builder.AddLayer("Input_2", "input", precision).AddOutPort({1, 200}).getLayer();
auto Eltwise_3_layer = test_model_builder.AddLayer("Eltwise_3", "Eltwise", precision, {{"operation", "mul"}})
.AddInPort({1, 200})
.AddInPort({1, 200})
.AddOutPort({1, 200})
.getLayer();
auto Activation_4_layer =
test_model_builder.AddLayer("Activation_4", "Activation", precision, {{"type", "sigmoid"}})
.AddInPort({1, 200})
.AddOutPort({1, 200})
.getLayer();
auto Memory_5_layer =
test_model_builder.AddLayer("Memory_5", "Memory", precision, {{"id", "r_1-3"}, {"index", "0"}, {"size", "2"}})
.AddInPort({1, 200})
.getLayer();
test_model_builder.AddEdge(Memory_1_layer.out(0), Eltwise_3_layer.in(0));
test_model_builder.AddEdge(Input_2_layer.out(0), Eltwise_3_layer.in(1));
test_model_builder.AddEdge(Eltwise_3_layer.out(0), Activation_4_layer.in(0));
test_model_builder.AddEdge(Activation_4_layer.out(0), Memory_5_layer.in(0));
auto serial = test_model_builder.serialize();
return serial;
}
std::string AddOutputTestsCommonClass::getTestCaseName(
testing::TestParamInfo<std::tuple<std::string, std::string>> obj) {
std::string layer;
std::string engine;
std::tie(layer, engine) = obj.param;
return layer + "_" + engine;
}
void AddOutputTestsCommonClass::run_test() {
std::string layer_name;
std::string engine_type;
std::tie(layer_name, engine_type) = this->GetParam();
auto model = this->generate_model();
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network;
InferenceEngine::ExecutableNetwork executableNet;
auto null_blob = CommonTestUtils::getWeightsBlob(0);
network = ie.ReadNetwork(model, null_blob);
network.addOutput(layer_name);
executableNet = ie.LoadNetwork(network, engine_type);
auto outputs = executableNet.GetOutputsInfo();
auto layer_output = outputs[layer_name];
ASSERT_EQ(true, layer_output && "layer not found in outputs");
}

View File

@@ -36,7 +36,6 @@ function(build_with_lto)
endif()
add_subdirectory(pugixml)
add_subdirectory(stb_lib)
add_subdirectory(ade)
add_subdirectory(fluid/modules/gapi)

View File

@@ -36,7 +36,7 @@ if(DEFINED BUILD_DEFINES)
endforeach()
endif()
if(BUILD_SHARED_LIBS)
if(NOT BUILD_SHARED_LIBS)
add_library(pugixml SHARED ${SOURCES})
else()
add_library(pugixml STATIC ${SOURCES})

View File

@@ -1,20 +0,0 @@
#===============================================================================
# Copyright (C) 2018-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#===============================================================================
set(TARGET stb_image)
add_library(${TARGET} STATIC stb_image.cpp)
target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

View File

@@ -1,11 +0,0 @@
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_RESIZE_IMPLEMENTATION
#define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_BOX
#define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_BOX
#include "stb_image_resize.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff