Warning as error for Windows (#13291)
* parent 6e7016ccda
author Ilya Churaev <ilya.churaev@intel.com> 1664281499 +0400 committer Ilya Churaev <ilya.churaev@intel.com> 1664510018 +0400 Fixed warnings on local machine * Added CMAKE_COMPILE_WARNING_AS_ERROR usage * Fixed style * Fixed merge conflicts * Fixed typo * Fixed myriad build for macOS * Fixed warning * Fixed tests * Disabled incorrect test * Try to fix linux tests * Revert "Try to fix linux tests" This reverts commit 29224c93ff.
* Fixed tests * Revert logic with incorrect cast * Fixed log softmax * Disable warning as error for cuda * Try to fix inference_engine_s * Fixed cmake * Revert "Fixed cmake" This reverts commit 87e9e4e674.
* Revert "Try to fix inference_engine_s" This reverts commit a1adca8b05.
* WA for static symbols in inference_engine_s test library * Fixed code style * Fixed static definition for master * Revert "Fixed static definition for master" This reverts commit 20d00d215a.
* Revert "Fixed code style" This reverts commit 0eb2362543.
* Revert "WA for static symbols in inference_engine_s test library" This reverts commit 75ef86a79d.
* Fixed linker issue for Windows * Disable WaE by default * Disable warning as error in the developer package * Try to fix dev package * Try to fix Windows Jenkins * Revert old behavior for TREAT_WARNING_AS_ERROR variable
This commit is contained in:
parent
25f85a3beb
commit
8a9c19e3eb
@ -120,6 +120,7 @@ jobs:
|
|||||||
-DVERBOSE_BUILD=ON
|
-DVERBOSE_BUILD=ON
|
||||||
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
||||||
-DCMAKE_TOOLCHAIN_FILE=$(ANDROID_TOOLS)/ndk-bundle/build/cmake/android.toolchain.cmake
|
-DCMAKE_TOOLCHAIN_FILE=$(ANDROID_TOOLS)/ndk-bundle/build/cmake/android.toolchain.cmake
|
||||||
|
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON
|
||||||
-DANDROID_ABI=$(ANDROID_ABI_CONFIG)
|
-DANDROID_ABI=$(ANDROID_ABI_CONFIG)
|
||||||
-DANDROID_STL=c++_shared
|
-DANDROID_STL=c++_shared
|
||||||
-DANDROID_PLATFORM=$(ANDROID_SDK_VERSION)
|
-DANDROID_PLATFORM=$(ANDROID_SDK_VERSION)
|
||||||
|
@ -177,6 +177,7 @@ jobs:
|
|||||||
-GNinja
|
-GNinja
|
||||||
-DVERBOSE_BUILD=ON
|
-DVERBOSE_BUILD=ON
|
||||||
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
||||||
|
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON
|
||||||
-DENABLE_PYTHON=ON
|
-DENABLE_PYTHON=ON
|
||||||
-DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS)
|
-DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS)
|
||||||
-DENABLE_ONEDNN_FOR_GPU=$(CMAKE_BUILD_SHARED_LIBS)
|
-DENABLE_ONEDNN_FOR_GPU=$(CMAKE_BUILD_SHARED_LIBS)
|
||||||
|
@ -139,6 +139,7 @@ jobs:
|
|||||||
cmakeArgs: >
|
cmakeArgs: >
|
||||||
-GNinja
|
-GNinja
|
||||||
-DVERBOSE_BUILD=ON
|
-DVERBOSE_BUILD=ON
|
||||||
|
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON
|
||||||
-DOpenCV_DIR=$(INSTALL_OPENCV)/cmake
|
-DOpenCV_DIR=$(INSTALL_OPENCV)/cmake
|
||||||
-DPYTHON_INCLUDE_DIRS=$(INSTALL_PYTHON)/include/python3.8
|
-DPYTHON_INCLUDE_DIRS=$(INSTALL_PYTHON)/include/python3.8
|
||||||
-DPYTHON_LIBRARY=$(INSTALL_PYTHON)/lib/libpython3.8.so
|
-DPYTHON_LIBRARY=$(INSTALL_PYTHON)/lib/libpython3.8.so
|
||||||
|
@ -88,6 +88,7 @@ jobs:
|
|||||||
cmakeArgs: >
|
cmakeArgs: >
|
||||||
-GNinja
|
-GNinja
|
||||||
-DVERBOSE_BUILD=ON
|
-DVERBOSE_BUILD=ON
|
||||||
|
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON
|
||||||
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
||||||
-DENABLE_FASTER_BUILD=ON
|
-DENABLE_FASTER_BUILD=ON
|
||||||
-DENABLE_PROFILING_ITT=ON
|
-DENABLE_PROFILING_ITT=ON
|
||||||
|
@ -122,7 +122,6 @@ jobs:
|
|||||||
-DVERBOSE_BUILD=ON
|
-DVERBOSE_BUILD=ON
|
||||||
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
||||||
-DIE_EXTRA_MODULES=/root/repos/openvino_contrib/modules
|
-DIE_EXTRA_MODULES=/root/repos/openvino_contrib/modules
|
||||||
-DNGRAPH_ONNX_IMPORT_ENABLE=OFF
|
|
||||||
-DENABLE_MKL_DNN=OFF
|
-DENABLE_MKL_DNN=OFF
|
||||||
-DENABLE_CLDNN=OFF
|
-DENABLE_CLDNN=OFF
|
||||||
-DENABLE_VPU=OFF
|
-DENABLE_VPU=OFF
|
||||||
|
@ -155,6 +155,7 @@ jobs:
|
|||||||
-GNinja
|
-GNinja
|
||||||
-DVERBOSE_BUILD=ON
|
-DVERBOSE_BUILD=ON
|
||||||
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
||||||
|
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON
|
||||||
-DENABLE_PYTHON=ON
|
-DENABLE_PYTHON=ON
|
||||||
-DENABLE_OPENCV=OFF
|
-DENABLE_OPENCV=OFF
|
||||||
-DPYTHON_EXECUTABLE=/usr/bin/python3.8
|
-DPYTHON_EXECUTABLE=/usr/bin/python3.8
|
||||||
|
@ -104,6 +104,7 @@ jobs:
|
|||||||
cmakeArgs: >
|
cmakeArgs: >
|
||||||
-GNinja
|
-GNinja
|
||||||
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
||||||
|
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON
|
||||||
-DENABLE_PYTHON=ON
|
-DENABLE_PYTHON=ON
|
||||||
-DPYTHON_EXECUTABLE=/usr/bin/python3.8
|
-DPYTHON_EXECUTABLE=/usr/bin/python3.8
|
||||||
-DENABLE_INTEL_MYRIAD_COMMON=OFF
|
-DENABLE_INTEL_MYRIAD_COMMON=OFF
|
||||||
|
@ -117,6 +117,7 @@ jobs:
|
|||||||
cmake -GNinja -DVERBOSE_BUILD=ON \
|
cmake -GNinja -DVERBOSE_BUILD=ON \
|
||||||
-DENABLE_REQUIREMENTS_INSTALL=OFF \
|
-DENABLE_REQUIREMENTS_INSTALL=OFF \
|
||||||
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
|
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
|
||||||
|
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON
|
||||||
-DENABLE_PYTHON=ON \
|
-DENABLE_PYTHON=ON \
|
||||||
-DENABLE_TESTS=OFF \
|
-DENABLE_TESTS=OFF \
|
||||||
-DENABLE_STRICT_DEPENDENCIES=OFF \
|
-DENABLE_STRICT_DEPENDENCIES=OFF \
|
||||||
|
@ -156,6 +156,7 @@ jobs:
|
|||||||
-DENABLE_FASTER_BUILD=ON ^
|
-DENABLE_FASTER_BUILD=ON ^
|
||||||
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE) ^
|
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE) ^
|
||||||
-DENABLE_TESTS=ON ^
|
-DENABLE_TESTS=ON ^
|
||||||
|
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON ^
|
||||||
-DENABLE_STRICT_DEPENDENCIES=OFF ^
|
-DENABLE_STRICT_DEPENDENCIES=OFF ^
|
||||||
-DENABLE_PYTHON=ON ^
|
-DENABLE_PYTHON=ON ^
|
||||||
-DBUILD_nvidia_plugin=OFF ^
|
-DBUILD_nvidia_plugin=OFF ^
|
||||||
|
@ -78,7 +78,7 @@ jobs:
|
|||||||
|
|
||||||
- script: |
|
- script: |
|
||||||
set PATH=$(WORK_DIR)\ninja-win;%PATH%
|
set PATH=$(WORK_DIR)\ninja-win;%PATH%
|
||||||
call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
|
call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
|
||||||
workingDirectory: $(BUILD_DIR)
|
workingDirectory: $(BUILD_DIR)
|
||||||
displayName: 'CMake'
|
displayName: 'CMake'
|
||||||
|
|
||||||
|
@ -264,12 +264,9 @@ if(WIN32)
|
|||||||
ie_add_compiler_flags(/Gy) # remove unreferenced functions: function level linking
|
ie_add_compiler_flags(/Gy) # remove unreferenced functions: function level linking
|
||||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE")
|
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE")
|
||||||
|
|
||||||
if (TREAT_WARNING_AS_ERROR)
|
if (CMAKE_COMPILE_WARNING_AS_ERROR)
|
||||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
||||||
ie_add_compiler_flags(/WX)
|
|
||||||
ie_add_compiler_flags(/Qdiag-warning:47,1740,1786)
|
ie_add_compiler_flags(/Qdiag-warning:47,1740,1786)
|
||||||
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
|
||||||
# ie_add_compiler_flags(/WX) # Too many warnings
|
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
@ -310,12 +307,6 @@ if(WIN32)
|
|||||||
string(REPLACE "/Zi" "/Z7" CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO}")
|
string(REPLACE "/Zi" "/Z7" CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO}")
|
||||||
string(REPLACE "/Zi" "/Z7" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
|
string(REPLACE "/Zi" "/Z7" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
|
||||||
else()
|
else()
|
||||||
# TODO: enable for C sources as well
|
|
||||||
# ie_add_compiler_flags(-Werror)
|
|
||||||
if(TREAT_WARNING_AS_ERROR)
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
ie_add_compiler_flags(-ffunction-sections -fdata-sections)
|
ie_add_compiler_flags(-ffunction-sections -fdata-sections)
|
||||||
ie_add_compiler_flags(-fdiagnostics-show-option)
|
ie_add_compiler_flags(-fdiagnostics-show-option)
|
||||||
ie_add_compiler_flags(-Wundef)
|
ie_add_compiler_flags(-Wundef)
|
||||||
|
@ -17,13 +17,17 @@ else()
|
|||||||
ie_option(USE_BUILD_TYPE_SUBFOLDER "Create dedicated sub-folder per build type for output binaries" ON)
|
ie_option(USE_BUILD_TYPE_SUBFOLDER "Create dedicated sub-folder per build type for output binaries" ON)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# FIXME: ARM cross-compiler generates several "false positive" warnings regarding __builtin_memcpy buffer overflow
|
if(CI_BUILD_NUMBER)
|
||||||
if(X86 OR X86_64)
|
|
||||||
set(TREAT_WARNING_AS_ERROR_DEFAULT ON)
|
set(TREAT_WARNING_AS_ERROR_DEFAULT ON)
|
||||||
else()
|
else()
|
||||||
set(TREAT_WARNING_AS_ERROR_DEFAULT OFF)
|
set(TREAT_WARNING_AS_ERROR_DEFAULT OFF)
|
||||||
endif()
|
endif()
|
||||||
ie_option (TREAT_WARNING_AS_ERROR "Treat build warnings as errors" ${TREAT_WARNING_AS_ERROR_DEFAULT})
|
|
||||||
|
ie_dependent_option (TREAT_WARNING_AS_ERROR "WILL BE REMOVED SOON, NEED TO FIX PRIVATE COMPONENTS" ON "X86_64 OR X86" OFF)
|
||||||
|
|
||||||
|
if(NOT DEFINED CMAKE_COMPILE_WARNING_AS_ERROR)
|
||||||
|
set(CMAKE_COMPILE_WARNING_AS_ERROR ${TREAT_WARNING_AS_ERROR_DEFAULT})
|
||||||
|
endif()
|
||||||
|
|
||||||
ie_dependent_option (ENABLE_INTEGRITYCHECK "build DLLs with /INTEGRITYCHECK flag" OFF "CMAKE_CXX_COMPILER_ID STREQUAL MSVC" OFF)
|
ie_dependent_option (ENABLE_INTEGRITYCHECK "build DLLs with /INTEGRITYCHECK flag" OFF "CMAKE_CXX_COMPILER_ID STREQUAL MSVC" OFF)
|
||||||
|
|
||||||
|
@ -31,6 +31,11 @@ message(" ")
|
|||||||
# for samples in 3rd party projects
|
# for samples in 3rd party projects
|
||||||
set_and_check(gflags_DIR "@gflags_BINARY_DIR@")
|
set_and_check(gflags_DIR "@gflags_BINARY_DIR@")
|
||||||
|
|
||||||
|
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||||
|
# Disable warning as error for private components
|
||||||
|
set(CMAKE_COMPILE_WARNING_AS_ERROR OFF)
|
||||||
|
endif()
|
||||||
|
|
||||||
#
|
#
|
||||||
# Content
|
# Content
|
||||||
#
|
#
|
||||||
|
@ -29,6 +29,11 @@ message(" ")
|
|||||||
# for samples in 3rd party projects
|
# for samples in 3rd party projects
|
||||||
set_and_check(gflags_DIR "@gflags_BINARY_DIR@")
|
set_and_check(gflags_DIR "@gflags_BINARY_DIR@")
|
||||||
|
|
||||||
|
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||||
|
# Disable warning as error for private components
|
||||||
|
set(CMAKE_COMPILE_WARNING_AS_ERROR OFF)
|
||||||
|
endif()
|
||||||
|
|
||||||
#
|
#
|
||||||
# Content
|
# Content
|
||||||
#
|
#
|
||||||
|
@ -23,7 +23,7 @@ ppp.input("input").preprocess().mean(128).scale(127);
|
|||||||
ppp.input("input").model().set_layout("NCHW"); // N=1, C=3, H=224, W=224
|
ppp.input("input").model().set_layout("NCHW"); // N=1, C=3, H=224, W=224
|
||||||
// Mean/Scale has 3 values which matches with C=3
|
// Mean/Scale has 3 values which matches with C=3
|
||||||
ppp.input("input").preprocess()
|
ppp.input("input").preprocess()
|
||||||
.mean({103.94, 116.78, 123.68}).scale({57.21, 57.45, 57.73});
|
.mean({103.94f, 116.78f, 123.68f}).scale({57.21f, 57.45f, 57.73f});
|
||||||
//! [ov:preprocess:mean_scale_array]
|
//! [ov:preprocess:mean_scale_array]
|
||||||
|
|
||||||
//! [ov:preprocess:convert_element_type]
|
//! [ov:preprocess:convert_element_type]
|
||||||
@ -186,8 +186,8 @@ void save_example() {
|
|||||||
.convert_element_type()
|
.convert_element_type()
|
||||||
.convert_color(ov::preprocess::ColorFormat::RGB)
|
.convert_color(ov::preprocess::ColorFormat::RGB)
|
||||||
.resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR)
|
.resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR)
|
||||||
.mean({123.675, 116.28, 103.53}) // Subtract mean after color conversion
|
.mean({123.675f, 116.28f, 103.53f}) // Subtract mean after color conversion
|
||||||
.scale({58.624, 57.12, 57.375});
|
.scale({58.624f, 57.12f, 57.375f});
|
||||||
// Dump preprocessor
|
// Dump preprocessor
|
||||||
std::cout << "Preprocessor: " << prep << std::endl;
|
std::cout << "Preprocessor: " << prep << std::endl;
|
||||||
model = prep.build();
|
model = prep.build();
|
||||||
|
@ -45,16 +45,16 @@ set (CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER})
|
|||||||
set (CMAKE_PDB_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER})
|
set (CMAKE_PDB_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER})
|
||||||
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER})
|
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER})
|
||||||
|
|
||||||
|
if(TREAT_WARNING_AS_ERROR AND NOT DEFINED CMAKE_COMPILE_WARNING_AS_ERROR)
|
||||||
|
set(CMAKE_COMPILE_WARNING_AS_ERROR ON)
|
||||||
|
endif()
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
set_property (DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _CRT_SECURE_NO_WARNINGS)
|
set_property (DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _CRT_SECURE_NO_WARNINGS)
|
||||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS")
|
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS")
|
||||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") # no asynchronous structured exception handling
|
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") # no asynchronous structured exception handling
|
||||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE")
|
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE")
|
||||||
|
|
||||||
if (TREAT_WARNING_AS_ERROR)
|
|
||||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") # treating warnings as errors
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
||||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qdiag-disable:177")
|
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qdiag-disable:177")
|
||||||
endif()
|
endif()
|
||||||
@ -64,11 +64,6 @@ if (WIN32)
|
|||||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4275 /wd4267 /wd4819")
|
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4275 /wd4267 /wd4819")
|
||||||
endif()
|
endif()
|
||||||
else()
|
else()
|
||||||
# treating warnings as errors
|
|
||||||
if(TREAT_WARNING_AS_ERROR)
|
|
||||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
||||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-disable:177")
|
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-disable:177")
|
||||||
endif()
|
endif()
|
||||||
|
@ -1437,7 +1437,7 @@ IEStatusCode ie_infer_request_set_batch(ie_infer_request_t* infer_request, const
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
infer_request->object.SetBatch(size);
|
infer_request->object.SetBatch(static_cast<int>(size));
|
||||||
}
|
}
|
||||||
CATCH_IE_EXCEPTIONS
|
CATCH_IE_EXCEPTIONS
|
||||||
|
|
||||||
@ -1641,7 +1641,7 @@ IEStatusCode ie_blob_size(ie_blob_t* blob, int* size_result) {
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
*size_result = blob->object->size();
|
*size_result = static_cast<int>(blob->object->size());
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
@ -1654,7 +1654,7 @@ IEStatusCode ie_blob_byte_size(ie_blob_t* blob, int* bsize_result) {
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
*bsize_result = blob->object->byteSize();
|
*bsize_result = static_cast<int>(blob->object->byteSize());
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
@ -688,5 +688,5 @@ void InferenceEnginePython::CVariableState::setState(InferenceEngine::Blob::Ptr
|
|||||||
}
|
}
|
||||||
|
|
||||||
const size_t InferenceEnginePython::product(const InferenceEngine::SizeVector& dims) {
|
const size_t InferenceEnginePython::product(const InferenceEngine::SizeVector& dims) {
|
||||||
return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<size_t>{});
|
return std::accumulate(dims.begin(), dims.end(), size_t(1), std::multiplies<size_t>{});
|
||||||
}
|
}
|
||||||
|
@ -438,16 +438,16 @@ bool convertToRNNSeq(CNNLayerPtr cur, const N& net) {
|
|||||||
return indx == scope.size() ? -1 : indx;
|
return indx == scope.size() ? -1 : indx;
|
||||||
};
|
};
|
||||||
|
|
||||||
int in_dt_idx = _indx_in(ti->body.inputs, rsp1->insData[0].lock());
|
int in_dt_idx = static_cast<int>(_indx_in(ti->body.inputs, rsp1->insData[0].lock()));
|
||||||
int in_hs_idx = _indx_in(ti->body.inputs, cell->insData[1].lock());
|
int in_hs_idx = static_cast<int>(_indx_in(ti->body.inputs, cell->insData[1].lock()));
|
||||||
int in_cs_idx = NS == 2 ? _indx_in(ti->body.inputs, cell->insData[2].lock()) : -1;
|
int in_cs_idx = NS == 2 ? static_cast<int>(_indx_in(ti->body.inputs, cell->insData[2].lock())) : -1;
|
||||||
|
|
||||||
int out_dt_idx = _indx_in(ti->body.outputs, rsp2->outData[0]);
|
int out_dt_idx = static_cast<int>(_indx_in(ti->body.outputs, rsp2->outData[0]));
|
||||||
int out_hs_idx = _indx_in(ti->body.outputs, cell->outData[0]);
|
int out_hs_idx = static_cast<int>(_indx_in(ti->body.outputs, cell->outData[0]));
|
||||||
int out_cs_idx = NS == 2 ? _indx_in(ti->body.outputs, cell->outData[1]) : -1;
|
int out_cs_idx = NS == 2 ? static_cast<int>(_indx_in(ti->body.outputs, cell->outData[1])) : -1;
|
||||||
|
|
||||||
// indexes should be [0,1,2] : sum == 3 or [0,1,-1] : sum == 0
|
// indexes should be [0,1,2] : sum == 3 or [0,1,-1] : sum == 0
|
||||||
int sum = (NS - 1) * 3;
|
int sum = (static_cast<int>(NS) - 1) * 3;
|
||||||
if (in_hs_idx + in_cs_idx + in_dt_idx != sum || out_hs_idx + out_cs_idx + out_dt_idx != sum) return false;
|
if (in_hs_idx + in_cs_idx + in_dt_idx != sum || out_hs_idx + out_cs_idx + out_dt_idx != sum) return false;
|
||||||
|
|
||||||
std::map<int, TensorIterator::PortMap> i2map, o2map, be2map;
|
std::map<int, TensorIterator::PortMap> i2map, o2map, be2map;
|
||||||
@ -1381,7 +1381,7 @@ void convertArrayPrecision(typename PrecisionTrait<PREC_TO>::value_type* dst,
|
|||||||
using dst_type = typename PrecisionTrait<PREC_TO>::value_type;
|
using dst_type = typename PrecisionTrait<PREC_TO>::value_type;
|
||||||
|
|
||||||
for (size_t i = 0; i < nelem; i++) {
|
for (size_t i = 0; i < nelem; i++) {
|
||||||
dst[i] = PrecisionUtils::saturate_cast<dst_type>(src[i]);
|
dst[i] = PrecisionUtils::saturate_cast<dst_type>(static_cast<dst_type>(src[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,7 +86,8 @@ bool MoveFakeQuantize::transform(TransformationContext& context, ngraph::pattern
|
|||||||
const auto concat_axis = concat_node->get_concatenation_axis();
|
const auto concat_axis = concat_node->get_concatenation_axis();
|
||||||
for (size_t i = 0; i < 4; i++) {
|
for (size_t i = 0; i < 4; i++) {
|
||||||
curr_constants[i] = as_type_ptr<opset1::Constant>(fq->get_input_node_shared_ptr(i + 1));
|
curr_constants[i] = as_type_ptr<opset1::Constant>(fq->get_input_node_shared_ptr(i + 1));
|
||||||
if (!multi_chanels && curr_constants[i]->get_shape().size() > concat_axis && curr_constants[i]->get_shape()[concat_axis] != 1) {
|
if (!multi_chanels && concat_axis >= 0 && curr_constants[i]->get_shape().size() > static_cast<size_t>(concat_axis)
|
||||||
|
&& curr_constants[i]->get_shape()[concat_axis] != 1) {
|
||||||
multi_chanels = true;
|
multi_chanels = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1066,7 +1066,7 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
|
|||||||
fq->get_levels(),
|
fq->get_levels(),
|
||||||
fq->get_auto_broadcast()),
|
fq->get_auto_broadcast()),
|
||||||
true,
|
true,
|
||||||
outChannelsShapeIndex);
|
static_cast<int>(outChannelsShapeIndex));
|
||||||
NetworkHelper::copyInfo(fq, newFQ);
|
NetworkHelper::copyInfo(fq, newFQ);
|
||||||
|
|
||||||
std::shared_ptr<ngraph::Node> convert2;
|
std::shared_ptr<ngraph::Node> convert2;
|
||||||
@ -1804,7 +1804,7 @@ std::vector<std::vector<std::shared_ptr<ngraph::opset1::Constant>>> NetworkHelpe
|
|||||||
auto number_of_concat_inputs = concat->get_input_size();
|
auto number_of_concat_inputs = concat->get_input_size();
|
||||||
const auto concatNode = as_type_ptr<opset1::Concat>(concat);
|
const auto concatNode = as_type_ptr<opset1::Concat>(concat);
|
||||||
const auto concat_axis = concatNode->get_concatenation_axis();
|
const auto concat_axis = concatNode->get_concatenation_axis();
|
||||||
std::vector<unsigned int> shape_axis(number_of_concat_inputs);
|
std::vector<int64_t> shape_axis(number_of_concat_inputs);
|
||||||
for (size_t i{ 0 }; i < number_of_concat_inputs; ++i) {
|
for (size_t i{ 0 }; i < number_of_concat_inputs; ++i) {
|
||||||
auto shape = concat->get_input_partial_shape(i);
|
auto shape = concat->get_input_partial_shape(i);
|
||||||
shape_axis[i] = shape[concat_axis].get_length();
|
shape_axis[i] = shape[concat_axis].get_length();
|
||||||
|
@ -181,7 +181,7 @@ bool PadTransformation::canBeTransformed(const TransformationContext& context, s
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (padsBegin[i] != 0) {
|
if (padsBegin[i] != 0) {
|
||||||
beginNonZeroIdx = i;
|
beginNonZeroIdx = static_cast<int>(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -193,7 +193,7 @@ bool PadTransformation::canBeTransformed(const TransformationContext& context, s
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (padsEnd[i] != 0) {
|
if (padsEnd[i] != 0) {
|
||||||
endNonZeroIdx = i;
|
endNonZeroIdx = static_cast<int>(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -118,7 +118,7 @@ void reshapeDequantizationConstant(const std::shared_ptr<opset1::Reshape>& resha
|
|||||||
const std::shared_ptr<Node> broadcastedConstant = getBCastedConst(originalConstant, dimensionsToBroadcast);
|
const std::shared_ptr<Node> broadcastedConstant = getBCastedConst(originalConstant, dimensionsToBroadcast);
|
||||||
|
|
||||||
std::vector<int> newReshapeConstValues(reshapeOutputRank.get_length(), 1ul);
|
std::vector<int> newReshapeConstValues(reshapeOutputRank.get_length(), 1ul);
|
||||||
newReshapeConstValues[1] = reshapeOutputPShape[1].get_length();
|
newReshapeConstValues[1] = static_cast<int>(reshapeOutputPShape[1].get_length());
|
||||||
const std::shared_ptr<opset1::Constant> newReshapeConstant = std::make_shared<opset1::Constant>(
|
const std::shared_ptr<opset1::Constant> newReshapeConstant = std::make_shared<opset1::Constant>(
|
||||||
element::i32,
|
element::i32,
|
||||||
Shape({ newReshapeConstValues.size() }),
|
Shape({ newReshapeConstValues.size() }),
|
||||||
|
@ -64,7 +64,7 @@ template<> inline uint8_t saturate_cast(uint16_t x) {
|
|||||||
}
|
}
|
||||||
template<> inline uint8_t saturate_cast(float x) { return saturate_cast<uint8_t>(static_cast<int>(std::rint(x))); }
|
template<> inline uint8_t saturate_cast(float x) { return saturate_cast<uint8_t>(static_cast<int>(std::rint(x))); }
|
||||||
|
|
||||||
template<> inline float saturate_cast(double x) { return x; }
|
template<> inline float saturate_cast(double x) { return static_cast<float>(x); }
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
constexpr static const int ONE = 1 << 15;
|
constexpr static const int ONE = 1 << 15;
|
||||||
|
@ -93,7 +93,7 @@ bool ngraph::snippets::pass::AssignRegisters::run_on_model(const std::shared_ptr
|
|||||||
|
|
||||||
std::reverse(lifeIn.begin(), lifeIn.end());
|
std::reverse(lifeIn.begin(), lifeIn.end());
|
||||||
auto find_last_use = [lifeIn](int i) -> int {
|
auto find_last_use = [lifeIn](int i) -> int {
|
||||||
int ln = lifeIn.size()-1;
|
int ln = static_cast<int>(lifeIn.size()) - 1;
|
||||||
for (auto& x : lifeIn) {
|
for (auto& x : lifeIn) {
|
||||||
if (x.find(i) != x.end()) {
|
if (x.find(i) != x.end()) {
|
||||||
return ln;
|
return ln;
|
||||||
@ -104,7 +104,7 @@ bool ngraph::snippets::pass::AssignRegisters::run_on_model(const std::shared_ptr
|
|||||||
};
|
};
|
||||||
|
|
||||||
for (size_t i = 0; i < stmts.size(); i++) {
|
for (size_t i = 0; i < stmts.size(); i++) {
|
||||||
live_intervals.insert(std::make_pair(i, find_last_use(i)));
|
live_intervals.insert(std::make_pair(static_cast<int>(i), find_last_use(static_cast<int>(i))));
|
||||||
}
|
}
|
||||||
|
|
||||||
// http://web.cs.ucla.edu/~palsberg/course/cs132/linearscan.pdf
|
// http://web.cs.ucla.edu/~palsberg/course/cs132/linearscan.pdf
|
||||||
|
@ -93,7 +93,7 @@ ngraph::pass::ConvToBinaryConv::ConvToBinaryConv() {
|
|||||||
conv->get_pads_end(),
|
conv->get_pads_end(),
|
||||||
conv->get_dilations(),
|
conv->get_dilations(),
|
||||||
opset5::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT,
|
opset5::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT,
|
||||||
-1,
|
-1.f,
|
||||||
conv->get_auto_pad());
|
conv->get_auto_pad());
|
||||||
new_conv->set_friendly_name(conv->get_friendly_name());
|
new_conv->set_friendly_name(conv->get_friendly_name());
|
||||||
std::vector<int64_t> axes;
|
std::vector<int64_t> axes;
|
||||||
@ -129,8 +129,9 @@ ngraph::pass::ConvToBinaryConv::ConvToBinaryConv() {
|
|||||||
conv->get_pads_end(),
|
conv->get_pads_end(),
|
||||||
conv->get_dilations(),
|
conv->get_dilations(),
|
||||||
opset5::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT,
|
opset5::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT,
|
||||||
0,
|
0.f,
|
||||||
conv->get_auto_pad());
|
conv->get_auto_pad());
|
||||||
|
|
||||||
new_conv->set_friendly_name(conv->get_friendly_name());
|
new_conv->set_friendly_name(conv->get_friendly_name());
|
||||||
copy_runtime_info(conv, new_conv);
|
copy_runtime_info(conv, new_conv);
|
||||||
replace_node(conv, new_conv);
|
replace_node(conv, new_conv);
|
||||||
|
@ -26,7 +26,8 @@ bool check_block_first(const ngraph::PartialShape& shape_input,
|
|||||||
}
|
}
|
||||||
|
|
||||||
possible_block_size = shape_reshape_before[1].get_length();
|
possible_block_size = shape_reshape_before[1].get_length();
|
||||||
ngraph::Dimension c_dim(shape_input[1].get_length() / std::pow(possible_block_size, spatial_dims));
|
ngraph::Dimension c_dim(
|
||||||
|
static_cast<int64_t>(shape_input[1].get_length() / std::pow(possible_block_size, spatial_dims)));
|
||||||
|
|
||||||
// x' = reshape(data, [N, block_size, block_size, ..., block_size, C / (block_size ^ K), D1, D2, ..., DK])
|
// x' = reshape(data, [N, block_size, block_size, ..., block_size, C / (block_size ^ K), D1, D2, ..., DK])
|
||||||
ngraph::PartialShape expected_shape = {shape_input[0]};
|
ngraph::PartialShape expected_shape = {shape_input[0]};
|
||||||
@ -76,7 +77,8 @@ bool check_depth_first(const ngraph::PartialShape& shape_input,
|
|||||||
}
|
}
|
||||||
|
|
||||||
possible_block_size = shape_reshape_before[2].get_length();
|
possible_block_size = shape_reshape_before[2].get_length();
|
||||||
ngraph::Dimension c_dim(shape_input[1].get_length() / std::pow(possible_block_size, spatial_dims));
|
ngraph::Dimension c_dim(
|
||||||
|
static_cast<int>(shape_input[1].get_length() / std::pow(possible_block_size, spatial_dims)));
|
||||||
|
|
||||||
// x' = reshape(data, [N, C / (block_size ^ K), block_size, block_size, ..., block_size, D1, D2, ..., DK])
|
// x' = reshape(data, [N, C / (block_size ^ K), block_size, block_size, ..., block_size, D1, D2, ..., DK])
|
||||||
ngraph::PartialShape expected_shape = {shape_input[0], c_dim};
|
ngraph::PartialShape expected_shape = {shape_input[0], c_dim};
|
||||||
|
@ -49,7 +49,8 @@ ngraph::pass::GeluFusionWithErfOne::GeluFusionWithErfOne() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool valid_constant_values = op::util::has_constant_value<float>(div_const_value, M_SQRT2) &&
|
bool valid_constant_values =
|
||||||
|
op::util::has_constant_value<float>(div_const_value, static_cast<float>(M_SQRT2)) &&
|
||||||
op::util::has_constant_value<float>(add_const_value, 1.0f) &&
|
op::util::has_constant_value<float>(add_const_value, 1.0f) &&
|
||||||
op::util::has_constant_value<float>(mul_const_value, 0.5f);
|
op::util::has_constant_value<float>(mul_const_value, 0.5f);
|
||||||
|
|
||||||
@ -108,7 +109,8 @@ ngraph::pass::GeluFusionWithErfTwo::GeluFusionWithErfTwo() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool valid_constant_values = op::util::has_constant_value<float>(div_const_value, M_SQRT2) &&
|
bool valid_constant_values =
|
||||||
|
op::util::has_constant_value<float>(div_const_value, static_cast<float>(M_SQRT2)) &&
|
||||||
op::util::has_constant_value<float>(add_const_value, 1.0f) &&
|
op::util::has_constant_value<float>(add_const_value, 1.0f) &&
|
||||||
op::util::has_constant_value<float>(mul_const_value, 0.5f);
|
op::util::has_constant_value<float>(mul_const_value, 0.5f);
|
||||||
|
|
||||||
@ -167,7 +169,8 @@ ngraph::pass::GeluFusionWithErfThree::GeluFusionWithErfThree() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool valid_constant_values = op::util::has_constant_value<float>(div_const_value, M_SQRT2) &&
|
bool valid_constant_values =
|
||||||
|
op::util::has_constant_value<float>(div_const_value, static_cast<float>(M_SQRT2)) &&
|
||||||
op::util::has_constant_value<float>(add_const_value, 1.0f) &&
|
op::util::has_constant_value<float>(add_const_value, 1.0f) &&
|
||||||
op::util::has_constant_value<float>(mul_const_value, 0.5f);
|
op::util::has_constant_value<float>(mul_const_value, 0.5f);
|
||||||
|
|
||||||
|
@ -29,9 +29,9 @@ ov::pass::RemoveMultiSubGraphOpDanglingParams::RemoveMultiSubGraphOpDanglingPara
|
|||||||
const auto subgraphs_size = multi_subgraph_op->get_internal_subgraphs_size();
|
const auto subgraphs_size = multi_subgraph_op->get_internal_subgraphs_size();
|
||||||
to_remove_descriptors_indexes.resize(subgraphs_size);
|
to_remove_descriptors_indexes.resize(subgraphs_size);
|
||||||
for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) {
|
for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) {
|
||||||
auto& body_func = multi_subgraph_op->get_function(body_idx);
|
auto& body_func = multi_subgraph_op->get_function(static_cast<int>(body_idx));
|
||||||
auto& body_params = body_func->get_parameters();
|
auto& body_params = body_func->get_parameters();
|
||||||
auto& body_in_descriptors = multi_subgraph_op->get_input_descriptions(body_idx);
|
auto& body_in_descriptors = multi_subgraph_op->get_input_descriptions(static_cast<int>(body_idx));
|
||||||
// collect all descriptors which should be removed and reqired inputs
|
// collect all descriptors which should be removed and reqired inputs
|
||||||
for (size_t i = 0; i < body_in_descriptors.size(); ++i) {
|
for (size_t i = 0; i < body_in_descriptors.size(); ++i) {
|
||||||
auto& body_param = body_params[body_in_descriptors[i]->m_body_parameter_index];
|
auto& body_param = body_params[body_in_descriptors[i]->m_body_parameter_index];
|
||||||
@ -57,7 +57,7 @@ ov::pass::RemoveMultiSubGraphOpDanglingParams::RemoveMultiSubGraphOpDanglingPara
|
|||||||
auto update_op_inputs_desc = [&subgraphs_size](const std::shared_ptr<op::util::MultiSubGraphOp>& op,
|
auto update_op_inputs_desc = [&subgraphs_size](const std::shared_ptr<op::util::MultiSubGraphOp>& op,
|
||||||
uint64_t removed_loop_idx) {
|
uint64_t removed_loop_idx) {
|
||||||
for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) {
|
for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) {
|
||||||
auto& descriptors = op->get_input_descriptions(body_idx);
|
auto& descriptors = op->get_input_descriptions(static_cast<int>(body_idx));
|
||||||
for (auto& desc : descriptors) {
|
for (auto& desc : descriptors) {
|
||||||
if (desc->m_input_index > removed_loop_idx) {
|
if (desc->m_input_index > removed_loop_idx) {
|
||||||
desc->m_input_index--;
|
desc->m_input_index--;
|
||||||
@ -67,8 +67,8 @@ ov::pass::RemoveMultiSubGraphOpDanglingParams::RemoveMultiSubGraphOpDanglingPara
|
|||||||
};
|
};
|
||||||
// Remove dangling body params and input and update input descriptors
|
// Remove dangling body params and input and update input descriptors
|
||||||
for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) {
|
for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) {
|
||||||
auto& body_in_descriptors = multi_subgraph_op->get_input_descriptions(body_idx);
|
auto& body_in_descriptors = multi_subgraph_op->get_input_descriptions(static_cast<int>(body_idx));
|
||||||
auto& body_func = multi_subgraph_op->get_function(body_idx);
|
auto& body_func = multi_subgraph_op->get_function(static_cast<int>(body_idx));
|
||||||
auto& body_params = body_func->get_parameters();
|
auto& body_params = body_func->get_parameters();
|
||||||
op::util::MultiSubGraphOp::MultiSubgraphInputDescriptionVector updated_body_in_descriptors;
|
op::util::MultiSubGraphOp::MultiSubgraphInputDescriptionVector updated_body_in_descriptors;
|
||||||
for (size_t desc_idx = 0; desc_idx < body_in_descriptors.size(); ++desc_idx) {
|
for (size_t desc_idx = 0; desc_idx < body_in_descriptors.size(); ++desc_idx) {
|
||||||
@ -93,7 +93,7 @@ ov::pass::RemoveMultiSubGraphOpDanglingParams::RemoveMultiSubGraphOpDanglingPara
|
|||||||
updated_body_in_descriptors.emplace_back(body_in_descriptors[desc_idx]);
|
updated_body_in_descriptors.emplace_back(body_in_descriptors[desc_idx]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
multi_subgraph_op->set_input_descriptions(body_idx, updated_body_in_descriptors);
|
multi_subgraph_op->set_input_descriptions(static_cast<int>(body_idx), updated_body_in_descriptors);
|
||||||
}
|
}
|
||||||
multi_subgraph_op->set_arguments(op_inputs);
|
multi_subgraph_op->set_arguments(op_inputs);
|
||||||
}
|
}
|
||||||
|
@ -175,7 +175,9 @@ bool convert_precision(pass::PassBase& pass,
|
|||||||
if (auto sub_graph_node = std::dynamic_pointer_cast<op::util::MultiSubGraphOp>(node)) {
|
if (auto sub_graph_node = std::dynamic_pointer_cast<op::util::MultiSubGraphOp>(node)) {
|
||||||
size_t sub_graphs_num = sub_graph_node->get_internal_subgraphs_size();
|
size_t sub_graphs_num = sub_graph_node->get_internal_subgraphs_size();
|
||||||
for (size_t sub_graph_ind = 0; sub_graph_ind < sub_graphs_num; ++sub_graph_ind) {
|
for (size_t sub_graph_ind = 0; sub_graph_ind < sub_graphs_num; ++sub_graph_ind) {
|
||||||
is_changed |= convert_function_precision(sub_graph_node->get_function(sub_graph_ind), true);
|
is_changed |=
|
||||||
|
convert_function_precision(sub_graph_node->get_function(static_cast<int>(sub_graph_ind)),
|
||||||
|
true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
is_changed |= convert_node_input_precision(node);
|
is_changed |= convert_node_input_precision(node);
|
||||||
@ -241,7 +243,8 @@ precisions_set_t find_all_used_precisions(const std::shared_ptr<ngraph::Function
|
|||||||
if (auto sub_graph_node = std::dynamic_pointer_cast<ngraph::op::util::MultiSubGraphOp>(node)) {
|
if (auto sub_graph_node = std::dynamic_pointer_cast<ngraph::op::util::MultiSubGraphOp>(node)) {
|
||||||
size_t sub_graphs_num = sub_graph_node->get_internal_subgraphs_size();
|
size_t sub_graphs_num = sub_graph_node->get_internal_subgraphs_size();
|
||||||
for (size_t sub_graph_ind = 0; sub_graph_ind < sub_graphs_num; ++sub_graph_ind) {
|
for (size_t sub_graph_ind = 0; sub_graph_ind < sub_graphs_num; ++sub_graph_ind) {
|
||||||
auto sub_graph_precisions = find_all_used_precisions(sub_graph_node->get_function(sub_graph_ind));
|
auto sub_graph_precisions =
|
||||||
|
find_all_used_precisions(sub_graph_node->get_function(static_cast<int>(sub_graph_ind)));
|
||||||
used_precisions.insert(sub_graph_precisions.begin(), sub_graph_precisions.end());
|
used_precisions.insert(sub_graph_precisions.begin(), sub_graph_precisions.end());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -27,7 +27,7 @@ ngraph::pass::ConvertMVN1ToMVN6::ConvertMVN1ToMVN6() {
|
|||||||
if (!input_rank.is_static()) {
|
if (!input_rank.is_static()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int64_t start_axis = 1 + (!mvn_node->get_across_channels());
|
int64_t start_axis = 1 + static_cast<int64_t>(!mvn_node->get_across_channels());
|
||||||
if (input_rank.get_length() <= start_axis) {
|
if (input_rank.get_length() <= start_axis) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -37,7 +37,7 @@ ngraph::pass::ConvertMVN1ToMVN6::ConvertMVN1ToMVN6() {
|
|||||||
auto mvn6_node = std::make_shared<ngraph::opset6::MVN>(input,
|
auto mvn6_node = std::make_shared<ngraph::opset6::MVN>(input,
|
||||||
axes,
|
axes,
|
||||||
mvn_node->get_normalize_variance(),
|
mvn_node->get_normalize_variance(),
|
||||||
mvn_node->get_eps(),
|
static_cast<float>(mvn_node->get_eps()),
|
||||||
ngraph::op::MVNEpsMode::OUTSIDE_SQRT);
|
ngraph::op::MVNEpsMode::OUTSIDE_SQRT);
|
||||||
|
|
||||||
mvn6_node->set_friendly_name(mvn_node->get_friendly_name());
|
mvn6_node->set_friendly_name(mvn_node->get_friendly_name());
|
||||||
|
@ -44,7 +44,7 @@ pass::ConvertDetectionOutput8ToDetectionOutput1::ConvertDetectionOutput8ToDetect
|
|||||||
attributes_v1.keep_top_k = attributes_v8.keep_top_k;
|
attributes_v1.keep_top_k = attributes_v8.keep_top_k;
|
||||||
attributes_v1.nms_threshold = attributes_v8.nms_threshold;
|
attributes_v1.nms_threshold = attributes_v8.nms_threshold;
|
||||||
attributes_v1.normalized = attributes_v8.normalized;
|
attributes_v1.normalized = attributes_v8.normalized;
|
||||||
attributes_v1.num_classes = num_classes.get_length();
|
attributes_v1.num_classes = static_cast<int>(num_classes.get_length());
|
||||||
attributes_v1.objectness_score = attributes_v8.objectness_score;
|
attributes_v1.objectness_score = attributes_v8.objectness_score;
|
||||||
attributes_v1.share_location = attributes_v8.share_location;
|
attributes_v1.share_location = attributes_v8.share_location;
|
||||||
attributes_v1.top_k = attributes_v8.top_k;
|
attributes_v1.top_k = attributes_v8.top_k;
|
||||||
|
@ -301,7 +301,7 @@ Output<Node> legacy_broadcast_for_binary_operation(const Output<Node>& left,
|
|||||||
// Prepare new shape of right operand for broadcasting
|
// Prepare new shape of right operand for broadcasting
|
||||||
// Remove dimensions with length=1 from back
|
// Remove dimensions with length=1 from back
|
||||||
auto new_right_shape = right_shape;
|
auto new_right_shape = right_shape;
|
||||||
for (int dimension = new_right_shape.size() - 1; dimension >= 0; --dimension) {
|
for (int dimension = static_cast<int>(new_right_shape.size()) - 1; dimension >= 0; --dimension) {
|
||||||
if (new_right_shape.at(dimension) == 1) {
|
if (new_right_shape.at(dimension) == 1) {
|
||||||
new_right_shape.pop_back();
|
new_right_shape.pop_back();
|
||||||
} else {
|
} else {
|
||||||
|
@ -27,7 +27,7 @@ public:
|
|||||||
virtual ~OpAnnotations() = default;
|
virtual ~OpAnnotations() = default;
|
||||||
|
|
||||||
void add_in_place_oi_pair(const struct oi_pair& oi) {
|
void add_in_place_oi_pair(const struct oi_pair& oi) {
|
||||||
for (auto e : m_in_place_oi_pairs) {
|
for (const auto& e : m_in_place_oi_pairs) {
|
||||||
if (e.input == oi.input || e.output == oi.output) {
|
if (e.input == oi.input || e.output == oi.output) {
|
||||||
throw ngraph_error("In_place hint conflicts with an existing entry");
|
throw ngraph_error("In_place hint conflicts with an existing entry");
|
||||||
}
|
}
|
||||||
|
@ -25,6 +25,8 @@
|
|||||||
#include "ngraph/runtime/host_tensor.hpp"
|
#include "ngraph/runtime/host_tensor.hpp"
|
||||||
#include "ngraph/runtime/tensor.hpp"
|
#include "ngraph/runtime/tensor.hpp"
|
||||||
#include "ngraph/shape.hpp"
|
#include "ngraph/shape.hpp"
|
||||||
|
#include "ngraph/type/element_type.hpp"
|
||||||
|
#include "ngraph/type/element_type_traits.hpp"
|
||||||
#include "openvino/core/enum_mask.hpp"
|
#include "openvino/core/enum_mask.hpp"
|
||||||
|
|
||||||
namespace ov {
|
namespace ov {
|
||||||
@ -278,6 +280,14 @@ std::vector<T> read_vector(std::shared_ptr<ngraph::runtime::Tensor> tv) {
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class T, ngraph::element::Type_t ET>
|
||||||
|
std::vector<T> array_2_vector(typename ngraph::element_type_traits<ET>::value_type* data, size_t size) {
|
||||||
|
std::vector<T> result(size);
|
||||||
|
for (size_t i = 0; i < size; i++) {
|
||||||
|
result[i] = static_cast<T>(data[i]);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
template <typename T>
|
template <typename T>
|
||||||
std::vector<T> host_tensor_2_vector(ngraph::HostTensorPtr tensor) {
|
std::vector<T> host_tensor_2_vector(ngraph::HostTensorPtr tensor) {
|
||||||
NGRAPH_CHECK(tensor != nullptr, "Invalid Tensor received, can't read the data from a null pointer.");
|
NGRAPH_CHECK(tensor != nullptr, "Invalid Tensor received, can't read the data from a null pointer.");
|
||||||
@ -285,58 +295,58 @@ std::vector<T> host_tensor_2_vector(ngraph::HostTensorPtr tensor) {
|
|||||||
switch (tensor->get_element_type()) {
|
switch (tensor->get_element_type()) {
|
||||||
case ngraph::element::Type_t::boolean: {
|
case ngraph::element::Type_t::boolean: {
|
||||||
auto p = tensor->get_data_ptr<ngraph::element::Type_t::boolean>();
|
auto p = tensor->get_data_ptr<ngraph::element::Type_t::boolean>();
|
||||||
return std::vector<T>(p, p + tensor->get_element_count());
|
return array_2_vector<T, ngraph::element::Type_t::boolean>(p, tensor->get_element_count());
|
||||||
}
|
}
|
||||||
case ngraph::element::Type_t::bf16: {
|
case ngraph::element::Type_t::bf16: {
|
||||||
auto p = tensor->get_data_ptr<ngraph::element::Type_t::bf16>();
|
auto p = tensor->get_data_ptr<ngraph::element::Type_t::bf16>();
|
||||||
return std::vector<T>(p, p + tensor->get_element_count());
|
return array_2_vector<T, ngraph::element::Type_t::bf16>(p, tensor->get_element_count());
|
||||||
}
|
}
|
||||||
case ngraph::element::Type_t::f16: {
|
case ngraph::element::Type_t::f16: {
|
||||||
auto p = tensor->get_data_ptr<ngraph::element::Type_t::f16>();
|
auto p = tensor->get_data_ptr<ngraph::element::Type_t::f16>();
|
||||||
return std::vector<T>(p, p + tensor->get_element_count());
|
return array_2_vector<T, ngraph::element::Type_t::f16>(p, tensor->get_element_count());
|
||||||
}
|
}
|
||||||
case ngraph::element::Type_t::f32: {
|
case ngraph::element::Type_t::f32: {
|
||||||
auto p = tensor->get_data_ptr<ngraph::element::Type_t::f32>();
|
auto p = tensor->get_data_ptr<ngraph::element::Type_t::f32>();
|
||||||
return std::vector<T>(p, p + tensor->get_element_count());
|
return array_2_vector<T, ngraph::element::Type_t::f32>(p, tensor->get_element_count());
|
||||||
}
|
}
|
||||||
case ngraph::element::Type_t::f64: {
|
case ngraph::element::Type_t::f64: {
|
||||||
auto p = tensor->get_data_ptr<ngraph::element::Type_t::f64>();
|
auto p = tensor->get_data_ptr<ngraph::element::Type_t::f64>();
|
||||||
return std::vector<T>(p, p + tensor->get_element_count());
|
return array_2_vector<T, ngraph::element::Type_t::f64>(p, tensor->get_element_count());
|
||||||
}
|
}
|
||||||
case ngraph::element::Type_t::i8: {
|
case ngraph::element::Type_t::i8: {
|
||||||
auto p = tensor->get_data_ptr<ngraph::element::Type_t::i8>();
|
auto p = tensor->get_data_ptr<ngraph::element::Type_t::i8>();
|
||||||
return std::vector<T>(p, p + tensor->get_element_count());
|
return array_2_vector<T, ngraph::element::Type_t::i8>(p, tensor->get_element_count());
|
||||||
}
|
}
|
||||||
case ngraph::element::Type_t::i16: {
|
case ngraph::element::Type_t::i16: {
|
||||||
auto p = tensor->get_data_ptr<ngraph::element::Type_t::i16>();
|
auto p = tensor->get_data_ptr<ngraph::element::Type_t::i16>();
|
||||||
return std::vector<T>(p, p + tensor->get_element_count());
|
return array_2_vector<T, ngraph::element::Type_t::i16>(p, tensor->get_element_count());
|
||||||
}
|
}
|
||||||
case ngraph::element::Type_t::i32: {
|
case ngraph::element::Type_t::i32: {
|
||||||
auto p = tensor->get_data_ptr<ngraph::element::Type_t::i32>();
|
auto p = tensor->get_data_ptr<ngraph::element::Type_t::i32>();
|
||||||
return std::vector<T>(p, p + tensor->get_element_count());
|
return array_2_vector<T, ngraph::element::Type_t::i32>(p, tensor->get_element_count());
|
||||||
}
|
}
|
||||||
case ngraph::element::Type_t::i64: {
|
case ngraph::element::Type_t::i64: {
|
||||||
auto p = tensor->get_data_ptr<ngraph::element::Type_t::i64>();
|
auto p = tensor->get_data_ptr<ngraph::element::Type_t::i64>();
|
||||||
return std::vector<T>(p, p + tensor->get_element_count());
|
return array_2_vector<T, ngraph::element::Type_t::i64>(p, tensor->get_element_count());
|
||||||
}
|
}
|
||||||
case ngraph::element::Type_t::u1:
|
case ngraph::element::Type_t::u1:
|
||||||
NGRAPH_CHECK(false, "u1 element type is unsupported");
|
NGRAPH_CHECK(false, "u1 element type is unsupported");
|
||||||
break;
|
break;
|
||||||
case ngraph::element::Type_t::u8: {
|
case ngraph::element::Type_t::u8: {
|
||||||
auto p = tensor->get_data_ptr<ngraph::element::Type_t::u8>();
|
auto p = tensor->get_data_ptr<ngraph::element::Type_t::u8>();
|
||||||
return std::vector<T>(p, p + tensor->get_element_count());
|
return array_2_vector<T, ngraph::element::Type_t::u8>(p, tensor->get_element_count());
|
||||||
}
|
}
|
||||||
case ngraph::element::Type_t::u16: {
|
case ngraph::element::Type_t::u16: {
|
||||||
auto p = tensor->get_data_ptr<ngraph::element::Type_t::u16>();
|
auto p = tensor->get_data_ptr<ngraph::element::Type_t::u16>();
|
||||||
return std::vector<T>(p, p + tensor->get_element_count());
|
return array_2_vector<T, ngraph::element::Type_t::u16>(p, tensor->get_element_count());
|
||||||
}
|
}
|
||||||
case ngraph::element::Type_t::u32: {
|
case ngraph::element::Type_t::u32: {
|
||||||
auto p = tensor->get_data_ptr<ngraph::element::Type_t::u32>();
|
auto p = tensor->get_data_ptr<ngraph::element::Type_t::u32>();
|
||||||
return std::vector<T>(p, p + tensor->get_element_count());
|
return array_2_vector<T, ngraph::element::Type_t::u32>(p, tensor->get_element_count());
|
||||||
}
|
}
|
||||||
case ngraph::element::Type_t::u64: {
|
case ngraph::element::Type_t::u64: {
|
||||||
auto p = tensor->get_data_ptr<ngraph::element::Type_t::u64>();
|
auto p = tensor->get_data_ptr<ngraph::element::Type_t::u64>();
|
||||||
return std::vector<T>(p, p + tensor->get_element_count());
|
return array_2_vector<T, ngraph::element::Type_t::u64>(p, tensor->get_element_count());
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
NGRAPH_UNREACHABLE("unsupported element type");
|
NGRAPH_UNREACHABLE("unsupported element type");
|
||||||
|
@ -47,9 +47,17 @@ template <typename VT>
|
|||||||
class OPENVINO_DEPRECATED("Please use ov::Any to store VT directly") VariantWrapper {};
|
class OPENVINO_DEPRECATED("Please use ov::Any to store VT directly") VariantWrapper {};
|
||||||
|
|
||||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
# pragma warning(push)
|
||||||
|
// '__declspec(dllexport)' and 'extern' are incompatible on an explicit instantiation
|
||||||
|
# pragma warning(disable : 4910)
|
||||||
|
#endif
|
||||||
extern template class OPENVINO_API VariantImpl<std::string>;
|
extern template class OPENVINO_API VariantImpl<std::string>;
|
||||||
extern template class OPENVINO_API VariantImpl<int64_t>;
|
extern template class OPENVINO_API VariantImpl<int64_t>;
|
||||||
extern template class OPENVINO_API VariantImpl<bool>;
|
extern template class OPENVINO_API VariantImpl<bool>;
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
# pragma warning(pop)
|
||||||
|
#endif
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
class OPENVINO_API VariantWrapper<std::string> : public VariantImpl<std::string> {
|
class OPENVINO_API VariantWrapper<std::string> : public VariantImpl<std::string> {
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
#include "openvino/core/runtime_attribute.hpp"
|
#include "openvino/core/runtime_attribute.hpp"
|
||||||
|
|
||||||
namespace InferenceEngine {
|
namespace InferenceEngine {
|
||||||
class InferencePlugin;
|
struct InferencePlugin;
|
||||||
class ExecutableNetwork;
|
class ExecutableNetwork;
|
||||||
} // namespace InferenceEngine
|
} // namespace InferenceEngine
|
||||||
|
|
||||||
@ -627,7 +627,7 @@ class OPENVINO_API Any {
|
|||||||
};
|
};
|
||||||
|
|
||||||
friend class ::ov::RuntimeAttribute;
|
friend class ::ov::RuntimeAttribute;
|
||||||
friend class ::InferenceEngine::InferencePlugin;
|
friend struct ::InferenceEngine::InferencePlugin;
|
||||||
friend class ::InferenceEngine::ExecutableNetwork;
|
friend class ::InferenceEngine::ExecutableNetwork;
|
||||||
friend class ::ov::CompiledModel;
|
friend class ::ov::CompiledModel;
|
||||||
friend class ::ov::RemoteContext;
|
friend class ::ov::RemoteContext;
|
||||||
|
@ -19,7 +19,7 @@ namespace preprocess {
|
|||||||
/// - Model's input info, which is a final input's info after preprocessing (InputInfo::model)
|
/// - Model's input info, which is a final input's info after preprocessing (InputInfo::model)
|
||||||
///
|
///
|
||||||
class OPENVINO_API InputInfo final {
|
class OPENVINO_API InputInfo final {
|
||||||
class InputInfoImpl;
|
struct InputInfoImpl;
|
||||||
std::unique_ptr<InputInfoImpl> m_impl;
|
std::unique_ptr<InputInfoImpl> m_impl;
|
||||||
friend class PrePostProcessor;
|
friend class PrePostProcessor;
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ namespace preprocess {
|
|||||||
/// - Postprocessing steps applied to user's input (OutputInfo::postprocess)
|
/// - Postprocessing steps applied to user's input (OutputInfo::postprocess)
|
||||||
/// - User's desired output parameter information, which is a final one after preprocessing (OutputInfo::tensor)
|
/// - User's desired output parameter information, which is a final one after preprocessing (OutputInfo::tensor)
|
||||||
class OPENVINO_API OutputInfo final {
|
class OPENVINO_API OutputInfo final {
|
||||||
class OutputInfoImpl;
|
struct OutputInfoImpl;
|
||||||
std::unique_ptr<OutputInfoImpl> m_impl;
|
std::unique_ptr<OutputInfoImpl> m_impl;
|
||||||
friend class PrePostProcessor;
|
friend class PrePostProcessor;
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ namespace preprocess {
|
|||||||
///
|
///
|
||||||
/// \ingroup ov_model_cpp_api
|
/// \ingroup ov_model_cpp_api
|
||||||
class OPENVINO_API PrePostProcessor final {
|
class OPENVINO_API PrePostProcessor final {
|
||||||
class PrePostProcessorImpl;
|
struct PrePostProcessorImpl;
|
||||||
std::unique_ptr<PrePostProcessorImpl> m_impl;
|
std::unique_ptr<PrePostProcessorImpl> m_impl;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -13,7 +13,7 @@ namespace reference {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void acos(const T* arg, T* out, size_t count) {
|
void acos(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::acos(arg[i]);
|
out[i] = static_cast<T>(std::acos(arg[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -13,14 +13,14 @@ namespace reference {
|
|||||||
template <typename T, typename std::enable_if<!std::is_integral<T>::value, bool>::type = true>
|
template <typename T, typename std::enable_if<!std::is_integral<T>::value, bool>::type = true>
|
||||||
void acosh(const T* arg, T* out, size_t count) {
|
void acosh(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::acosh(arg[i]);
|
out[i] = static_cast<T>(std::acosh(arg[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
|
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
|
||||||
void acosh(const T* arg, T* out, size_t count) {
|
void acosh(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::roundl(std::acosh(arg[i]));
|
out[i] = static_cast<T>(std::roundl(std::acosh(arg[i])));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -20,7 +20,7 @@ inline size_t window_start(size_t idx, size_t arg_shape, size_t out_shape) {
|
|||||||
return idx * arg_shape / out_shape;
|
return idx * arg_shape / out_shape;
|
||||||
}
|
}
|
||||||
inline size_t window_end(size_t idx, size_t arg_shape, size_t out_shape) {
|
inline size_t window_end(size_t idx, size_t arg_shape, size_t out_shape) {
|
||||||
return ceil(static_cast<double>((idx + 1) * arg_shape) / out_shape);
|
return static_cast<size_t>(ceil(static_cast<double>((idx + 1) * arg_shape) / out_shape));
|
||||||
}
|
}
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T avg_div(const T sum, size_t n) {
|
T avg_div(const T sum, size_t n) {
|
||||||
@ -29,7 +29,7 @@ T avg_div(const T sum, size_t n) {
|
|||||||
if (std::is_same<T, int8_t>::value || std::is_same<T, uint8_t>::value) {
|
if (std::is_same<T, int8_t>::value || std::is_same<T, uint8_t>::value) {
|
||||||
return static_cast<T>(std::nearbyint(static_cast<float>(sum) / n));
|
return static_cast<T>(std::nearbyint(static_cast<float>(sum) / n));
|
||||||
} else {
|
} else {
|
||||||
return sum / n;
|
return static_cast<T>(sum / n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,8 +21,8 @@ void adaptive_max_pool_1d(const T* arg, T* out, IT* indices, size_t h_in, size_t
|
|||||||
auto to = arg + adaptive_pool::window_end(i, h_in, h_out);
|
auto to = arg + adaptive_pool::window_end(i, h_in, h_out);
|
||||||
NGRAPH_CHECK(to - from != 0, "AdaptiveMaxPool elements == 0, must be non-zero");
|
NGRAPH_CHECK(to - from != 0, "AdaptiveMaxPool elements == 0, must be non-zero");
|
||||||
auto it = std::max_element(from, to);
|
auto it = std::max_element(from, to);
|
||||||
out[i] = *it;
|
out[i] = static_cast<T>(*it);
|
||||||
indices[i] = it - arg;
|
indices[i] = static_cast<IT>(it - arg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
template <typename T, typename IT>
|
template <typename T, typename IT>
|
||||||
@ -41,8 +41,8 @@ void adaptive_max_pool_2d(const T* arg, T* out, IT* indices, size_t h_in, size_t
|
|||||||
auto it = std::max_element(from, to);
|
auto it = std::max_element(from, to);
|
||||||
result = *it > *result ? it : result;
|
result = *it > *result ? it : result;
|
||||||
}
|
}
|
||||||
out[i * w_out + j] = *result;
|
out[i * w_out + j] = static_cast<T>(*result);
|
||||||
indices[i * w_out + j] = result - arg;
|
indices[i * w_out + j] = static_cast<IT>(result - arg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -76,8 +76,8 @@ void adaptive_max_pool_3d(const T* arg,
|
|||||||
result = *it > *result ? it : result;
|
result = *it > *result ? it : result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out[i * h_out * w_out + j * w_out + k] = *result;
|
out[i * h_out * w_out + j * w_out + k] = static_cast<T>(*result);
|
||||||
indices[i * h_out * w_out + j * w_out + k] = result - arg;
|
indices[i * h_out * w_out + j * w_out + k] = static_cast<IT>(result - arg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -13,7 +13,7 @@ namespace reference {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void asin(const T* arg, T* out, size_t count) {
|
void asin(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::asin(arg[i]);
|
out[i] = static_cast<T>(std::asin(arg[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -20,7 +20,7 @@ void asinh(const T* arg, T* out, size_t count) {
|
|||||||
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
|
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
|
||||||
void asinh(const T* arg, T* out, size_t count) {
|
void asinh(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::roundl(std::asinh(arg[i]));
|
out[i] = static_cast<T>(std::roundl(std::asinh(arg[i])));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -13,14 +13,14 @@ namespace reference {
|
|||||||
template <typename T, typename std::enable_if<!std::is_integral<T>::value, bool>::type = true>
|
template <typename T, typename std::enable_if<!std::is_integral<T>::value, bool>::type = true>
|
||||||
void atan(const T* arg, T* out, size_t count) {
|
void atan(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::atan(arg[i]);
|
out[i] = static_cast<T>(std::atan(arg[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
|
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
|
||||||
void atan(const T* arg, T* out, size_t count) {
|
void atan(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::roundl(std::atan(arg[i]));
|
out[i] = static_cast<T>(std::roundl(std::atan(arg[i])));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -29,7 +29,7 @@ void atanh(const T* arg, T* out, size_t count) {
|
|||||||
if (arg[i] > 0) {
|
if (arg[i] > 0) {
|
||||||
out[i] = std::numeric_limits<T>::max();
|
out[i] = std::numeric_limits<T>::max();
|
||||||
} else {
|
} else {
|
||||||
out[i] = std::roundl(std::atanh(arg[i]));
|
out[i] = static_cast<T>(std::roundl(std::atanh(arg[i])));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (arg[i] <= -1) {
|
if (arg[i] <= -1) {
|
||||||
@ -37,7 +37,7 @@ void atanh(const T* arg, T* out, size_t count) {
|
|||||||
} else if (arg[i] >= 1) {
|
} else if (arg[i] >= 1) {
|
||||||
out[i] = std::numeric_limits<T>::max();
|
out[i] = std::numeric_limits<T>::max();
|
||||||
} else {
|
} else {
|
||||||
out[i] = std::roundl(std::atanh(arg[i]));
|
out[i] = static_cast<T>(std::roundl(std::atanh(arg[i])));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -28,7 +28,7 @@ inline void row_major_strides(const Shape& shape, size_t* strides, size_t size)
|
|||||||
|
|
||||||
template <typename C, typename T>
|
template <typename C, typename T>
|
||||||
inline T value_with_padding_or(const C& arr, size_t padding, size_t idx, T&& default_value) {
|
inline T value_with_padding_or(const C& arr, size_t padding, size_t idx, T&& default_value) {
|
||||||
return idx < padding ? std::forward<T>(default_value) : arr[idx - padding];
|
return idx < padding ? std::forward<T>(default_value) : static_cast<T>(arr[idx - padding]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int A0, int A1, typename T, typename U, typename Functor>
|
template <int A0, int A1, typename T, typename U, typename Functor>
|
||||||
@ -100,7 +100,7 @@ void autobroadcast_binop(const T* arg0,
|
|||||||
switch (broadcast_spec.m_type) {
|
switch (broadcast_spec.m_type) {
|
||||||
case op::AutoBroadcastType::NONE:
|
case op::AutoBroadcastType::NONE:
|
||||||
for (size_t i = 0; i < shape_size(arg0_shape); i++) {
|
for (size_t i = 0; i < shape_size(arg0_shape); i++) {
|
||||||
out[i] = elementwise_functor(arg0[i], arg1[i]);
|
out[i] = static_cast<U>(elementwise_functor(arg0[i], arg1[i]));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case op::AutoBroadcastType::NUMPY:
|
case op::AutoBroadcastType::NUMPY:
|
||||||
@ -155,23 +155,6 @@ void autobroadcast_binop(const T* arg0,
|
|||||||
if (dim0 != dim1)
|
if (dim0 != dim1)
|
||||||
axis = std::max(axis, i);
|
axis = std::max(axis, i);
|
||||||
}
|
}
|
||||||
#if 0
|
|
||||||
// Universal function without optimisations
|
|
||||||
CoordinateTransformBasic arg0_transform(arg0_shape);
|
|
||||||
CoordinateTransformBasic arg1_transform(arg1_shape);
|
|
||||||
U *dst = out;
|
|
||||||
|
|
||||||
for(CoordinateIterator it(output_shape),
|
|
||||||
ite = CoordinateIterator::end();
|
|
||||||
it != ite;
|
|
||||||
++it)
|
|
||||||
{
|
|
||||||
const Coordinate& output_coord = *it;
|
|
||||||
size_t const idx0 = arg0_transform.index(output_coord);
|
|
||||||
size_t const idx1 = arg1_transform.index(output_coord);
|
|
||||||
*dst++ = elementwise_functor(arg0[idx0], arg1[idx1]);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
|
|
||||||
if (axis == 0) {
|
if (axis == 0) {
|
||||||
for (size_t i = 0, end = strides0[0]; i < end; ++i)
|
for (size_t i = 0, end = strides0[0]; i < end; ++i)
|
||||||
@ -222,7 +205,6 @@ void autobroadcast_binop(const T* arg0,
|
|||||||
axis,
|
axis,
|
||||||
strides0[axis],
|
strides0[axis],
|
||||||
elementwise_functor);
|
elementwise_functor);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case op::AutoBroadcastType::PDPD:
|
case op::AutoBroadcastType::PDPD:
|
||||||
@ -401,13 +383,13 @@ void autobroadcast_select(const U* arg0,
|
|||||||
const Coordinate arg2_coord = reduce(output_coord, arg2_squeezed_axes, false);
|
const Coordinate arg2_coord = reduce(output_coord, arg2_squeezed_axes, false);
|
||||||
|
|
||||||
const size_t arg0_idx =
|
const size_t arg0_idx =
|
||||||
std::inner_product(arg0_coord.begin(), arg0_coord.end(), arg0_strides.begin(), 0);
|
std::inner_product(arg0_coord.begin(), arg0_coord.end(), arg0_strides.begin(), uint64_t(0));
|
||||||
const size_t arg1_idx =
|
const size_t arg1_idx =
|
||||||
std::inner_product(arg1_coord.begin(), arg1_coord.end(), arg1_strides.begin(), 0);
|
std::inner_product(arg1_coord.begin(), arg1_coord.end(), arg1_strides.begin(), uint64_t(0));
|
||||||
const size_t arg2_idx =
|
const size_t arg2_idx =
|
||||||
std::inner_product(arg2_coord.begin(), arg2_coord.end(), arg2_strides.begin(), 0);
|
std::inner_product(arg2_coord.begin(), arg2_coord.end(), arg2_strides.begin(), uint64_t(0));
|
||||||
const size_t output_idx =
|
const size_t output_idx =
|
||||||
std::inner_product(output_coord.begin(), output_coord.end(), output_strides.begin(), 0);
|
std::inner_product(output_coord.begin(), output_coord.end(), output_strides.begin(), uint64_t(0));
|
||||||
out[output_idx] = elementwise_functor(arg0[arg0_idx], arg1[arg1_idx], arg2[arg2_idx]);
|
out[output_idx] = elementwise_functor(arg0[arg0_idx], arg1[arg1_idx], arg2[arg2_idx]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -476,12 +458,14 @@ void autobroadcast_select(const U* arg0,
|
|||||||
const Coordinate arg0_coord = reduce(output_coord, arg0_squeezed_axes, false);
|
const Coordinate arg0_coord = reduce(output_coord, arg0_squeezed_axes, false);
|
||||||
const Coordinate arg2_coord = reduce(output_coord, arg2_squeezed_axes, false);
|
const Coordinate arg2_coord = reduce(output_coord, arg2_squeezed_axes, false);
|
||||||
|
|
||||||
const size_t arg0_idx = std::inner_product(arg0_coord.begin(), arg0_coord.end(), arg0_strides.begin(), 0);
|
const size_t arg0_idx =
|
||||||
|
std::inner_product(arg0_coord.begin(), arg0_coord.end(), arg0_strides.begin(), uint64_t(0));
|
||||||
const size_t arg1_idx =
|
const size_t arg1_idx =
|
||||||
std::inner_product(output_coord.begin(), output_coord.end(), output_strides.begin(), 0);
|
std::inner_product(output_coord.begin(), output_coord.end(), output_strides.begin(), uint64_t(0));
|
||||||
const size_t arg2_idx = std::inner_product(arg2_coord.begin(), arg2_coord.end(), arg2_strides.begin(), 0);
|
const size_t arg2_idx =
|
||||||
|
std::inner_product(arg2_coord.begin(), arg2_coord.end(), arg2_strides.begin(), uint64_t(0));
|
||||||
const size_t output_idx =
|
const size_t output_idx =
|
||||||
std::inner_product(output_coord.begin(), output_coord.end(), output_strides.begin(), 0);
|
std::inner_product(output_coord.begin(), output_coord.end(), output_strides.begin(), uint64_t(0));
|
||||||
|
|
||||||
out[output_idx] = elementwise_functor(arg0[arg0_idx], arg1[arg1_idx], arg2[arg2_idx]);
|
out[output_idx] = elementwise_functor(arg0[arg0_idx], arg1[arg1_idx], arg2[arg2_idx]);
|
||||||
}
|
}
|
||||||
|
@ -220,7 +220,7 @@ void avg_pool(const T* arg,
|
|||||||
out[output_transform.index(out_coord)] =
|
out[output_transform.index(out_coord)] =
|
||||||
static_cast<T>(std::nearbyint(static_cast<float>(result) / n_elements));
|
static_cast<T>(std::nearbyint(static_cast<float>(result) / n_elements));
|
||||||
} else {
|
} else {
|
||||||
out[output_transform.index(out_coord)] = result / n_elements;
|
out[output_transform.index(out_coord)] = result / static_cast<T>(n_elements);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
out[output_transform.index(out_coord)] = T{0};
|
out[output_transform.index(out_coord)] = T{0};
|
||||||
|
@ -29,15 +29,15 @@ void binary_convolve_3D_channels(const ConvolutionParams& p,
|
|||||||
T_IN*& out,
|
T_IN*& out,
|
||||||
const float pad_value) {
|
const float pad_value) {
|
||||||
const int n_bits = 8;
|
const int n_bits = 8;
|
||||||
const int input_size_z = batch_shape[1];
|
const int64_t input_size_z = batch_shape[1];
|
||||||
const int input_size_y = batch_shape[2];
|
const int64_t input_size_y = batch_shape[2];
|
||||||
const int input_size_x = batch_shape[3];
|
const int64_t input_size_x = batch_shape[3];
|
||||||
const int filter_size_z = filter_shape[1];
|
const int64_t filter_size_z = filter_shape[1];
|
||||||
const int filter_size_y = filter_shape[2];
|
const int64_t filter_size_y = filter_shape[2];
|
||||||
const int filter_size_x = filter_shape[3];
|
const int64_t filter_size_x = filter_shape[3];
|
||||||
const int dilated_filter_size_z = filter_size_z + (filter_size_z - 1) * (p.dilation[0] - 1);
|
const int64_t dilated_filter_size_z = filter_size_z + (filter_size_z - 1) * (p.dilation[0] - 1);
|
||||||
const int dilated_filter_size_y = filter_size_y + (filter_size_y - 1) * (p.dilation[1] - 1);
|
const int64_t dilated_filter_size_y = filter_size_y + (filter_size_y - 1) * (p.dilation[1] - 1);
|
||||||
const int dilated_filter_size_x = filter_size_x + (filter_size_x - 1) * (p.dilation[2] - 1);
|
const int64_t dilated_filter_size_x = filter_size_x + (filter_size_x - 1) * (p.dilation[2] - 1);
|
||||||
|
|
||||||
const Shape input_channel_shape(++batch_shape.begin(), batch_shape.end());
|
const Shape input_channel_shape(++batch_shape.begin(), batch_shape.end());
|
||||||
const size_t input_channel_size = shape_size(input_channel_shape);
|
const size_t input_channel_size = shape_size(input_channel_shape);
|
||||||
@ -45,11 +45,11 @@ void binary_convolve_3D_channels(const ConvolutionParams& p,
|
|||||||
const size_t filter_channel_size = shape_size(filter_channel_shape);
|
const size_t filter_channel_size = shape_size(filter_channel_shape);
|
||||||
const T_IN bit_count = static_cast<T_IN>(filter_channel_size);
|
const T_IN bit_count = static_cast<T_IN>(filter_channel_size);
|
||||||
|
|
||||||
for (int i_z = -p.pads_begin[0]; i_z <= (p.pads_end[0] + input_size_z - dilated_filter_size_z);
|
for (int64_t i_z = -p.pads_begin[0]; i_z <= (p.pads_end[0] + input_size_z - dilated_filter_size_z);
|
||||||
i_z += p.strides[0]) {
|
i_z += p.strides[0]) {
|
||||||
for (int i_y = -p.pads_begin[1]; i_y <= (p.pads_end[1] + input_size_y - dilated_filter_size_y);
|
for (int64_t i_y = -p.pads_begin[1]; i_y <= (p.pads_end[1] + input_size_y - dilated_filter_size_y);
|
||||||
i_y += p.strides[1]) {
|
i_y += p.strides[1]) {
|
||||||
for (int i_x = -p.pads_begin[2]; i_x <= (p.pads_end[2] + input_size_x - dilated_filter_size_x);
|
for (int64_t i_x = -p.pads_begin[2]; i_x <= (p.pads_end[2] + input_size_x - dilated_filter_size_x);
|
||||||
i_x += p.strides[2]) {
|
i_x += p.strides[2]) {
|
||||||
auto input_channel = batch;
|
auto input_channel = batch;
|
||||||
size_t filter_channels_count = filter_shape[0];
|
size_t filter_channels_count = filter_shape[0];
|
||||||
@ -57,23 +57,24 @@ void binary_convolve_3D_channels(const ConvolutionParams& p,
|
|||||||
T_IN sum = 0;
|
T_IN sum = 0;
|
||||||
while (filter_channels_count--) {
|
while (filter_channels_count--) {
|
||||||
T_IN popcount = 0;
|
T_IN popcount = 0;
|
||||||
for (int f_z = 0; f_z < filter_size_z; ++f_z) {
|
for (int64_t f_z = 0; f_z < filter_size_z; ++f_z) {
|
||||||
for (int f_y = 0; f_y < filter_size_y; ++f_y) {
|
for (int64_t f_y = 0; f_y < filter_size_y; ++f_y) {
|
||||||
for (int f_x = 0; f_x < filter_size_x; ++f_x) {
|
for (int64_t f_x = 0; f_x < filter_size_x; ++f_x) {
|
||||||
int rel_i_z = i_z + (f_z * p.dilation[0]);
|
int64_t rel_i_z = i_z + (f_z * p.dilation[0]);
|
||||||
int rel_i_y = i_y + (f_y * p.dilation[1]);
|
int64_t rel_i_y = i_y + (f_y * p.dilation[1]);
|
||||||
int rel_i_x = i_x + (f_x * p.dilation[2]);
|
int64_t rel_i_x = i_x + (f_x * p.dilation[2]);
|
||||||
|
|
||||||
bool padding =
|
bool padding =
|
||||||
!(in_range(rel_i_x, {0, input_size_x}) && in_range(rel_i_y, {0, input_size_y}) &&
|
!(in_range(rel_i_x, {0, input_size_x}) && in_range(rel_i_y, {0, input_size_y}) &&
|
||||||
in_range(rel_i_z, {0, input_size_z}));
|
in_range(rel_i_z, {0, input_size_z}));
|
||||||
int i_buf_idx =
|
int64_t i_buf_idx =
|
||||||
(rel_i_z * input_size_y * input_size_x) + (rel_i_y * input_size_x) + rel_i_x;
|
(rel_i_z * input_size_y * input_size_x) + (rel_i_y * input_size_x) + rel_i_x;
|
||||||
|
|
||||||
T_IN in_val = padding ? static_cast<T_IN>(pad_value)
|
T_IN in_val = padding ? static_cast<T_IN>(pad_value)
|
||||||
: static_cast<T_IN>(input_channel[i_buf_idx]);
|
: static_cast<T_IN>(input_channel[i_buf_idx]);
|
||||||
|
|
||||||
int f_buf_idx = (f_z * filter_size_y * filter_size_x) + (f_y * filter_size_x) + f_x;
|
int f_buf_idx = static_cast<int>((f_z * filter_size_y * filter_size_x) +
|
||||||
|
(f_y * filter_size_x) + f_x);
|
||||||
|
|
||||||
int f_byte_idx = (f_buf_idx + filter_count) / n_bits;
|
int f_byte_idx = (f_buf_idx + filter_count) / n_bits;
|
||||||
int bit_idx = (n_bits - 1) - ((f_buf_idx + filter_count) % n_bits);
|
int bit_idx = (n_bits - 1) - ((f_buf_idx + filter_count) % n_bits);
|
||||||
@ -86,7 +87,7 @@ void binary_convolve_3D_channels(const ConvolutionParams& p,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
input_channel += input_channel_size;
|
input_channel += input_channel_size;
|
||||||
filter_count += filter_channel_size;
|
filter_count += static_cast<int>(filter_channel_size);
|
||||||
sum += (2 * popcount - bit_count);
|
sum += (2 * popcount - bit_count);
|
||||||
}
|
}
|
||||||
*out = sum;
|
*out = sum;
|
||||||
|
@ -13,7 +13,7 @@ namespace reference {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void ceiling(const T* arg, T* out, size_t count) {
|
void ceiling(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::ceil(arg[i]);
|
out[i] = static_cast<T>(std::ceil(arg[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -31,11 +31,11 @@ constexpr size_t out_channel_axis = 1;
|
|||||||
constexpr size_t spatial_axis = 2;
|
constexpr size_t spatial_axis = 2;
|
||||||
|
|
||||||
struct ConvolutionParams {
|
struct ConvolutionParams {
|
||||||
std::vector<int> strides;
|
std::vector<int64_t> strides;
|
||||||
std::vector<int> dilation;
|
std::vector<int64_t> dilation;
|
||||||
std::vector<int> pads_begin;
|
std::vector<int64_t> pads_begin;
|
||||||
std::vector<int> pads_end;
|
std::vector<int64_t> pads_end;
|
||||||
std::vector<int> output_padding;
|
std::vector<int64_t> output_padding;
|
||||||
|
|
||||||
ConvolutionParams(const Strides& strides_,
|
ConvolutionParams(const Strides& strides_,
|
||||||
const Strides& dilation_,
|
const Strides& dilation_,
|
||||||
@ -61,30 +61,30 @@ void convolve_3D_channels(const ConvolutionParams& p,
|
|||||||
const T* filter,
|
const T* filter,
|
||||||
const Shape& filter_shape,
|
const Shape& filter_shape,
|
||||||
T*& out) {
|
T*& out) {
|
||||||
const int input_size_z = batch_shape[1];
|
const int input_size_z = static_cast<int>(batch_shape[1]);
|
||||||
const int input_size_y = batch_shape[2];
|
const int input_size_y = static_cast<int>(batch_shape[2]);
|
||||||
const int input_size_x = batch_shape[3];
|
const int input_size_x = static_cast<int>(batch_shape[3]);
|
||||||
const int filter_size_z = filter_shape[1];
|
const int filter_size_z = static_cast<int>(filter_shape[1]);
|
||||||
const int filter_size_y = filter_shape[2];
|
const int filter_size_y = static_cast<int>(filter_shape[2]);
|
||||||
const int filter_size_x = filter_shape[3];
|
const int filter_size_x = static_cast<int>(filter_shape[3]);
|
||||||
const int dilated_filter_size_z = filter_size_z + (filter_size_z - 1) * (p.dilation[0] - 1);
|
const int dilated_filter_size_z = static_cast<int>(filter_size_z + (filter_size_z - 1) * (p.dilation[0] - 1));
|
||||||
const int dilated_filter_size_y = filter_size_y + (filter_size_y - 1) * (p.dilation[1] - 1);
|
const int dilated_filter_size_y = static_cast<int>(filter_size_y + (filter_size_y - 1) * (p.dilation[1] - 1));
|
||||||
const int dilated_filter_size_x = filter_size_x + (filter_size_x - 1) * (p.dilation[2] - 1);
|
const int dilated_filter_size_x = static_cast<int>(filter_size_x + (filter_size_x - 1) * (p.dilation[2] - 1));
|
||||||
|
|
||||||
const Shape input_channel_shape(++batch_shape.begin(), batch_shape.end());
|
const Shape input_channel_shape(++batch_shape.begin(), batch_shape.end());
|
||||||
const size_t input_channel_size = shape_size(input_channel_shape);
|
const size_t input_channel_size = shape_size(input_channel_shape);
|
||||||
const Shape filter_channel_shape(++filter_shape.begin(), filter_shape.end());
|
const Shape filter_channel_shape(++filter_shape.begin(), filter_shape.end());
|
||||||
const size_t filter_channel_size = shape_size(filter_channel_shape);
|
const size_t filter_channel_size = shape_size(filter_channel_shape);
|
||||||
|
|
||||||
for (int i_z = -p.pads_begin[0];
|
for (int i_z = static_cast<int>(-p.pads_begin[0]);
|
||||||
i_z <= (p.pads_end[0] + input_size_z - dilated_filter_size_z + p.output_padding[0]);
|
i_z <= static_cast<int>(p.pads_end[0] + input_size_z - dilated_filter_size_z + p.output_padding[0]);
|
||||||
i_z += p.strides[0]) {
|
i_z += static_cast<int>(p.strides[0])) {
|
||||||
for (int i_y = -p.pads_begin[1];
|
for (int i_y = static_cast<int>(-p.pads_begin[1]);
|
||||||
i_y <= (p.pads_end[1] + input_size_y - dilated_filter_size_y + p.output_padding[1]);
|
i_y <= static_cast<int>(p.pads_end[1] + input_size_y - dilated_filter_size_y + p.output_padding[1]);
|
||||||
i_y += p.strides[1]) {
|
i_y += static_cast<int>(p.strides[1])) {
|
||||||
for (int i_x = -p.pads_begin[2];
|
for (int i_x = static_cast<int>(-p.pads_begin[2]);
|
||||||
i_x <= (p.pads_end[2] + input_size_x - dilated_filter_size_x + p.output_padding[2]);
|
i_x <= static_cast<int>(p.pads_end[2] + input_size_x - dilated_filter_size_x + p.output_padding[2]);
|
||||||
i_x += p.strides[2]) {
|
i_x += static_cast<int>(p.strides[2])) {
|
||||||
auto input_channel = batch;
|
auto input_channel = batch;
|
||||||
auto filter_channel = filter;
|
auto filter_channel = filter;
|
||||||
T sum = 0;
|
T sum = 0;
|
||||||
@ -93,9 +93,9 @@ void convolve_3D_channels(const ConvolutionParams& p,
|
|||||||
for (int f_z = 0; f_z < filter_size_z; ++f_z) {
|
for (int f_z = 0; f_z < filter_size_z; ++f_z) {
|
||||||
for (int f_y = 0; f_y < filter_size_y; ++f_y) {
|
for (int f_y = 0; f_y < filter_size_y; ++f_y) {
|
||||||
for (int f_x = 0; f_x < filter_size_x; ++f_x) {
|
for (int f_x = 0; f_x < filter_size_x; ++f_x) {
|
||||||
int rel_i_z = i_z + (f_z * p.dilation[0]);
|
int rel_i_z = i_z + (f_z * static_cast<int>(p.dilation[0]));
|
||||||
int rel_i_y = i_y + (f_y * p.dilation[1]);
|
int rel_i_y = i_y + (f_y * static_cast<int>(p.dilation[1]));
|
||||||
int rel_i_x = i_x + (f_x * p.dilation[2]);
|
int rel_i_x = i_x + (f_x * static_cast<int>(p.dilation[2]));
|
||||||
|
|
||||||
bool padding =
|
bool padding =
|
||||||
!(in_range(rel_i_x, {0, input_size_x}) && in_range(rel_i_y, {0, input_size_y}) &&
|
!(in_range(rel_i_x, {0, input_size_x}) && in_range(rel_i_y, {0, input_size_y}) &&
|
||||||
@ -122,7 +122,7 @@ void convolve_3D_channels(const ConvolutionParams& p,
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline void extend_to_3D(ConvolutionParams& p, Shape& in_shape, Shape& filter_shape) {
|
inline void extend_to_3D(ConvolutionParams& p, Shape& in_shape, Shape& filter_shape) {
|
||||||
int spatial_rank = in_shape.size() - 2;
|
int spatial_rank = static_cast<int>(in_shape.size() - 2);
|
||||||
if (spatial_rank < 3) {
|
if (spatial_rank < 3) {
|
||||||
int missing_dims = 3 - spatial_rank;
|
int missing_dims = 3 - spatial_rank;
|
||||||
p.dilation.insert(std::prev(p.dilation.end(), spatial_rank), missing_dims, 1);
|
p.dilation.insert(std::prev(p.dilation.end(), spatial_rank), missing_dims, 1);
|
||||||
|
@ -25,9 +25,9 @@ void extend_with_zeros(const Strides& strides,
|
|||||||
const T* in,
|
const T* in,
|
||||||
Shape& output_shape,
|
Shape& output_shape,
|
||||||
std::vector<T>& input_zeros) {
|
std::vector<T>& input_zeros) {
|
||||||
std::vector<int> input_3d(3, 1);
|
std::vector<size_t> input_3d(3, 1);
|
||||||
std::vector<int> strides_3d(3, 1);
|
std::vector<size_t> strides_3d(3, 1);
|
||||||
std::vector<int> output_3d(3, 1);
|
std::vector<size_t> output_3d(3, 1);
|
||||||
|
|
||||||
for (size_t i = 0; i < strides.size(); ++i) {
|
for (size_t i = 0; i < strides.size(); ++i) {
|
||||||
output_shape[i + 2] = input_shape[i + 2] + (strides[i] - 1) * (input_shape[i + 2] - 1);
|
output_shape[i + 2] = input_shape[i + 2] + (strides[i] - 1) * (input_shape[i + 2] - 1);
|
||||||
@ -84,9 +84,11 @@ void infer_forward_convbackprop_output_shape(const Shape& in_spatial_shape,
|
|||||||
const Strides& dilations,
|
const Strides& dilations,
|
||||||
const CoordinateDiff& output_padding) {
|
const CoordinateDiff& output_padding) {
|
||||||
for (size_t idx = 0; idx < in_spatial_shape.size(); idx++) {
|
for (size_t idx = 0; idx < in_spatial_shape.size(); idx++) {
|
||||||
int total_padding = strides[idx] * (in_spatial_shape[idx] - 1) + dilations[idx] * (f_spatial_shape[idx] - 1) +
|
// FIXME: Incorrect logic with negative pad
|
||||||
1 - out_spatial_shape[idx] + output_padding[idx];
|
int total_padding =
|
||||||
size_t padded_dim = std::max<size_t>(total_padding, 0);
|
static_cast<int>(strides[idx] * (in_spatial_shape[idx] - 1) + dilations[idx] * (f_spatial_shape[idx] - 1) +
|
||||||
|
1 - out_spatial_shape[idx] + output_padding[idx]);
|
||||||
|
size_t padded_dim = std::max<size_t>(static_cast<size_t>(total_padding), static_cast<size_t>(0));
|
||||||
size_t filter_dilated_dim = dilations[idx] * (f_spatial_shape[idx] - 1) + 1;
|
size_t filter_dilated_dim = dilations[idx] * (f_spatial_shape[idx] - 1) + 1;
|
||||||
size_t out_spatial_dim =
|
size_t out_spatial_dim =
|
||||||
(in_spatial_shape[idx] - 1) * strides[idx] + filter_dilated_dim - padded_dim + output_padding[idx];
|
(in_spatial_shape[idx] - 1) * strides[idx] + filter_dilated_dim - padded_dim + output_padding[idx];
|
||||||
@ -186,7 +188,7 @@ void convolution_backprop_impl(const T* in,
|
|||||||
// convert output shape to 3D, contains only dimensions
|
// convert output shape to 3D, contains only dimensions
|
||||||
Shape out_shape_3d{out_shape.begin() + 2, out_shape.end()};
|
Shape out_shape_3d{out_shape.begin() + 2, out_shape.end()};
|
||||||
|
|
||||||
int out_shape_rank = out_shape.size() - 2;
|
int out_shape_rank = static_cast<int>(out_shape.size()) - 2;
|
||||||
if (out_shape_rank < 3) {
|
if (out_shape_rank < 3) {
|
||||||
int missing_dims = 3 - out_shape_rank;
|
int missing_dims = 3 - out_shape_rank;
|
||||||
out_shape_3d.insert(std::prev(out_shape_3d.end(), out_shape_rank), missing_dims, 1);
|
out_shape_3d.insert(std::prev(out_shape_3d.end(), out_shape_rank), missing_dims, 1);
|
||||||
@ -299,7 +301,7 @@ void convolution_backprop_in(const T* delta_in,
|
|||||||
|
|
||||||
// extend stride and filter inputs with zero padding for stride and filter_dilation
|
// extend stride and filter inputs with zero padding for stride and filter_dilation
|
||||||
// > 1, after that set stride and filter params to 1.
|
// > 1, after that set stride and filter params to 1.
|
||||||
const size_t stride_dim = std::accumulate(stride.begin(), stride.end(), 1, std::multiplies<size_t>());
|
const size_t stride_dim = std::accumulate(stride.begin(), stride.end(), int64_t(1), std::multiplies<int64_t>());
|
||||||
if (stride_dim >= 2) {
|
if (stride_dim >= 2) {
|
||||||
extend_with_zeros(stride, in_shape, delta_in, conv_input_shape, extended_input);
|
extend_with_zeros(stride, in_shape, delta_in, conv_input_shape, extended_input);
|
||||||
std::fill(conv_stride.begin(), conv_stride.end(), 1);
|
std::fill(conv_stride.begin(), conv_stride.end(), 1);
|
||||||
@ -307,7 +309,7 @@ void convolution_backprop_in(const T* delta_in,
|
|||||||
}
|
}
|
||||||
|
|
||||||
const size_t dilation_dim =
|
const size_t dilation_dim =
|
||||||
std::accumulate(filter_dilation.begin(), filter_dilation.end(), 1, std::multiplies<size_t>());
|
std::accumulate(filter_dilation.begin(), filter_dilation.end(), uint64_t(1), std::multiplies<size_t>());
|
||||||
if (dilation_dim >= 2) {
|
if (dilation_dim >= 2) {
|
||||||
extend_with_zeros<T>(filter_dilation,
|
extend_with_zeros<T>(filter_dilation,
|
||||||
filter_shape,
|
filter_shape,
|
||||||
|
@ -20,7 +20,7 @@ void cos(const T* arg, T* out, size_t count) {
|
|||||||
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
|
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
|
||||||
void cos(const T* arg, T* out, size_t count) {
|
void cos(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::roundl(std::cos(arg[i]));
|
out[i] = static_cast<T>(std::roundl(std::cos(arg[i])));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -13,14 +13,14 @@ namespace reference {
|
|||||||
template <typename T, typename std::enable_if<!std::is_integral<T>::value, bool>::type = true>
|
template <typename T, typename std::enable_if<!std::is_integral<T>::value, bool>::type = true>
|
||||||
void cosh(const T* arg, T* out, size_t count) {
|
void cosh(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::cosh(arg[i]);
|
out[i] = static_cast<T>(std::cosh(arg[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
|
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
|
||||||
void cosh(const T* arg, T* out, size_t count) {
|
void cosh(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::roundl(std::cosh(arg[i]));
|
out[i] = static_cast<T>(std::roundl(std::cosh(arg[i])));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -49,7 +49,7 @@ void ctc_greedy_decoder(const T* data,
|
|||||||
|
|
||||||
auto class_index = data + data_index;
|
auto class_index = data + data_index;
|
||||||
auto class_max_element = std::max_element(class_index, class_index + class_count);
|
auto class_max_element = std::max_element(class_index, class_index + class_count);
|
||||||
T max_class_ind = std::distance(class_index, class_max_element);
|
T max_class_ind = static_cast<T>(std::distance(class_index, class_max_element));
|
||||||
|
|
||||||
if (!(previous_class_index == max_class_ind && ctc_merge_repeated) &&
|
if (!(previous_class_index == max_class_ind && ctc_merge_repeated) &&
|
||||||
static_cast<uint64_t>(max_class_ind) < blank_index) {
|
static_cast<uint64_t>(max_class_ind) < blank_index) {
|
||||||
|
@ -24,7 +24,7 @@ void ctc_greedy_decoder_seq_len(const TF* data,
|
|||||||
const auto batch_size = data_shape[0];
|
const auto batch_size = data_shape[0];
|
||||||
const auto seq_len_max = data_shape[1];
|
const auto seq_len_max = data_shape[1];
|
||||||
const auto class_count = data_shape[2];
|
const auto class_count = data_shape[2];
|
||||||
std::fill_n(out1, shape_size(out_shape), -1);
|
std::fill_n(out1, shape_size(out_shape), TCI(-1));
|
||||||
|
|
||||||
for (std::size_t batch_ind = 0; batch_ind < batch_size; ++batch_ind) {
|
for (std::size_t batch_ind = 0; batch_ind < batch_size; ++batch_ind) {
|
||||||
TI previous_class_index = static_cast<TI>(-1);
|
TI previous_class_index = static_cast<TI>(-1);
|
||||||
@ -36,11 +36,11 @@ void ctc_greedy_decoder_seq_len(const TF* data,
|
|||||||
auto class_max_element = std::max_element(class_index, class_index + class_count);
|
auto class_max_element = std::max_element(class_index, class_index + class_count);
|
||||||
const auto max_class_ind = std::distance(class_index, class_max_element);
|
const auto max_class_ind = std::distance(class_index, class_max_element);
|
||||||
if (max_class_ind != blank_index[0] && !(ctc_merge_repeated && previous_class_index == max_class_ind)) {
|
if (max_class_ind != blank_index[0] && !(ctc_merge_repeated && previous_class_index == max_class_ind)) {
|
||||||
out1[out_index++] = max_class_ind;
|
out1[out_index++] = static_cast<TCI>(max_class_ind);
|
||||||
}
|
}
|
||||||
previous_class_index = max_class_ind;
|
previous_class_index = static_cast<TI>(max_class_ind);
|
||||||
}
|
}
|
||||||
out2[batch_ind] = out_index - batch_ind * seq_len_max;
|
out2[batch_ind] = static_cast<TSL>(out_index - batch_ind * seq_len_max);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -25,7 +25,7 @@ void CTCLoss(const T* logits,
|
|||||||
const size_t batchNum = logitsShape[0];
|
const size_t batchNum = logitsShape[0];
|
||||||
const size_t maxTime = logitsShape[1];
|
const size_t maxTime = logitsShape[1];
|
||||||
const size_t classesNum = logitsShape[2];
|
const size_t classesNum = logitsShape[2];
|
||||||
U blankIndex = classesNum - 1;
|
U blankIndex = static_cast<U>(classesNum - 1);
|
||||||
if (blankIndexP != nullptr) {
|
if (blankIndexP != nullptr) {
|
||||||
blankIndex = blankIndexP[0];
|
blankIndex = blankIndexP[0];
|
||||||
}
|
}
|
||||||
@ -105,9 +105,10 @@ void CTCLoss(const T* logits,
|
|||||||
res = prevLogProb;
|
res = prevLogProb;
|
||||||
} else if (prevLogProb != -type_inf) {
|
} else if (prevLogProb != -type_inf) {
|
||||||
if (res > prevLogProb)
|
if (res > prevLogProb)
|
||||||
res = res + std::log1pf(std::exp(prevLogProb - res));
|
res = res + static_cast<T>(std::log1pf(static_cast<float>(std::exp(prevLogProb - res))));
|
||||||
else
|
else
|
||||||
res = prevLogProb + std::log1pf(std::exp(res - prevLogProb));
|
res = prevLogProb +
|
||||||
|
static_cast<T>(std::log1pf(static_cast<float>(std::exp(res - prevLogProb))));
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -84,19 +84,19 @@ inline float bilinear_interpolation(const inputType* data,
|
|||||||
|
|
||||||
float value11 = 0;
|
float value11 = 0;
|
||||||
if (y1 >= 0 && x1 >= 0)
|
if (y1 >= 0 && x1 >= 0)
|
||||||
value11 = data[y1 * x_size + x1];
|
value11 = static_cast<float>(data[y1 * x_size + x1]);
|
||||||
|
|
||||||
float value21 = 0;
|
float value21 = 0;
|
||||||
if (y1 >= 0 && x2 < x_size)
|
if (y1 >= 0 && x2 < x_size)
|
||||||
value21 = data[y1 * x_size + x2];
|
value21 = static_cast<float>(data[y1 * x_size + x2]);
|
||||||
|
|
||||||
float value12 = 0;
|
float value12 = 0;
|
||||||
if (y2 < y_size && x1 >= 0)
|
if (y2 < y_size && x1 >= 0)
|
||||||
value12 = data[y2 * x_size + x1];
|
value12 = static_cast<float>(data[y2 * x_size + x1]);
|
||||||
|
|
||||||
float value22 = 0;
|
float value22 = 0;
|
||||||
if (y2 < y_size && x2 < x_size)
|
if (y2 < y_size && x2 < x_size)
|
||||||
value22 = data[y2 * x_size + x2];
|
value22 = static_cast<float>(data[y2 * x_size + x2]);
|
||||||
|
|
||||||
const float value = (1 - distX) * (1 - distY) * value11 + (1 - distX) * distY * value12 +
|
const float value = (1 - distX) * (1 - distY) * value11 + (1 - distX) * distY * value12 +
|
||||||
distX * (1 - distY) * value21 + distX * distY * value22;
|
distX * (1 - distY) * value21 + distX * distY * value22;
|
||||||
@ -118,26 +118,28 @@ void convolve_2D_channels(const ConvolutionParams& p,
|
|||||||
int64_t groups,
|
int64_t groups,
|
||||||
int64_t deformable_groups,
|
int64_t deformable_groups,
|
||||||
bool bilinear_interpolation_pad) {
|
bool bilinear_interpolation_pad) {
|
||||||
const int input_size_y = batch_shape[1];
|
const int input_size_y = static_cast<int>(batch_shape[1]);
|
||||||
const int input_size_x = batch_shape[2];
|
const int input_size_x = static_cast<int>(batch_shape[2]);
|
||||||
const int filter_size_y = filter_shape[1];
|
const int filter_size_y = static_cast<int>(filter_shape[1]);
|
||||||
const int filter_size_x = filter_shape[2];
|
const int filter_size_x = static_cast<int>(filter_shape[2]);
|
||||||
const int dilated_filter_size_y = filter_size_y + (filter_size_y - 1) * (p.dilation[0] - 1);
|
const int dilated_filter_size_y = filter_size_y + (filter_size_y - 1) * (static_cast<int>(p.dilation[0]) - 1);
|
||||||
const int dilated_filter_size_x = filter_size_x + (filter_size_x - 1) * (p.dilation[1] - 1);
|
const int dilated_filter_size_x = filter_size_x + (filter_size_x - 1) * (static_cast<int>(p.dilation[1]) - 1);
|
||||||
|
|
||||||
const int input_channel_size = shape_size(shape_reduce(batch_shape));
|
const int input_channel_size = static_cast<int>(shape_size(shape_reduce(batch_shape)));
|
||||||
const int filter_channel_size = shape_size(shape_reduce(filter_shape));
|
const int filter_channel_size = static_cast<int>(shape_size(shape_reduce(filter_shape)));
|
||||||
const int offsets_size = shape_size(offset_shape);
|
const int offsets_size = static_cast<int>(shape_size(offset_shape));
|
||||||
const int offsets_spatial_size = shape_size(shape_reduce(offset_shape));
|
const int offsets_spatial_size = static_cast<int>(shape_size(shape_reduce(offset_shape)));
|
||||||
const int filter_channels_count = filter_shape[0];
|
const int filter_channels_count = static_cast<int>(filter_shape[0]);
|
||||||
const int mask_size = shape_size(mask_shape);
|
const int mask_size = static_cast<int>(shape_size(mask_shape));
|
||||||
const int mask_spatial_size = shape_size(shape_reduce(mask_shape));
|
const int mask_spatial_size = static_cast<int>(shape_size(shape_reduce(mask_shape)));
|
||||||
|
|
||||||
int out_idx = 0;
|
int out_idx = 0;
|
||||||
for (int i_y = -p.pads_begin[0]; i_y <= (p.pads_end[0] + input_size_y - dilated_filter_size_y);
|
for (int i_y = static_cast<int>(-p.pads_begin[0]);
|
||||||
i_y += p.strides[0]) {
|
i_y <= static_cast<int>(p.pads_end[0] + input_size_y - dilated_filter_size_y);
|
||||||
for (int i_x = -p.pads_begin[1]; i_x <= (p.pads_end[1] + input_size_x - dilated_filter_size_x);
|
i_y += static_cast<int>(p.strides[0])) {
|
||||||
i_x += p.strides[1]) {
|
for (int i_x = static_cast<int>(-p.pads_begin[1]);
|
||||||
|
i_x <= static_cast<int>(p.pads_end[1] + input_size_x - dilated_filter_size_x);
|
||||||
|
i_x += static_cast<int>(p.strides[1])) {
|
||||||
auto input_channel = batch;
|
auto input_channel = batch;
|
||||||
auto filter_channel = filter;
|
auto filter_channel = filter;
|
||||||
T sum = 0;
|
T sum = 0;
|
||||||
@ -151,15 +153,16 @@ void convolve_2D_channels(const ConvolutionParams& p,
|
|||||||
f_buf_idx * 2 * offsets_spatial_size + out_idx];
|
f_buf_idx * 2 * offsets_spatial_size + out_idx];
|
||||||
T x_offset = offsets[deformable_group_idx * offsets_size +
|
T x_offset = offsets[deformable_group_idx * offsets_size +
|
||||||
(f_buf_idx * 2 + 1) * offsets_spatial_size + out_idx];
|
(f_buf_idx * 2 + 1) * offsets_spatial_size + out_idx];
|
||||||
T rel_i_y = i_y + (f_y * p.dilation[0]) + y_offset;
|
T rel_i_y = static_cast<T>(i_y + (f_y * p.dilation[0]) + y_offset);
|
||||||
T rel_i_x = i_x + (f_x * p.dilation[1]) + x_offset;
|
T rel_i_x = static_cast<T>(i_x + (f_x * p.dilation[1]) + x_offset);
|
||||||
|
|
||||||
bool padding;
|
bool padding;
|
||||||
if (bilinear_interpolation_pad) {
|
if (bilinear_interpolation_pad) {
|
||||||
padding = !((static_cast<int>(rel_i_x) > -1 && static_cast<int>(rel_i_x) < input_size_x) &&
|
padding = !((static_cast<int>(rel_i_x) > -1 && static_cast<int>(rel_i_x) < input_size_x) &&
|
||||||
(static_cast<int>(rel_i_y) > -1 && static_cast<int>(rel_i_y) < input_size_y));
|
(static_cast<int>(rel_i_y) > -1 && static_cast<int>(rel_i_y) < input_size_y));
|
||||||
} else {
|
} else {
|
||||||
padding = !(in_range(rel_i_x, {0, input_size_x}) && in_range(rel_i_y, {0, input_size_y}));
|
padding = !(in_range(rel_i_x, {T(0), T(input_size_x)}) &&
|
||||||
|
in_range(rel_i_y, {T(0), T(input_size_y)}));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (padding)
|
if (padding)
|
||||||
@ -167,12 +170,12 @@ void convolve_2D_channels(const ConvolutionParams& p,
|
|||||||
|
|
||||||
T mask_scalar =
|
T mask_scalar =
|
||||||
mask[deformable_group_idx * mask_size + f_buf_idx * mask_spatial_size + out_idx];
|
mask[deformable_group_idx * mask_size + f_buf_idx * mask_spatial_size + out_idx];
|
||||||
sum += bilinear_interpolation(input_channel,
|
sum += static_cast<T>(bilinear_interpolation(input_channel,
|
||||||
rel_i_x,
|
static_cast<float>(rel_i_x),
|
||||||
rel_i_y,
|
static_cast<float>(rel_i_y),
|
||||||
input_size_x,
|
input_size_x,
|
||||||
input_size_y,
|
input_size_y,
|
||||||
bilinear_interpolation_pad) *
|
bilinear_interpolation_pad)) *
|
||||||
filter_channel[f_buf_idx] * mask_scalar;
|
filter_channel[f_buf_idx] * mask_scalar;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -53,7 +53,7 @@ void deformable_psroi_pooling(const T* data_input,
|
|||||||
const T* roi = rois_input + roi_idx * roi_attrs_count;
|
const T* roi = rois_input + roi_idx * roi_attrs_count;
|
||||||
|
|
||||||
// Index of the corresponding input batch
|
// Index of the corresponding input batch
|
||||||
int64_t roi_batch_id = roi[0];
|
int64_t roi_batch_id = static_cast<int64_t>(roi[0]);
|
||||||
if (roi_batch_id < 0)
|
if (roi_batch_id < 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@ -104,12 +104,12 @@ void deformable_psroi_pooling(const T* data_input,
|
|||||||
T x_offset_value = offsets_input[x_offset_idx];
|
T x_offset_value = offsets_input[x_offset_idx];
|
||||||
T y_offset_value = offsets_input[y_offset_idx];
|
T y_offset_value = offsets_input[y_offset_idx];
|
||||||
|
|
||||||
x_offset_value *= trans_std;
|
x_offset_value *= static_cast<T>(trans_std);
|
||||||
y_offset_value *= trans_std;
|
y_offset_value *= static_cast<T>(trans_std);
|
||||||
|
|
||||||
// Move bin position by normalized offset values
|
// Move bin position by normalized offset values
|
||||||
bin_x1_idx += (x_offset_value * roi_width);
|
bin_x1_idx += static_cast<float>(x_offset_value) * roi_width;
|
||||||
bin_y1_idx += (y_offset_value * roi_height);
|
bin_y1_idx += static_cast<float>(y_offset_value) * roi_height;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Each bin is divided into sub-bins
|
// Each bin is divided into sub-bins
|
||||||
@ -150,11 +150,13 @@ void deformable_psroi_pooling(const T* data_input,
|
|||||||
const float delta_left_x = std::fabs(sub_bin_x1_idx - left_x);
|
const float delta_left_x = std::fabs(sub_bin_x1_idx - left_x);
|
||||||
const float delta_top_y = std::fabs(sub_bin_y1_idx - top_y);
|
const float delta_top_y = std::fabs(sub_bin_y1_idx - top_y);
|
||||||
|
|
||||||
const T top_interp = top_left_sample + (top_right_sample - top_left_sample) * delta_left_x;
|
const T top_interp =
|
||||||
const T bottom_interp =
|
top_left_sample + (top_right_sample - top_left_sample) * static_cast<T>(delta_left_x);
|
||||||
bottom_left_sample + (bottom_right_sample - bottom_left_sample) * delta_left_x;
|
const T bottom_interp = bottom_left_sample + (bottom_right_sample - bottom_left_sample) *
|
||||||
|
static_cast<T>(delta_left_x);
|
||||||
|
|
||||||
const T sub_bin_value = top_interp + (bottom_interp - top_interp) * delta_top_y;
|
const T sub_bin_value =
|
||||||
|
top_interp + (bottom_interp - top_interp) * static_cast<T>(delta_top_y);
|
||||||
|
|
||||||
legit_sub_bin_count++;
|
legit_sub_bin_count++;
|
||||||
sub_bins_val_sum += sub_bin_value;
|
sub_bins_val_sum += sub_bin_value;
|
||||||
@ -162,7 +164,7 @@ void deformable_psroi_pooling(const T* data_input,
|
|||||||
}
|
}
|
||||||
// Calculate average of sub_bin values for single ROI bin
|
// Calculate average of sub_bin values for single ROI bin
|
||||||
if (legit_sub_bin_count != 0) {
|
if (legit_sub_bin_count != 0) {
|
||||||
output[out_value_idx] = sub_bins_val_sum / legit_sub_bin_count;
|
output[out_value_idx] = sub_bins_val_sum / static_cast<T>(legit_sub_bin_count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -47,7 +47,7 @@ private:
|
|||||||
for (size_t p = 0; p < numPriors; ++p) {
|
for (size_t p = 0; p < numPriors; ++p) {
|
||||||
size_t startIdx = p * numLocClasses * 4;
|
size_t startIdx = p * numLocClasses * 4;
|
||||||
for (size_t c = 0; c < numLocClasses; ++c) {
|
for (size_t c = 0; c < numLocClasses; ++c) {
|
||||||
int label = attrs.share_location ? -1 : c;
|
int label = attrs.share_location ? -1 : static_cast<int>(c);
|
||||||
if (labelBbox.find(label) == labelBbox.end()) {
|
if (labelBbox.find(label) == labelBbox.end()) {
|
||||||
labelBbox[label].resize(numPriors);
|
labelBbox[label].resize(numPriors);
|
||||||
}
|
}
|
||||||
@ -66,7 +66,7 @@ private:
|
|||||||
for (size_t i = 0; i < numImages; ++i) {
|
for (size_t i = 0; i < numImages; ++i) {
|
||||||
std::map<int, std::vector<dataType>>& labelScores = confPreds[i];
|
std::map<int, std::vector<dataType>>& labelScores = confPreds[i];
|
||||||
for (size_t p = 0; p < numPriors; ++p) {
|
for (size_t p = 0; p < numPriors; ++p) {
|
||||||
int startIdx = p * numClasses;
|
size_t startIdx = p * numClasses;
|
||||||
for (int c = 0; c < numClasses; ++c) {
|
for (int c = 0; c < numClasses; ++c) {
|
||||||
labelScores[c].push_back(confData[startIdx + c]);
|
labelScores[c].push_back(confData[startIdx + c]);
|
||||||
}
|
}
|
||||||
@ -82,7 +82,7 @@ private:
|
|||||||
for (size_t i = 0; i < numImages; ++i) {
|
for (size_t i = 0; i < numImages; ++i) {
|
||||||
std::map<int, std::vector<dataType>>& labelScores = confPreds[i];
|
std::map<int, std::vector<dataType>>& labelScores = confPreds[i];
|
||||||
for (size_t p = 0; p < numPriors; ++p) {
|
for (size_t p = 0; p < numPriors; ++p) {
|
||||||
int startIdx = p * numClasses;
|
size_t startIdx = p * numClasses;
|
||||||
if (armConfData[p * 2 + 1] < attrs.objectness_score) {
|
if (armConfData[p * 2 + 1] < attrs.objectness_score) {
|
||||||
for (int c = 0; c < numClasses; ++c) {
|
for (int c = 0; c < numClasses; ++c) {
|
||||||
c == attrs.background_label_id ? labelScores[c].push_back(1) : labelScores[c].push_back(0);
|
c == attrs.background_label_id ? labelScores[c].push_back(1) : labelScores[c].push_back(0);
|
||||||
@ -113,12 +113,13 @@ private:
|
|||||||
std::vector<std::vector<std::vector<dataType>>>& priorVariances) {
|
std::vector<std::vector<std::vector<dataType>>>& priorVariances) {
|
||||||
priorBboxes.resize(priorsBatchSize);
|
priorBboxes.resize(priorsBatchSize);
|
||||||
priorVariances.resize(priorsBatchSize);
|
priorVariances.resize(priorsBatchSize);
|
||||||
int off = attrs.variance_encoded_in_target ? (numPriors * priorSize) : (2 * numPriors * priorSize);
|
int off =
|
||||||
|
static_cast<int>(attrs.variance_encoded_in_target ? (numPriors * priorSize) : (2 * numPriors * priorSize));
|
||||||
for (size_t n = 0; n < priorsBatchSize; n++) {
|
for (size_t n = 0; n < priorsBatchSize; n++) {
|
||||||
std::vector<NormalizedBBox>& currPrBbox = priorBboxes[n];
|
std::vector<NormalizedBBox>& currPrBbox = priorBboxes[n];
|
||||||
std::vector<std::vector<dataType>>& currPrVar = priorVariances[n];
|
std::vector<std::vector<dataType>>& currPrVar = priorVariances[n];
|
||||||
for (size_t i = 0; i < numPriors; ++i) {
|
for (size_t i = 0; i < numPriors; ++i) {
|
||||||
int start_idx = i * priorSize;
|
size_t start_idx = i * priorSize;
|
||||||
NormalizedBBox bbox;
|
NormalizedBBox bbox;
|
||||||
bbox.xmin = priorData[start_idx + 0 + offset];
|
bbox.xmin = priorData[start_idx + 0 + offset];
|
||||||
bbox.ymin = priorData[start_idx + 1 + offset];
|
bbox.ymin = priorData[start_idx + 1 + offset];
|
||||||
@ -129,7 +130,7 @@ private:
|
|||||||
if (!attrs.variance_encoded_in_target) {
|
if (!attrs.variance_encoded_in_target) {
|
||||||
const dataType* priorVar = priorData + numPriors * priorSize;
|
const dataType* priorVar = priorData + numPriors * priorSize;
|
||||||
for (size_t i = 0; i < numPriors; ++i) {
|
for (size_t i = 0; i < numPriors; ++i) {
|
||||||
int start_idx = i * 4;
|
size_t start_idx = i * 4;
|
||||||
std::vector<dataType> var(4);
|
std::vector<dataType> var(4);
|
||||||
for (int j = 0; j < 4; ++j) {
|
for (int j = 0; j < 4; ++j) {
|
||||||
var[j] = (priorVar[start_idx + j]);
|
var[j] = (priorVar[start_idx + j]);
|
||||||
@ -151,10 +152,10 @@ private:
|
|||||||
dataType priorYmax = priorBboxes.ymax;
|
dataType priorYmax = priorBboxes.ymax;
|
||||||
|
|
||||||
if (!attrs.normalized) {
|
if (!attrs.normalized) {
|
||||||
priorXmin /= attrs.input_width;
|
priorXmin /= static_cast<dataType>(attrs.input_width);
|
||||||
priorYmin /= attrs.input_height;
|
priorYmin /= static_cast<dataType>(attrs.input_height);
|
||||||
priorXmax /= attrs.input_width;
|
priorXmax /= static_cast<dataType>(attrs.input_width);
|
||||||
priorYmax /= attrs.input_height;
|
priorYmax /= static_cast<dataType>(attrs.input_height);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (attrs.code_type == "caffe.PriorBoxParameter.CORNER") {
|
if (attrs.code_type == "caffe.PriorBoxParameter.CORNER") {
|
||||||
@ -171,8 +172,8 @@ private:
|
|||||||
dataType decodeBboxWidth, decodeBboxHeight;
|
dataType decodeBboxWidth, decodeBboxHeight;
|
||||||
decodeBboxCenterX = priorVariances[0] * bbox.xmin * priorWidth + priorCenterX;
|
decodeBboxCenterX = priorVariances[0] * bbox.xmin * priorWidth + priorCenterX;
|
||||||
decodeBboxCenterY = priorVariances[1] * bbox.ymin * priorHeight + priorCenterY;
|
decodeBboxCenterY = priorVariances[1] * bbox.ymin * priorHeight + priorCenterY;
|
||||||
decodeBboxWidth = std::exp(priorVariances[2] * bbox.xmax) * priorWidth;
|
decodeBboxWidth = static_cast<dataType>(std::exp(priorVariances[2] * bbox.xmax)) * priorWidth;
|
||||||
decodeBboxHeight = std::exp(priorVariances[3] * bbox.ymax) * priorHeight;
|
decodeBboxHeight = static_cast<dataType>(std::exp(priorVariances[3] * bbox.ymax)) * priorHeight;
|
||||||
decodeBbox.xmin = decodeBboxCenterX - decodeBboxWidth / 2;
|
decodeBbox.xmin = decodeBboxCenterX - decodeBboxWidth / 2;
|
||||||
decodeBbox.ymin = decodeBboxCenterY - decodeBboxHeight / 2;
|
decodeBbox.ymin = decodeBboxCenterY - decodeBboxHeight / 2;
|
||||||
decodeBbox.xmax = decodeBboxCenterX + decodeBboxWidth / 2;
|
decodeBbox.xmax = decodeBboxCenterX + decodeBboxWidth / 2;
|
||||||
@ -187,10 +188,10 @@ private:
|
|||||||
dataType priorYmax = priorBboxes.ymax;
|
dataType priorYmax = priorBboxes.ymax;
|
||||||
|
|
||||||
if (!attrs.normalized) {
|
if (!attrs.normalized) {
|
||||||
priorXmin /= attrs.input_width;
|
priorXmin /= static_cast<dataType>(attrs.input_width);
|
||||||
priorYmin /= attrs.input_height;
|
priorYmin /= static_cast<dataType>(attrs.input_height);
|
||||||
priorXmax /= attrs.input_width;
|
priorXmax /= static_cast<dataType>(attrs.input_width);
|
||||||
priorYmax /= attrs.input_height;
|
priorYmax /= static_cast<dataType>(attrs.input_height);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (attrs.code_type == "caffe.PriorBoxParameter.CORNER") {
|
if (attrs.code_type == "caffe.PriorBoxParameter.CORNER") {
|
||||||
@ -207,8 +208,8 @@ private:
|
|||||||
dataType decodeBboxWidth, decodeBboxHeight;
|
dataType decodeBboxWidth, decodeBboxHeight;
|
||||||
decodeBboxCenterX = bbox.xmin * priorWidth + priorCenterX;
|
decodeBboxCenterX = bbox.xmin * priorWidth + priorCenterX;
|
||||||
decodeBboxCenterY = bbox.ymin * priorHeight + priorCenterY;
|
decodeBboxCenterY = bbox.ymin * priorHeight + priorCenterY;
|
||||||
decodeBboxWidth = std::exp(bbox.xmax) * priorWidth;
|
decodeBboxWidth = static_cast<dataType>(std::exp(bbox.xmax)) * priorWidth;
|
||||||
decodeBboxHeight = std::exp(bbox.ymax) * priorHeight;
|
decodeBboxHeight = static_cast<dataType>(std::exp(bbox.ymax)) * priorHeight;
|
||||||
decodeBbox.xmin = decodeBboxCenterX - decodeBboxWidth / 2;
|
decodeBbox.xmin = decodeBboxCenterX - decodeBboxWidth / 2;
|
||||||
decodeBbox.ymin = decodeBboxCenterY - decodeBboxHeight / 2;
|
decodeBbox.ymin = decodeBboxCenterY - decodeBboxHeight / 2;
|
||||||
decodeBbox.xmax = decodeBboxCenterX + decodeBboxWidth / 2;
|
decodeBbox.xmax = decodeBboxCenterX + decodeBboxWidth / 2;
|
||||||
@ -220,8 +221,8 @@ private:
|
|||||||
const std::vector<std::vector<dataType>>& priorVariances,
|
const std::vector<std::vector<dataType>>& priorVariances,
|
||||||
const std::vector<NormalizedBBox>& labelLocPreds,
|
const std::vector<NormalizedBBox>& labelLocPreds,
|
||||||
std::vector<NormalizedBBox>& decodeBboxes) {
|
std::vector<NormalizedBBox>& decodeBboxes) {
|
||||||
int numBboxes = priorBboxes.size();
|
size_t numBboxes = priorBboxes.size();
|
||||||
for (int i = 0; i < numBboxes; ++i) {
|
for (size_t i = 0; i < numBboxes; ++i) {
|
||||||
NormalizedBBox decodeBbox;
|
NormalizedBBox decodeBbox;
|
||||||
|
|
||||||
if (attrs.variance_encoded_in_target) {
|
if (attrs.variance_encoded_in_target) {
|
||||||
@ -246,14 +247,14 @@ private:
|
|||||||
decodeBboxes.resize(numImages);
|
decodeBboxes.resize(numImages);
|
||||||
for (size_t i = 0; i < numImages; ++i) {
|
for (size_t i = 0; i < numImages; ++i) {
|
||||||
LabelBBox& decodeBboxesImage = decodeBboxes[i];
|
LabelBBox& decodeBboxesImage = decodeBboxes[i];
|
||||||
int pboxIdx = i;
|
int pboxIdx = static_cast<int>(i);
|
||||||
if (priorBboxes.size() == 1) {
|
if (priorBboxes.size() == 1) {
|
||||||
pboxIdx = 0;
|
pboxIdx = 0;
|
||||||
}
|
}
|
||||||
const std::vector<NormalizedBBox>& currPrBbox = priorBboxes[pboxIdx];
|
const std::vector<NormalizedBBox>& currPrBbox = priorBboxes[pboxIdx];
|
||||||
const std::vector<std::vector<dataType>>& currPrVar = priorVariances[pboxIdx];
|
const std::vector<std::vector<dataType>>& currPrVar = priorVariances[pboxIdx];
|
||||||
for (size_t c = 0; c < numLocClasses; ++c) {
|
for (size_t c = 0; c < numLocClasses; ++c) {
|
||||||
int label = attrs.share_location ? -1 : c;
|
int label = attrs.share_location ? -1 : static_cast<int>(c);
|
||||||
if (attrs.background_label_id > -1 && label == attrs.background_label_id) {
|
if (attrs.background_label_id > -1 && label == attrs.background_label_id) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -274,7 +275,7 @@ private:
|
|||||||
const std::vector<NormalizedBBox>& currPrBbox = priorBboxes[i];
|
const std::vector<NormalizedBBox>& currPrBbox = priorBboxes[i];
|
||||||
const std::vector<std::vector<dataType>>& currPrVar = priorVariances[i];
|
const std::vector<std::vector<dataType>>& currPrVar = priorVariances[i];
|
||||||
for (size_t c = 0; c < numLocClasses; ++c) {
|
for (size_t c = 0; c < numLocClasses; ++c) {
|
||||||
int label = attrs.share_location ? -1 : c;
|
int label = attrs.share_location ? -1 : static_cast<int>(c);
|
||||||
if (attrs.background_label_id > -1 && label == attrs.background_label_id) {
|
if (attrs.background_label_id > -1 && label == attrs.background_label_id) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -298,7 +299,7 @@ private:
|
|||||||
std::vector<std::pair<dataType, int>>& scoreIndexVec) {
|
std::vector<std::pair<dataType, int>>& scoreIndexVec) {
|
||||||
for (size_t i = 0; i < scores.size(); ++i) {
|
for (size_t i = 0; i < scores.size(); ++i) {
|
||||||
if (scores[i] > threshold) {
|
if (scores[i] > threshold) {
|
||||||
scoreIndexVec.push_back(std::make_pair(scores[i], i));
|
scoreIndexVec.push_back(std::make_pair(scores[i], static_cast<int>(i)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -336,7 +337,7 @@ private:
|
|||||||
dataType bbox2_size = BBoxSize(bbox2);
|
dataType bbox2_size = BBoxSize(bbox2);
|
||||||
return intersect_size / (bbox1_size + bbox2_size - intersect_size);
|
return intersect_size / (bbox1_size + bbox2_size - intersect_size);
|
||||||
} else {
|
} else {
|
||||||
return 0.0f;
|
return static_cast<dataType>(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -344,7 +345,7 @@ private:
|
|||||||
const std::vector<dataType>& scores,
|
const std::vector<dataType>& scores,
|
||||||
std::vector<int>& indices) {
|
std::vector<int>& indices) {
|
||||||
std::vector<std::pair<dataType, int>> scoreIndexVec;
|
std::vector<std::pair<dataType, int>> scoreIndexVec;
|
||||||
GetMaxScoreIndex(scores, attrs.confidence_threshold, attrs.top_k, scoreIndexVec);
|
GetMaxScoreIndex(scores, static_cast<dataType>(attrs.confidence_threshold), attrs.top_k, scoreIndexVec);
|
||||||
while (scoreIndexVec.size() != 0) {
|
while (scoreIndexVec.size() != 0) {
|
||||||
const int idx = scoreIndexVec.front().second;
|
const int idx = scoreIndexVec.front().second;
|
||||||
bool keep = true;
|
bool keep = true;
|
||||||
@ -381,7 +382,7 @@ private:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (id > 0 && conf >= attrs.confidence_threshold) {
|
if (id > 0 && conf >= attrs.confidence_threshold) {
|
||||||
scoreIndexPairs.push_back(std::make_pair(conf, std::make_pair(id, p)));
|
scoreIndexPairs.push_back(std::make_pair(conf, std::make_pair(id, static_cast<int>(p))));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::sort(
|
std::sort(
|
||||||
@ -502,13 +503,13 @@ public:
|
|||||||
continue;
|
continue;
|
||||||
const std::vector<NormalizedBBox>& bboxes = decodeBboxesImage.find(label)->second;
|
const std::vector<NormalizedBBox>& bboxes = decodeBboxesImage.find(label)->second;
|
||||||
caffeNMS(bboxes, scores, indices[c]);
|
caffeNMS(bboxes, scores, indices[c]);
|
||||||
numDet += indices[c].size();
|
numDet += static_cast<int>(indices[c].size());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// MXNet style
|
// MXNet style
|
||||||
mxNetNms(decodeBboxesImage, confScores, indices);
|
mxNetNms(decodeBboxesImage, confScores, indices);
|
||||||
for (auto it = indices.begin(); it != indices.end(); it++)
|
for (auto it = indices.begin(); it != indices.end(); it++)
|
||||||
numDet += it->second.size();
|
numDet += static_cast<int>(it->second.size());
|
||||||
}
|
}
|
||||||
if (attrs.keep_top_k[0] > -1 && numDet > attrs.keep_top_k[0]) {
|
if (attrs.keep_top_k[0] > -1 && numDet > attrs.keep_top_k[0]) {
|
||||||
std::vector<std::pair<dataType, std::pair<int, int>>> scoreIndexPairs;
|
std::vector<std::pair<dataType, std::pair<int, int>>> scoreIndexPairs;
|
||||||
@ -559,8 +560,8 @@ public:
|
|||||||
std::vector<int>& indices = it->second;
|
std::vector<int>& indices = it->second;
|
||||||
for (size_t j = 0; j < indices.size(); ++j) {
|
for (size_t j = 0; j < indices.size(); ++j) {
|
||||||
int idx = indices[j];
|
int idx = indices[j];
|
||||||
result[count * 7 + 0] = i;
|
result[count * 7 + 0] = static_cast<dataType>(i);
|
||||||
result[count * 7 + 1] = attrs.decrease_label_id ? (label - 1) : label;
|
result[count * 7 + 1] = static_cast<dataType>(attrs.decrease_label_id ? (label - 1) : label);
|
||||||
result[count * 7 + 2] = scores[idx];
|
result[count * 7 + 2] = scores[idx];
|
||||||
const NormalizedBBox& bbox = bboxes[idx];
|
const NormalizedBBox& bbox = bboxes[idx];
|
||||||
|
|
||||||
|
@ -36,8 +36,8 @@ void equal(const T* arg0,
|
|||||||
const Shape& arg0_shape,
|
const Shape& arg0_shape,
|
||||||
const Shape& arg1_shape,
|
const Shape& arg1_shape,
|
||||||
const op::AutoBroadcastSpec& broadcast_spec) {
|
const op::AutoBroadcastSpec& broadcast_spec) {
|
||||||
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
|
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> U {
|
||||||
return x == y;
|
return static_cast<U>(x == y);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -17,14 +17,14 @@ namespace reference {
|
|||||||
template <typename T, typename std::enable_if<!std::is_integral<T>::value, bool>::type = true>
|
template <typename T, typename std::enable_if<!std::is_integral<T>::value, bool>::type = true>
|
||||||
void erf(const T* arg, T* out, size_t count) {
|
void erf(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::erf(arg[i]);
|
out[i] = static_cast<T>(std::erf(arg[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
|
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
|
||||||
void erf(const T* arg, T* out, size_t count) {
|
void erf(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::round(std::erf(arg[i]));
|
out[i] = static_cast<T>(std::round(std::erf(arg[i])));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -13,7 +13,7 @@ namespace reference {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void exp(const T* arg, T* out, size_t count) {
|
void exp(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::exp(arg[i]);
|
out[i] = static_cast<T>(std::exp(arg[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -48,10 +48,10 @@ void experimental_detectron_prior_grid_generator(const T* priors,
|
|||||||
for (int64_t h = 0; h < layer_height; ++h) {
|
for (int64_t h = 0; h < layer_height; ++h) {
|
||||||
for (int64_t w = 0; w < layer_width; ++w) {
|
for (int64_t w = 0; w < layer_width; ++w) {
|
||||||
for (int64_t s = 0; s < num_priors; ++s) {
|
for (int64_t s = 0; s < num_priors; ++s) {
|
||||||
output_rois[0] = priors[4 * s + 0] + step_w * (w + 0.5f);
|
output_rois[0] = static_cast<T>(priors[4 * s + 0] + step_w * (w + 0.5f));
|
||||||
output_rois[1] = priors[4 * s + 1] + step_h * (h + 0.5f);
|
output_rois[1] = static_cast<T>(priors[4 * s + 1] + step_h * (h + 0.5f));
|
||||||
output_rois[2] = priors[4 * s + 2] + step_w * (w + 0.5f);
|
output_rois[2] = static_cast<T>(priors[4 * s + 2] + step_w * (w + 0.5f));
|
||||||
output_rois[3] = priors[4 * s + 3] + step_h * (h + 0.5f);
|
output_rois[3] = static_cast<T>(priors[4 * s + 3] + step_h * (h + 0.5f));
|
||||||
output_rois += 4;
|
output_rois += 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -49,8 +49,8 @@ void extract_image_patches(const std::shared_ptr<op::ExtractImagePatches> extImg
|
|||||||
int64_t PL = 0, PT = 0;
|
int64_t PL = 0, PT = 0;
|
||||||
|
|
||||||
if (auto_pad != op::PadType::VALID) {
|
if (auto_pad != op::PadType::VALID) {
|
||||||
int64_t PW = (std::ceil(1.f * IW / SW) - 1) * SW + iwStep - IW;
|
int64_t PW = static_cast<int64_t>(std::ceil(1.f * IW / SW) - 1) * SW + iwStep - IW;
|
||||||
int64_t PH = (std::ceil(1.f * IH / SH) - 1) * SH + ihStep - IH;
|
int64_t PH = static_cast<int64_t>(std::ceil(1.f * IH / SH) - 1) * SH + ihStep - IH;
|
||||||
|
|
||||||
if ((PW > 0) && (PW < iwStep)) {
|
if ((PW > 0) && (PW < iwStep)) {
|
||||||
if (PW % 2 == 1) {
|
if (PW % 2 == 1) {
|
||||||
|
@ -22,7 +22,7 @@ namespace fake_quantize_details {
|
|||||||
inline std::vector<size_t> calc_broadcast_index_offset(const std::vector<size_t>& memory_offsets,
|
inline std::vector<size_t> calc_broadcast_index_offset(const std::vector<size_t>& memory_offsets,
|
||||||
const std::vector<size_t>& broadcast_shape) {
|
const std::vector<size_t>& broadcast_shape) {
|
||||||
std::vector<size_t> broadcast_offsets(broadcast_shape.size(), 0);
|
std::vector<size_t> broadcast_offsets(broadcast_shape.size(), 0);
|
||||||
for (int i = broadcast_shape.size() - 2; i >= 0; --i) {
|
for (int i = static_cast<int>(broadcast_shape.size()) - 2; i >= 0; --i) {
|
||||||
if (broadcast_shape[i] == 1) {
|
if (broadcast_shape[i] == 1) {
|
||||||
broadcast_offsets[i] = memory_offsets[i];
|
broadcast_offsets[i] = memory_offsets[i];
|
||||||
}
|
}
|
||||||
@ -34,7 +34,7 @@ inline std::vector<size_t> calc_broadcast_index_offset(const std::vector<size_t>
|
|||||||
broadcast_offsets[broadcast_offsets.size() - 1] = 1;
|
broadcast_offsets[broadcast_offsets.size() - 1] = 1;
|
||||||
}
|
}
|
||||||
if (broadcast_shape.back() == 1) {
|
if (broadcast_shape.back() == 1) {
|
||||||
for (int i = broadcast_shape.size() - 1; i >= 0; --i) {
|
for (int i = static_cast<int>(broadcast_shape.size()) - 1; i >= 0; --i) {
|
||||||
if (broadcast_shape[i] != 1) {
|
if (broadcast_shape[i] != 1) {
|
||||||
broadcast_offsets[i] = memory_offsets[i] - 1;
|
broadcast_offsets[i] = memory_offsets[i] - 1;
|
||||||
break;
|
break;
|
||||||
@ -45,7 +45,7 @@ inline std::vector<size_t> calc_broadcast_index_offset(const std::vector<size_t>
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline size_t calc_full_broadcast_offset(const std::vector<size_t>& current_dims, const std::vector<size_t>& offsets) {
|
inline size_t calc_full_broadcast_offset(const std::vector<size_t>& current_dims, const std::vector<size_t>& offsets) {
|
||||||
return std::inner_product(begin(current_dims), end(current_dims), begin(offsets), 0);
|
return std::inner_product(begin(current_dims), end(current_dims), begin(offsets), uint64_t(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Shape align_shape_sizes(const Shape& shape, const Shape& target_shape, const op::AutoBroadcastSpec& broadcast) {
|
inline Shape align_shape_sizes(const Shape& shape, const Shape& target_shape, const op::AutoBroadcastSpec& broadcast) {
|
||||||
@ -147,8 +147,9 @@ inline T quantize(const T& arg,
|
|||||||
} else if (arg > std::max(in_low, in_high)) {
|
} else if (arg > std::max(in_low, in_high)) {
|
||||||
return out_high;
|
return out_high;
|
||||||
}
|
}
|
||||||
return std::nearbyint((arg - in_low) / (in_high - in_low) * (levels - 1)) / (levels - 1) * (out_high - out_low) +
|
return static_cast<T>(std::nearbyint((arg - in_low) / (in_high - in_low) * (levels - 1)) / (levels - 1) *
|
||||||
out_low;
|
(out_high - out_low) +
|
||||||
|
out_low);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace fake_quantize_details
|
} // namespace fake_quantize_details
|
||||||
|
@ -24,7 +24,7 @@ void floor_mod(const T* arg0,
|
|||||||
// Cast to double is needed for integer input,
|
// Cast to double is needed for integer input,
|
||||||
// otherwise std::floor will act like std::trunc
|
// otherwise std::floor will act like std::trunc
|
||||||
const double divisor = static_cast<double>(y);
|
const double divisor = static_cast<double>(y);
|
||||||
return x - y * std::floor(x / divisor);
|
return static_cast<T>(x - y * std::floor(x / divisor));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -15,14 +15,14 @@ template <typename T>
|
|||||||
void gelu(const T* arg, T* out, op::GeluApproximationMode mode, size_t count) {
|
void gelu(const T* arg, T* out, op::GeluApproximationMode mode, size_t count) {
|
||||||
if (mode == op::GeluApproximationMode::ERF) {
|
if (mode == op::GeluApproximationMode::ERF) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = 0.5 * arg[i] * (1 + erf(arg[i] / std::sqrt(2.0)));
|
out[i] = static_cast<T>((0.5 * arg[i] * (1 + erf(arg[i] / std::sqrt(2.0)))));
|
||||||
}
|
}
|
||||||
} else if (mode == op::GeluApproximationMode::TANH) {
|
} else if (mode == op::GeluApproximationMode::TANH) {
|
||||||
const auto pi = atan(1.0) * 4.0;
|
const auto pi = atan(1.0) * 4.0;
|
||||||
const auto sqpi = std::sqrt(2.0 / pi);
|
const auto sqpi = std::sqrt(2.0 / pi);
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
auto& x = arg[i];
|
auto& x = arg[i];
|
||||||
out[i] = 0.5 * x * (1.0 + std::tanh(sqpi * (x + 0.044715 * std::pow(x, 3))));
|
out[i] = static_cast<T>(0.5 * x * (1.0 + std::tanh(sqpi * (x + 0.044715 * std::pow(x, 3)))));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -31,8 +31,8 @@ void greater(const T* arg0,
|
|||||||
const Shape& arg0_shape,
|
const Shape& arg0_shape,
|
||||||
const Shape& arg1_shape,
|
const Shape& arg1_shape,
|
||||||
const op::AutoBroadcastSpec& broadcast_spec) {
|
const op::AutoBroadcastSpec& broadcast_spec) {
|
||||||
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
|
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> U {
|
||||||
return x > y;
|
return static_cast<U>(x > y);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -31,8 +31,8 @@ void greater_eq(const T* arg0,
|
|||||||
const Shape& arg0_shape,
|
const Shape& arg0_shape,
|
||||||
const Shape& arg1_shape,
|
const Shape& arg1_shape,
|
||||||
const op::AutoBroadcastSpec& broadcast_spec) {
|
const op::AutoBroadcastSpec& broadcast_spec) {
|
||||||
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
|
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> U {
|
||||||
return x >= y;
|
return static_cast<U>(x >= y);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -13,7 +13,7 @@ namespace reference {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void hard_sigmoid(const T* arg, const T alpha, const T beta, T* out, size_t count) {
|
void hard_sigmoid(const T* arg, const T alpha, const T beta, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::max<T>(0.0f, std::min<T>(1.0f, alpha * arg[i] + beta));
|
out[i] = std::max<T>(T(0), std::min<T>(T(1), alpha * arg[i] + beta));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -484,7 +484,7 @@ void InterpolateEval<T>::linear_onnx_func(const T* input_data, T* out) {
|
|||||||
for (int64_t i = 0; i < points_in_neighbor; ++i) {
|
for (int64_t i = 0; i < points_in_neighbor; ++i) {
|
||||||
int64_t offset = 0;
|
int64_t offset = 0;
|
||||||
for (int64_t j = 0; j < spatial_rank; ++j) {
|
for (int64_t j = 0; j < spatial_rank; ++j) {
|
||||||
if (i & (1 << (spatial_rank - 1 - j))) {
|
if (i & (static_cast<int64_t>(1) << (spatial_rank - 1 - j))) {
|
||||||
offset += in1[j] * input_index_multipliers[j];
|
offset += in1[j] * input_index_multipliers[j];
|
||||||
} else {
|
} else {
|
||||||
offset += in2[j] * input_index_multipliers[j];
|
offset += in2[j] * input_index_multipliers[j];
|
||||||
@ -498,9 +498,9 @@ void InterpolateEval<T>::linear_onnx_func(const T* input_data, T* out) {
|
|||||||
for (int64_t i = 0; i < points_in_neighbor; ++i) {
|
for (int64_t i = 0; i < points_in_neighbor; ++i) {
|
||||||
float coeff = 1.0f;
|
float coeff = 1.0f;
|
||||||
for (int64_t j = 0; j < spatial_rank; ++j) {
|
for (int64_t j = 0; j < spatial_rank; ++j) {
|
||||||
coeff *= (i & (1 << (spatial_rank - 1 - j))) ? d1[j] : d2[j];
|
coeff *= (i & (static_cast<int64_t>(1) << (spatial_rank - 1 - j))) ? d1[j] : d2[j];
|
||||||
}
|
}
|
||||||
sum += coeff * values_of_input_points[points_in_neighbor - 1 - i];
|
sum += coeff * static_cast<float>(values_of_input_points[points_in_neighbor - 1 - i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 6. Store result.
|
// 6. Store result.
|
||||||
@ -533,7 +533,7 @@ void InterpolateEval<T>::cubic_func(const T* input_data, T* out) {
|
|||||||
int64_t in_coord_int = static_cast<int64_t>(std::floor(in_coord));
|
int64_t in_coord_int = static_cast<int64_t>(std::floor(in_coord));
|
||||||
base_coords[axis] = in_coord_int;
|
base_coords[axis] = in_coord_int;
|
||||||
auto s = static_cast<float>(in_coord - in_coord_int);
|
auto s = static_cast<float>(in_coord - in_coord_int);
|
||||||
cubic_coeffs[axis] = helper.get_cubic_coeff(s, m_cube_coeff);
|
cubic_coeffs[axis] = helper.get_cubic_coeff(s, static_cast<float>(m_cube_coeff));
|
||||||
}
|
}
|
||||||
|
|
||||||
float summa = 0.0f;
|
float summa = 0.0f;
|
||||||
@ -553,7 +553,7 @@ void InterpolateEval<T>::cubic_func(const T* input_data, T* out) {
|
|||||||
coeffs_prod *= cubic_coeffs[axis][idx[i]];
|
coeffs_prod *= cubic_coeffs[axis][idx[i]];
|
||||||
}
|
}
|
||||||
|
|
||||||
summa += coeffs_prod * input_data[input_transform.index(coords_for_sum)];
|
summa += coeffs_prod * static_cast<float>(input_data[input_transform.index(coords_for_sum)]);
|
||||||
}
|
}
|
||||||
|
|
||||||
out[output_transform.index(output_coord)] = static_cast<T>(summa);
|
out[output_transform.index(output_coord)] = static_cast<T>(summa);
|
||||||
|
@ -31,8 +31,8 @@ void less(const T* arg0,
|
|||||||
const Shape& arg0_shape,
|
const Shape& arg0_shape,
|
||||||
const Shape& arg1_shape,
|
const Shape& arg1_shape,
|
||||||
const op::AutoBroadcastSpec& broadcast_spec) {
|
const op::AutoBroadcastSpec& broadcast_spec) {
|
||||||
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
|
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> U {
|
||||||
return x < y;
|
return static_cast<U>(x < y);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -31,8 +31,8 @@ void less_eq(const T* arg0,
|
|||||||
const Shape& arg0_shape,
|
const Shape& arg0_shape,
|
||||||
const Shape& arg1_shape,
|
const Shape& arg1_shape,
|
||||||
const op::AutoBroadcastSpec& broadcast_spec) {
|
const op::AutoBroadcastSpec& broadcast_spec) {
|
||||||
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
|
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> U {
|
||||||
return x <= y;
|
return static_cast<U>(x <= y);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -13,7 +13,7 @@ namespace reference {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void log(const T* arg, T* out, size_t count) {
|
void log(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::log(arg[i]);
|
out[i] = static_cast<T>(std::log(arg[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -29,15 +29,16 @@ void log_softmax(const T* arg, T* out, const Shape& shape, const AxisSet& axes)
|
|||||||
for (const Coordinate& coord : transform) {
|
for (const Coordinate& coord : transform) {
|
||||||
Coordinate temp_coord = reduce(coord, axes, true);
|
Coordinate temp_coord = reduce(coord, axes, true);
|
||||||
out[transform.index(coord)] =
|
out[transform.index(coord)] =
|
||||||
std::exp(arg[transform.index(coord)] - temp_max[temp_transform.index(temp_coord)]);
|
static_cast<T>(std::exp(arg[transform.index(coord)] - temp_max[temp_transform.index(temp_coord)]));
|
||||||
}
|
}
|
||||||
|
|
||||||
sum(out, temp_sum.data(), shape, axes);
|
sum(out, temp_sum.data(), shape, axes);
|
||||||
|
|
||||||
for (const Coordinate& coord : transform) {
|
for (const Coordinate& coord : transform) {
|
||||||
Coordinate temp_coord = reduce(coord, axes, true);
|
Coordinate temp_coord = reduce(coord, axes, true);
|
||||||
out[transform.index(coord)] = (arg[transform.index(coord)] - temp_max[temp_transform.index(temp_coord)]) -
|
out[transform.index(coord)] =
|
||||||
std::log(temp_sum[temp_transform.index(temp_coord)]);
|
static_cast<T>((arg[transform.index(coord)] - temp_max[temp_transform.index(temp_coord)]) -
|
||||||
|
std::log(temp_sum[temp_transform.index(temp_coord)]));
|
||||||
}
|
}
|
||||||
NGRAPH_SUPPRESS_DEPRECATED_END
|
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||||
}
|
}
|
||||||
|
@ -28,8 +28,10 @@ static inline void reduce_logical_and(const char* arg,
|
|||||||
for (const Coordinate& input_coord : input_transform) {
|
for (const Coordinate& input_coord : input_transform) {
|
||||||
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
||||||
|
|
||||||
const size_t in_idx = std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), 0);
|
const size_t in_idx =
|
||||||
const size_t out_idx = std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), 0);
|
std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), uint64_t(0));
|
||||||
|
const size_t out_idx =
|
||||||
|
std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), uint64_t(0));
|
||||||
|
|
||||||
out[out_idx] = out[out_idx] && arg[in_idx];
|
out[out_idx] = out[out_idx] && arg[in_idx];
|
||||||
}
|
}
|
||||||
@ -46,8 +48,10 @@ static inline void reduce_logical_or(const char* arg, char* out, const Shape& in
|
|||||||
for (const Coordinate& input_coord : input_transform) {
|
for (const Coordinate& input_coord : input_transform) {
|
||||||
const Coordinate output_coord = reduce(input_coord, reduction_axes, false);
|
const Coordinate output_coord = reduce(input_coord, reduction_axes, false);
|
||||||
|
|
||||||
const size_t in_idx = std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), 0);
|
const size_t in_idx =
|
||||||
const size_t out_idx = std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), 0);
|
std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), uint64_t(0));
|
||||||
|
const size_t out_idx =
|
||||||
|
std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), uint64_t(0));
|
||||||
|
|
||||||
out[out_idx] = out[out_idx] || arg[in_idx];
|
out[out_idx] = out[out_idx] || arg[in_idx];
|
||||||
}
|
}
|
||||||
|
@ -33,7 +33,7 @@ static std::vector<size_t> slice_indices(const Shape& full_shape,
|
|||||||
indices.reserve(slice_size);
|
indices.reserve(slice_size);
|
||||||
indices.push_back(point_to_flat_idx(full_shape, coord));
|
indices.push_back(point_to_flat_idx(full_shape, coord));
|
||||||
for (size_t i = 0; i < slice_size - 1; i++) {
|
for (size_t i = 0; i < slice_size - 1; i++) {
|
||||||
for (int r = rank - 1; r >= 0; r--) {
|
for (int r = static_cast<int>(rank) - 1; r >= 0; r--) {
|
||||||
coord[r]++;
|
coord[r]++;
|
||||||
if (coord[r] < (begin[r] + slice_shape[r]))
|
if (coord[r] < (begin[r] + slice_shape[r]))
|
||||||
break;
|
break;
|
||||||
@ -66,7 +66,7 @@ void lrn(const T* arg,
|
|||||||
T alpha = static_cast<T>(dalpha);
|
T alpha = static_cast<T>(dalpha);
|
||||||
T beta = static_cast<T>(dbeta);
|
T beta = static_cast<T>(dbeta);
|
||||||
T bias = static_cast<T>(dbias);
|
T bias = static_cast<T>(dbias);
|
||||||
T scale = alpha / std::pow(size, axes.size());
|
T scale = alpha / static_cast<T>(std::pow(size, axes.size()));
|
||||||
|
|
||||||
std::vector<size_t> begin_area(arg_shape.size());
|
std::vector<size_t> begin_area(arg_shape.size());
|
||||||
Shape area_shape(arg_shape.size(), 1);
|
Shape area_shape(arg_shape.size(), 1);
|
||||||
@ -80,8 +80,10 @@ void lrn(const T* arg,
|
|||||||
// area determined by in_coord local neighborhood
|
// area determined by in_coord local neighborhood
|
||||||
for (size_t i = 0; i < axes_map.size(); i++) {
|
for (size_t i = 0; i < axes_map.size(); i++) {
|
||||||
if (axes_map[i]) {
|
if (axes_map[i]) {
|
||||||
begin_area[i] = std::max<int>(0, in_coord.at(i) - (size - 1) / 2);
|
begin_area[i] = std::max<int>(0, static_cast<int>(in_coord.at(i)) - (static_cast<int>(size) - 1) / 2);
|
||||||
area_shape[i] = std::min<int>(arg_shape.at(i), in_coord.at(i) + (size - 1) / 2 + 1) - begin_area[i];
|
area_shape[i] = std::min<int>(static_cast<int>(arg_shape.at(i)),
|
||||||
|
static_cast<int>(in_coord.at(i)) + (static_cast<int>(size) - 1) / 2 + 1) -
|
||||||
|
begin_area[i];
|
||||||
} else {
|
} else {
|
||||||
begin_area[i] = in_coord.at(i);
|
begin_area[i] = in_coord.at(i);
|
||||||
}
|
}
|
||||||
@ -90,7 +92,7 @@ void lrn(const T* arg,
|
|||||||
T square_sum = sum_region_across_axes(arg, slice_indices(arg_shape, begin_area, area_shape));
|
T square_sum = sum_region_across_axes(arg, slice_indices(arg_shape, begin_area, area_shape));
|
||||||
auto index = input_transform.index(in_coord);
|
auto index = input_transform.index(in_coord);
|
||||||
T x = arg[index];
|
T x = arg[index];
|
||||||
out[index] = x / (std::pow(bias + scale * square_sum, beta));
|
out[index] = x / static_cast<T>(std::pow(bias + scale * square_sum, beta));
|
||||||
}
|
}
|
||||||
NGRAPH_SUPPRESS_DEPRECATED_END
|
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||||
}
|
}
|
||||||
|
@ -302,7 +302,7 @@ void lstm_cell_v1(const T* X,
|
|||||||
std::vector<T> XHBPi(gate_shape_size);
|
std::vector<T> XHBPi(gate_shape_size);
|
||||||
if (input_forget) {
|
if (input_forget) {
|
||||||
// it = (1 - ft)
|
// it = (1 - ft)
|
||||||
std::vector<T> ones(gate_shape_size, 1.f);
|
std::vector<T> ones(gate_shape_size, T(1));
|
||||||
reference::subtract(ones.data(),
|
reference::subtract(ones.data(),
|
||||||
XHBPf.data(),
|
XHBPf.data(),
|
||||||
XHBPi.data(),
|
XHBPi.data(),
|
||||||
|
@ -29,8 +29,10 @@ void max(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_a
|
|||||||
for (const Coordinate& input_coord : input_transform) {
|
for (const Coordinate& input_coord : input_transform) {
|
||||||
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
||||||
|
|
||||||
const size_t in_idx = std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), 0);
|
const size_t in_idx =
|
||||||
const size_t out_idx = std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), 0);
|
std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), uint64_t(0));
|
||||||
|
const size_t out_idx =
|
||||||
|
std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), uint64_t(0));
|
||||||
|
|
||||||
const T x = arg[in_idx];
|
const T x = arg[in_idx];
|
||||||
const T max = out[out_idx];
|
const T max = out[out_idx];
|
||||||
|
@ -147,7 +147,7 @@ struct Coord : public std::vector<T> {
|
|||||||
Coord(std::initializer_list<T>&& values) : std::vector<T>{std::move(values)} {}
|
Coord(std::initializer_list<T>&& values) : std::vector<T>{std::move(values)} {}
|
||||||
};
|
};
|
||||||
|
|
||||||
inline bool elem_in_padding_area(const Coord<int>& kernel_position,
|
inline bool elem_in_padding_area(const Coord<size_t>& kernel_position,
|
||||||
const Coord<size_t>& kernel_offset,
|
const Coord<size_t>& kernel_offset,
|
||||||
const Shape& data_shape) {
|
const Shape& data_shape) {
|
||||||
for (size_t dim = 0; dim + 2 < data_shape.size(); ++dim) {
|
for (size_t dim = 0; dim + 2 < data_shape.size(); ++dim) {
|
||||||
@ -160,10 +160,10 @@ inline bool elem_in_padding_area(const Coord<int>& kernel_position,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Coord<int> calculate_kernel_position(const Coord<size_t>& out_elem_coord,
|
inline Coord<size_t> calculate_kernel_position(const Coord<size_t>& out_elem_coord,
|
||||||
const Strides& kernel_strides,
|
const Strides& kernel_strides,
|
||||||
const Shape& pads_begin) {
|
const Shape& pads_begin) {
|
||||||
Coord<int> top_left_corner;
|
Coord<size_t> top_left_corner;
|
||||||
top_left_corner.reserve(out_elem_coord.size());
|
top_left_corner.reserve(out_elem_coord.size());
|
||||||
for (size_t i = 0u; i < out_elem_coord.size(); ++i) {
|
for (size_t i = 0u; i < out_elem_coord.size(); ++i) {
|
||||||
top_left_corner.emplace_back(out_elem_coord[i] * kernel_strides[i] - pads_begin[i]);
|
top_left_corner.emplace_back(out_elem_coord[i] * kernel_strides[i] - pads_begin[i]);
|
||||||
@ -184,7 +184,7 @@ void max_pool_1d(const Values_t* data,
|
|||||||
const size_t pads_begin,
|
const size_t pads_begin,
|
||||||
const size_t pads_end,
|
const size_t pads_end,
|
||||||
const size_t indices_offset) {
|
const size_t indices_offset) {
|
||||||
int kernel_position = 0 - pads_begin;
|
int kernel_position = 0 - static_cast<int>(pads_begin);
|
||||||
// select max elem and its index for each "placeholder" in the out buffer (pointed to by out_idx)
|
// select max elem and its index for each "placeholder" in the out buffer (pointed to by out_idx)
|
||||||
for (size_t out_idx = 0; out_idx < out_elems; ++out_idx) {
|
for (size_t out_idx = 0; out_idx < out_elems; ++out_idx) {
|
||||||
Values_t max_elem = std::numeric_limits<Values_t>::lowest();
|
Values_t max_elem = std::numeric_limits<Values_t>::lowest();
|
||||||
@ -195,12 +195,12 @@ void max_pool_1d(const Values_t* data,
|
|||||||
if (kernel_position + kernel_elem_offset >= 0 && kernel_position + kernel_elem_offset < data_elems &&
|
if (kernel_position + kernel_elem_offset >= 0 && kernel_position + kernel_elem_offset < data_elems &&
|
||||||
data[kernel_position + kernel_elem_offset] > max_elem) {
|
data[kernel_position + kernel_elem_offset] > max_elem) {
|
||||||
max_elem = data[kernel_position + kernel_elem_offset];
|
max_elem = data[kernel_position + kernel_elem_offset];
|
||||||
max_elem_idx = kernel_position + kernel_elem_offset;
|
max_elem_idx = static_cast<Indices_t>(kernel_position + kernel_elem_offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
values[out_idx] = max_elem;
|
values[out_idx] = max_elem;
|
||||||
indices[out_idx] = max_elem_idx + indices_offset;
|
indices[out_idx] = static_cast<Indices_t>(max_elem_idx + indices_offset);
|
||||||
kernel_position += kernel_stride;
|
kernel_position += static_cast<int>(kernel_stride);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -246,14 +246,14 @@ void max_pool_2d(const Values_t* data,
|
|||||||
|
|
||||||
if (data[data_elem_index] > max_elem) {
|
if (data[data_elem_index] > max_elem) {
|
||||||
max_elem = data[data_elem_index];
|
max_elem = data[data_elem_index];
|
||||||
max_elem_idx = data_elem_index;
|
max_elem_idx = static_cast<Indices_t>(data_elem_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
values[out_idx] = max_elem;
|
values[out_idx] = max_elem;
|
||||||
indices[out_idx] = max_elem_idx + indices_offset;
|
indices[out_idx] = static_cast<Indices_t>(max_elem_idx + indices_offset);
|
||||||
++out_idx;
|
++out_idx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -307,14 +307,14 @@ void max_pool_3d(const Values_t* data,
|
|||||||
|
|
||||||
if (data[data_elem_index] > max_elem) {
|
if (data[data_elem_index] > max_elem) {
|
||||||
max_elem = data[data_elem_index];
|
max_elem = data[data_elem_index];
|
||||||
max_elem_idx = data_elem_index;
|
max_elem_idx = static_cast<Indices_t>(data_elem_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
values[out_idx] = max_elem;
|
values[out_idx] = max_elem;
|
||||||
indices[out_idx] = max_elem_idx + indices_offset;
|
indices[out_idx] = static_cast<Indices_t>(max_elem_idx + indices_offset);
|
||||||
++out_idx;
|
++out_idx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -342,7 +342,7 @@ void max_pool(const Values_t* data,
|
|||||||
const auto out_channel_elems = shape_size(std::begin(out_shape) + 2, std::end(out_shape));
|
const auto out_channel_elems = shape_size(std::begin(out_shape) + 2, std::end(out_shape));
|
||||||
|
|
||||||
for (size_t b = 0; b < data_shape[0]; ++b) {
|
for (size_t b = 0; b < data_shape[0]; ++b) {
|
||||||
const Indices_t batch_indices_offset = b * data_batch_elems;
|
const Indices_t batch_indices_offset = static_cast<Indices_t>(b * data_batch_elems);
|
||||||
|
|
||||||
for (size_t c = 0; c < data_shape[1]; ++c) {
|
for (size_t c = 0; c < data_shape[1]; ++c) {
|
||||||
// calculate the buffer offsets for a given channel "c" then execute an appropriate
|
// calculate the buffer offsets for a given channel "c" then execute an appropriate
|
||||||
@ -350,7 +350,7 @@ void max_pool(const Values_t* data,
|
|||||||
const Values_t* data_channel_first_elem = data + b * data_batch_elems + c * data_channel_elems;
|
const Values_t* data_channel_first_elem = data + b * data_batch_elems + c * data_channel_elems;
|
||||||
Values_t* out_channel_first_elem = values + b * out_batch_elems + c * out_channel_elems;
|
Values_t* out_channel_first_elem = values + b * out_batch_elems + c * out_channel_elems;
|
||||||
Indices_t* indices_channel_first_elem = indices + b * out_batch_elems + c * out_channel_elems;
|
Indices_t* indices_channel_first_elem = indices + b * out_batch_elems + c * out_channel_elems;
|
||||||
const Indices_t channel_indices_offset = c * data_channel_elems;
|
const Indices_t channel_indices_offset = static_cast<Indices_t>(c * data_channel_elems);
|
||||||
// total offset of the flattened tensor indices for currently processed batch and channel
|
// total offset of the flattened tensor indices for currently processed batch and channel
|
||||||
const Indices_t indices_offset = batch_indices_offset + channel_indices_offset;
|
const Indices_t indices_offset = batch_indices_offset + channel_indices_offset;
|
||||||
|
|
||||||
@ -401,7 +401,8 @@ void max_pool(const Values_t* data,
|
|||||||
|
|
||||||
// adjust the calculated indices to the requested range (specified by the axis attribute) if needed
|
// adjust the calculated indices to the requested range (specified by the axis attribute) if needed
|
||||||
if (axis != 0) {
|
if (axis != 0) {
|
||||||
const Indices_t max_index = shape_size(std::begin(data_shape) + axis, std::end(data_shape));
|
const Indices_t max_index =
|
||||||
|
static_cast<Indices_t>(shape_size(std::begin(data_shape) + axis, std::end(data_shape)));
|
||||||
|
|
||||||
const auto indices_number = shape_size(out_shape);
|
const auto indices_number = shape_size(out_shape);
|
||||||
for (size_t i = 0; i < indices_number; ++i) {
|
for (size_t i = 0; i < indices_number; ++i) {
|
||||||
|
@ -23,7 +23,7 @@ void mean(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_
|
|||||||
constexpr bool dont_keep_dims_in_output = false;
|
constexpr bool dont_keep_dims_in_output = false;
|
||||||
const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output);
|
const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output);
|
||||||
std::vector<T> cs(shape_size(out_shape), 0);
|
std::vector<T> cs(shape_size(out_shape), 0);
|
||||||
std::fill(out, out + shape_size(out_shape), 0);
|
std::fill(out, out + shape_size(out_shape), T(0));
|
||||||
|
|
||||||
const auto in_strides = row_major_strides(in_shape);
|
const auto in_strides = row_major_strides(in_shape);
|
||||||
const auto out_strides = row_major_strides(out_shape);
|
const auto out_strides = row_major_strides(out_shape);
|
||||||
@ -34,8 +34,10 @@ void mean(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_
|
|||||||
for (const Coordinate& input_coord : input_transform) {
|
for (const Coordinate& input_coord : input_transform) {
|
||||||
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
||||||
|
|
||||||
const size_t in_idx = std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), 0);
|
const size_t in_idx =
|
||||||
const size_t out_idx = std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), 0);
|
std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), uint64_t(0));
|
||||||
|
const size_t out_idx =
|
||||||
|
std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), uint64_t(0));
|
||||||
|
|
||||||
details::kahan_summation(arg[in_idx], cs[out_idx], out[out_idx]);
|
details::kahan_summation(arg[in_idx], cs[out_idx], out[out_idx]);
|
||||||
|
|
||||||
|
@ -34,8 +34,10 @@ void min(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_a
|
|||||||
for (const Coordinate& input_coord : input_transform) {
|
for (const Coordinate& input_coord : input_transform) {
|
||||||
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
||||||
|
|
||||||
const size_t in_idx = std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), 0);
|
const size_t in_idx =
|
||||||
const size_t out_idx = std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), 0);
|
std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), uint64_t(0));
|
||||||
|
const size_t out_idx =
|
||||||
|
std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), uint64_t(0));
|
||||||
|
|
||||||
const T x = arg[in_idx];
|
const T x = arg[in_idx];
|
||||||
const T min = out[out_idx];
|
const T min = out[out_idx];
|
||||||
|
@ -13,7 +13,7 @@ namespace reference {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void mish(const T* arg, T* out, size_t count) {
|
void mish(const T* arg, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = arg[i] * std::tanh(std::log((std::exp(arg[i]) + 1.0)));
|
out[i] = static_cast<T>(arg[i] * std::tanh(std::log((std::exp(arg[i]) + 1.0))));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -20,7 +20,7 @@ void mod(const T* arg0,
|
|||||||
const Shape& arg_shape1,
|
const Shape& arg_shape1,
|
||||||
const op::AutoBroadcastSpec& broadcast_spec) {
|
const op::AutoBroadcastSpec& broadcast_spec) {
|
||||||
autobroadcast_binop(arg0, arg1, out, arg_shape0, arg_shape1, broadcast_spec, [](T x, T y) -> T {
|
autobroadcast_binop(arg0, arg1, out, arg_shape0, arg_shape1, broadcast_spec, [](T x, T y) -> T {
|
||||||
return T(x - std::truncf(x / y) * y);
|
return static_cast<T>(x - std::truncf(static_cast<float>(x / y)) * y);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -36,7 +36,7 @@ void mvn(const T* arg,
|
|||||||
mean(tmp_buffer.data(), mean_value.data(), in_shape, reduction_axes);
|
mean(tmp_buffer.data(), mean_value.data(), in_shape, reduction_axes);
|
||||||
|
|
||||||
add(mean_value.data(),
|
add(mean_value.data(),
|
||||||
std::vector<T>(shape_size(reduced_shape), eps).data(),
|
std::vector<T>(shape_size(reduced_shape), static_cast<T>(eps)).data(),
|
||||||
tmp_buffer.data(),
|
tmp_buffer.data(),
|
||||||
reduced_shape,
|
reduced_shape,
|
||||||
reduced_shape,
|
reduced_shape,
|
||||||
@ -67,7 +67,7 @@ void mvn_6(const T* arg,
|
|||||||
|
|
||||||
if (eps_mode == op::MVNEpsMode::INSIDE_SQRT) {
|
if (eps_mode == op::MVNEpsMode::INSIDE_SQRT) {
|
||||||
add(mean_value.data(),
|
add(mean_value.data(),
|
||||||
std::vector<T>(shape_size(reduced_shape), eps).data(),
|
std::vector<T>(shape_size(reduced_shape), static_cast<T>(eps)).data(),
|
||||||
tmp_buffer.data(),
|
tmp_buffer.data(),
|
||||||
reduced_shape,
|
reduced_shape,
|
||||||
reduced_shape,
|
reduced_shape,
|
||||||
@ -76,7 +76,7 @@ void mvn_6(const T* arg,
|
|||||||
} else {
|
} else {
|
||||||
sqrt(mean_value.data(), tmp_buffer.data(), shape_size(reduced_shape));
|
sqrt(mean_value.data(), tmp_buffer.data(), shape_size(reduced_shape));
|
||||||
add(tmp_buffer.data(),
|
add(tmp_buffer.data(),
|
||||||
std::vector<T>(shape_size(reduced_shape), eps).data(),
|
std::vector<T>(shape_size(reduced_shape), static_cast<T>(eps)).data(),
|
||||||
tmp_buffer.data(),
|
tmp_buffer.data(),
|
||||||
reduced_shape,
|
reduced_shape,
|
||||||
reduced_shape,
|
reduced_shape,
|
||||||
|
@ -23,7 +23,7 @@ void normalize_l2(const T* data,
|
|||||||
// When axes is an empty list, then each `data` element is divided by itself
|
// When axes is an empty list, then each `data` element is divided by itself
|
||||||
// resulting value 1 for all non-zero elements
|
// resulting value 1 for all non-zero elements
|
||||||
for (size_t i = 0; i < shape_size(data_shape); ++i) {
|
for (size_t i = 0; i < shape_size(data_shape); ++i) {
|
||||||
out[i] = data[i] == 0 ? 0 : 1;
|
out[i] = data[i] == 0 ? T(0) : T(1);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -47,8 +47,9 @@ void normalize_l2(const T* data,
|
|||||||
reduce_shape,
|
reduce_shape,
|
||||||
op::AutoBroadcastSpec(op::AutoBroadcastType::NUMPY),
|
op::AutoBroadcastSpec(op::AutoBroadcastType::NUMPY),
|
||||||
[&eps, &eps_mode](T x, T y) -> T {
|
[&eps, &eps_mode](T x, T y) -> T {
|
||||||
T arg = (eps_mode == op::EpsMode::ADD) ? y + eps : std::max(y, static_cast<T>(eps));
|
T arg = (eps_mode == op::EpsMode::ADD) ? y + static_cast<T>(eps)
|
||||||
return x / std::sqrt(arg);
|
: std::max(y, static_cast<T>(eps));
|
||||||
|
return x / static_cast<T>(std::sqrt(arg));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -25,8 +25,8 @@ void not_equal(const T* arg0,
|
|||||||
const Shape& arg0_shape,
|
const Shape& arg0_shape,
|
||||||
const Shape& arg1_shape,
|
const Shape& arg1_shape,
|
||||||
const op::AutoBroadcastSpec& broadcast_spec) {
|
const op::AutoBroadcastSpec& broadcast_spec) {
|
||||||
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
|
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> U {
|
||||||
return x != y;
|
return static_cast<U>(x != y);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -17,7 +17,7 @@ namespace reference {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void power(const T* arg0, const T* arg1, T* out, size_t count) {
|
void power(const T* arg0, const T* arg1, T* out, size_t count) {
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++) {
|
||||||
out[i] = std::pow(arg0[i], arg1[i]);
|
out[i] = static_cast<T>(std::pow(arg0[i], arg1[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -29,7 +29,7 @@ void power(const T* arg0,
|
|||||||
const Shape& arg1_shape,
|
const Shape& arg1_shape,
|
||||||
const op::AutoBroadcastSpec& broadcast_spec) {
|
const op::AutoBroadcastSpec& broadcast_spec) {
|
||||||
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
|
autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
|
||||||
return std::pow(x, y);
|
return static_cast<T>(std::pow(x, y));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
@ -17,7 +17,7 @@ template <typename T>
|
|||||||
void product(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_axes) {
|
void product(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_axes) {
|
||||||
constexpr bool dont_keep_dims_in_output = false;
|
constexpr bool dont_keep_dims_in_output = false;
|
||||||
const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output);
|
const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output);
|
||||||
std::fill(out, out + shape_size(out_shape), 1);
|
std::fill(out, out + shape_size(out_shape), T(1));
|
||||||
|
|
||||||
const auto in_strides = row_major_strides(in_shape);
|
const auto in_strides = row_major_strides(in_shape);
|
||||||
const auto out_strides = row_major_strides(out_shape);
|
const auto out_strides = row_major_strides(out_shape);
|
||||||
@ -26,8 +26,10 @@ void product(const T* arg, T* out, const Shape& in_shape, const AxisSet& reducti
|
|||||||
for (const Coordinate& input_coord : input_transform) {
|
for (const Coordinate& input_coord : input_transform) {
|
||||||
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
||||||
|
|
||||||
const size_t in_idx = std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), 0);
|
const size_t in_idx =
|
||||||
const size_t out_idx = std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), 0);
|
std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), uint64_t(0));
|
||||||
|
const size_t out_idx =
|
||||||
|
std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), uint64_t(0));
|
||||||
|
|
||||||
out[out_idx] = out[out_idx] * arg[in_idx];
|
out[out_idx] = out[out_idx] * arg[in_idx];
|
||||||
}
|
}
|
||||||
|
@ -146,15 +146,15 @@ static void enumerate_proposals(const T* bottom4d,
|
|||||||
const T pred_ctr_x = dx * static_cast<T>(ww) + static_cast<T>(ctr_x);
|
const T pred_ctr_x = dx * static_cast<T>(ww) + static_cast<T>(ctr_x);
|
||||||
const T pred_ctr_y = dy * static_cast<T>(hh) + static_cast<T>(ctr_y);
|
const T pred_ctr_y = dy * static_cast<T>(hh) + static_cast<T>(ctr_y);
|
||||||
// new width & height according to gradient d(log w), d(log h)
|
// new width & height according to gradient d(log w), d(log h)
|
||||||
const T pred_w = std::exp(d_log_w) * static_cast<T>(ww);
|
const T pred_w = static_cast<T>(std::exp(d_log_w) * ww);
|
||||||
const T pred_h = std::exp(d_log_h) * static_cast<T>(hh);
|
const T pred_h = static_cast<T>(std::exp(d_log_h) * hh);
|
||||||
|
|
||||||
// update upper-left corner location
|
// update upper-left corner location
|
||||||
x0 = pred_ctr_x - 0.5f * pred_w;
|
x0 = static_cast<float>(pred_ctr_x - 0.5f * pred_w);
|
||||||
y0 = pred_ctr_y - 0.5f * pred_h;
|
y0 = static_cast<float>(pred_ctr_y - 0.5f * pred_h);
|
||||||
// update lower-right corner location
|
// update lower-right corner location
|
||||||
x1 = pred_ctr_x + 0.5f * pred_w;
|
x1 = static_cast<float>(pred_ctr_x + 0.5f * pred_w);
|
||||||
y1 = pred_ctr_y + 0.5f * pred_h;
|
y1 = static_cast<float>(pred_ctr_y + 0.5f * pred_h);
|
||||||
|
|
||||||
// adjust new corner locations to be within the image region,
|
// adjust new corner locations to be within the image region,
|
||||||
if (clip_before_nms) {
|
if (clip_before_nms) {
|
||||||
@ -230,8 +230,8 @@ static void nms(const int num_boxes,
|
|||||||
const T x1 = std::min(x1i, x1j);
|
const T x1 = std::min(x1i, x1j);
|
||||||
const T y1 = std::min(y1i, y1j);
|
const T y1 = std::min(y1i, y1j);
|
||||||
// intersection area
|
// intersection area
|
||||||
const T width = std::max<T>(0.0f, x1 - x0 + coordinates_offset);
|
const T width = std::max<T>(static_cast<T>(0), x1 - x0 + coordinates_offset);
|
||||||
const T height = std::max<T>(0.0f, y1 - y0 + coordinates_offset);
|
const T height = std::max<T>(static_cast<T>(0), y1 - y0 + coordinates_offset);
|
||||||
const T area = width * height;
|
const T area = width * height;
|
||||||
// area of A, B
|
// area of A, B
|
||||||
const T A_area = (x1i - x0i + coordinates_offset) * (y1i - y0i + coordinates_offset);
|
const T A_area = (x1i - x0i + coordinates_offset) * (y1i - y0i + coordinates_offset);
|
||||||
@ -267,10 +267,10 @@ static void retrieve_rois(const int num_rois,
|
|||||||
T y1 = proposals[index].y1;
|
T y1 = proposals[index].y1;
|
||||||
|
|
||||||
if (clip_after_nms) {
|
if (clip_after_nms) {
|
||||||
x0 = std::max<T>(0.0f, std::min(x0, static_cast<T>(img_w)));
|
x0 = std::max<T>(T(0), std::min(x0, static_cast<T>(img_w)));
|
||||||
y0 = std::max<T>(0.0f, std::min(y0, static_cast<T>(img_h)));
|
y0 = std::max<T>(T(0), std::min(y0, static_cast<T>(img_h)));
|
||||||
x1 = std::max<T>(0.0f, std::min(x1, static_cast<T>(img_w)));
|
x1 = std::max<T>(T(0), std::min(x1, static_cast<T>(img_w)));
|
||||||
y1 = std::max<T>(0.0f, std::min(y1, static_cast<T>(img_h)));
|
y1 = std::max<T>(T(0), std::min(y1, static_cast<T>(img_h)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (normalize) {
|
if (normalize) {
|
||||||
@ -326,8 +326,8 @@ static void proposal_exec(const T* class_probs,
|
|||||||
T* p_prob_item = attrs.infer_probs ? out_probs : nullptr;
|
T* p_prob_item = attrs.infer_probs ? out_probs : nullptr;
|
||||||
|
|
||||||
// bottom shape (batch_size * (2 * num_anchors) * H * W)
|
// bottom shape (batch_size * (2 * num_anchors) * H * W)
|
||||||
const unsigned int bottom_H = class_probs_shape[2];
|
const unsigned int bottom_H = static_cast<unsigned int>(class_probs_shape[2]);
|
||||||
const unsigned int bottom_W = class_probs_shape[3];
|
const unsigned int bottom_W = static_cast<unsigned int>(class_probs_shape[3]);
|
||||||
// input image height and width
|
// input image height and width
|
||||||
const T img_H = image_shape[0];
|
const T img_H = image_shape[0];
|
||||||
const T img_W = image_shape[1];
|
const T img_W = image_shape[1];
|
||||||
@ -337,21 +337,21 @@ static void proposal_exec(const T* class_probs,
|
|||||||
// or be the same for both {image_height, image_width, scale_height_and_width}
|
// or be the same for both {image_height, image_width, scale_height_and_width}
|
||||||
const T scale_H = image_shape[2];
|
const T scale_H = image_shape[2];
|
||||||
const T scale_W = (image_shape_shape.size() < 4 ? scale_H : image_shape[3]);
|
const T scale_W = (image_shape_shape.size() < 4 ? scale_H : image_shape[3]);
|
||||||
const T min_box_H = attrs.min_size * scale_H;
|
const T min_box_H = static_cast<T>(attrs.min_size * scale_H);
|
||||||
const T min_box_W = attrs.min_size * scale_W;
|
const T min_box_W = static_cast<T>(attrs.min_size * scale_W);
|
||||||
// get number of proposals
|
// get number of proposals
|
||||||
// class_probs shape is {batch_size, anchor_count*2, bottom_H, bottom_W}
|
// class_probs shape is {batch_size, anchor_count*2, bottom_H, bottom_W}
|
||||||
const unsigned int anchor_count = class_probs_shape[1] / 2;
|
const unsigned int anchor_count = static_cast<unsigned int>(class_probs_shape[1] / 2);
|
||||||
const unsigned int num_proposals = anchor_count * bottom_H * bottom_W;
|
const unsigned int num_proposals = anchor_count * bottom_H * bottom_W;
|
||||||
// final RoI count
|
// final RoI count
|
||||||
int num_rois = 0;
|
int num_rois = 0;
|
||||||
std::vector<ProposalBox<T>> proposals(num_proposals);
|
std::vector<ProposalBox<T>> proposals(num_proposals);
|
||||||
const int pre_nms_topn = num_proposals < attrs.pre_nms_topn ? num_proposals : attrs.pre_nms_topn;
|
const int pre_nms_topn = static_cast<int>(num_proposals < attrs.pre_nms_topn ? num_proposals : attrs.pre_nms_topn);
|
||||||
std::vector<unsigned int> roi_indices(attrs.post_nms_topn);
|
std::vector<unsigned int> roi_indices(attrs.post_nms_topn);
|
||||||
|
|
||||||
std::vector<float> anchors = generate_anchors(attrs, anchor_count);
|
std::vector<float> anchors = generate_anchors(attrs, anchor_count);
|
||||||
|
|
||||||
unsigned int batch_num = class_probs_shape[0];
|
unsigned int batch_num = static_cast<unsigned int>(class_probs_shape[0]);
|
||||||
float coordinates_offset = attrs.framework == "tensorflow" ? 0.0f : 1.0f;
|
float coordinates_offset = attrs.framework == "tensorflow" ? 0.0f : 1.0f;
|
||||||
bool initial_clip = attrs.framework == "tensorflow";
|
bool initial_clip = attrs.framework == "tensorflow";
|
||||||
bool swap_xy = attrs.framework == "tensorflow";
|
bool swap_xy = attrs.framework == "tensorflow";
|
||||||
@ -367,11 +367,11 @@ static void proposal_exec(const T* class_probs,
|
|||||||
anchor_count,
|
anchor_count,
|
||||||
bottom_H,
|
bottom_H,
|
||||||
bottom_W,
|
bottom_W,
|
||||||
img_H,
|
static_cast<float>(img_H),
|
||||||
img_W,
|
static_cast<float>(img_W),
|
||||||
min_box_H,
|
static_cast<float>(min_box_H),
|
||||||
min_box_W,
|
static_cast<float>(min_box_W),
|
||||||
attrs.feat_stride,
|
static_cast<int>(attrs.feat_stride),
|
||||||
attrs.box_coordinate_scale,
|
attrs.box_coordinate_scale,
|
||||||
attrs.box_size_scale,
|
attrs.box_size_scale,
|
||||||
coordinates_offset,
|
coordinates_offset,
|
||||||
@ -391,20 +391,20 @@ static void proposal_exec(const T* class_probs,
|
|||||||
num_rois,
|
num_rois,
|
||||||
0,
|
0,
|
||||||
attrs.nms_thresh,
|
attrs.nms_thresh,
|
||||||
attrs.post_nms_topn,
|
static_cast<int>(attrs.post_nms_topn),
|
||||||
static_cast<T>(coordinates_offset));
|
static_cast<T>(coordinates_offset));
|
||||||
|
|
||||||
T* p_probs = p_prob_item ? p_prob_item + batch_idx * attrs.post_nms_topn : nullptr;
|
T* p_probs = p_prob_item ? p_prob_item + batch_idx * attrs.post_nms_topn : nullptr;
|
||||||
retrieve_rois(num_rois,
|
retrieve_rois(num_rois,
|
||||||
batch_idx,
|
static_cast<int>(batch_idx),
|
||||||
pre_nms_topn,
|
pre_nms_topn,
|
||||||
proposals,
|
proposals,
|
||||||
roi_indices,
|
roi_indices,
|
||||||
p_roi_item + batch_idx * attrs.post_nms_topn * 5,
|
p_roi_item + batch_idx * attrs.post_nms_topn * 5,
|
||||||
attrs.post_nms_topn,
|
static_cast<int>(attrs.post_nms_topn),
|
||||||
attrs.normalize,
|
attrs.normalize,
|
||||||
img_H,
|
static_cast<float>(img_H),
|
||||||
img_W,
|
static_cast<float>(img_W),
|
||||||
attrs.clip_after_nms,
|
attrs.clip_after_nms,
|
||||||
p_probs);
|
p_probs);
|
||||||
}
|
}
|
||||||
|
@ -42,21 +42,21 @@ void psroi_pooling(const T* input,
|
|||||||
int num_spatial_bins = spatial_bins_x * spatial_bins_y;
|
int num_spatial_bins = spatial_bins_x * spatial_bins_y;
|
||||||
for (size_t roi = 0; roi < num_rois; roi++) {
|
for (size_t roi = 0; roi < num_rois; roi++) {
|
||||||
const T* box = rois + roi * 5;
|
const T* box = rois + roi * 5;
|
||||||
int batch_id = box[0];
|
int batch_id = static_cast<int>(box[0]);
|
||||||
float start_w = 0;
|
float start_w = 0;
|
||||||
float start_h = 0;
|
float start_h = 0;
|
||||||
float end_w = 0;
|
float end_w = 0;
|
||||||
float end_h = 0;
|
float end_h = 0;
|
||||||
if (mode == BILINEAR) {
|
if (mode == BILINEAR) {
|
||||||
start_w = box[1] * spatial_scale;
|
start_w = static_cast<float>(box[1]) * spatial_scale;
|
||||||
start_h = box[2] * spatial_scale;
|
start_h = static_cast<float>(box[2]) * spatial_scale;
|
||||||
end_w = box[3] * spatial_scale;
|
end_w = static_cast<float>(box[3]) * spatial_scale;
|
||||||
end_h = box[4] * spatial_scale;
|
end_h = static_cast<float>(box[4]) * spatial_scale;
|
||||||
} else if (mode == AVG) {
|
} else if (mode == AVG) {
|
||||||
start_w = std::roundf(box[1]) * spatial_scale;
|
start_w = std::roundf(static_cast<float>(box[1])) * spatial_scale;
|
||||||
start_h = std::roundf(box[2]) * spatial_scale;
|
start_h = std::roundf(static_cast<float>(box[2])) * spatial_scale;
|
||||||
end_w = (std::roundf(box[3]) + 1.0f) * spatial_scale;
|
end_w = (std::roundf(static_cast<float>(box[3])) + 1.0f) * spatial_scale;
|
||||||
end_h = (std::roundf(box[4]) + 1.0f) * spatial_scale;
|
end_h = (std::roundf(static_cast<float>(box[4])) + 1.0f) * spatial_scale;
|
||||||
}
|
}
|
||||||
float box_width = end_w - start_w;
|
float box_width = end_w - start_w;
|
||||||
float box_height = end_h - start_h;
|
float box_height = end_h - start_h;
|
||||||
@ -94,7 +94,7 @@ void psroi_pooling(const T* input,
|
|||||||
sum += input_offset[h * width + w];
|
sum += input_offset[h * width + w];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output[index] = sum / (current_bin_width * current_bin_height);
|
output[index] = sum / static_cast<T>(current_bin_width * current_bin_height);
|
||||||
c_in++;
|
c_in++;
|
||||||
} else if (mode == BILINEAR) {
|
} else if (mode == BILINEAR) {
|
||||||
c_in = 0;
|
c_in = 0;
|
||||||
@ -112,18 +112,20 @@ void psroi_pooling(const T* input,
|
|||||||
? (ph * height_scale + bin_start_h * (height - 1))
|
? (ph * height_scale + bin_start_h * (height - 1))
|
||||||
: (bin_start_h + bin_start_h + bin_height) * (height - 1) / 2;
|
: (bin_start_h + bin_start_h + bin_height) * (height - 1) / 2;
|
||||||
if (point_x < width && point_y < height) {
|
if (point_x < width && point_y < height) {
|
||||||
size_t left = floorf(point_x);
|
size_t left = static_cast<size_t>(floorf(point_x));
|
||||||
size_t right = std::min(static_cast<size_t>(ceilf(point_x)), width - 1);
|
size_t right = std::min(static_cast<size_t>(ceilf(point_x)), width - 1);
|
||||||
size_t top = floorf(point_y);
|
size_t top = static_cast<size_t>(floorf(point_y));
|
||||||
size_t bottom = std::min(static_cast<size_t>(ceilf(point_y)), height - 1);
|
size_t bottom = std::min(static_cast<size_t>(ceilf(point_y)), height - 1);
|
||||||
T top_left = input_offset[top * width + left];
|
T top_left = input_offset[top * width + left];
|
||||||
T top_right = input_offset[top * width + right];
|
T top_right = input_offset[top * width + right];
|
||||||
T bottom_left = input_offset[bottom * width + left];
|
T bottom_left = input_offset[bottom * width + left];
|
||||||
T bottom_right = input_offset[bottom * width + right];
|
T bottom_right = input_offset[bottom * width + right];
|
||||||
|
|
||||||
T top_interp = top_left + (top_right - top_left) * (point_x - left);
|
T top_interp = top_left + (top_right - top_left) * static_cast<T>(point_x - left);
|
||||||
T bottom_interp = bottom_left + (bottom_right - bottom_left) * (point_x - left);
|
T bottom_interp =
|
||||||
output[index] += top_interp + (bottom_interp - top_interp) * (point_y - top);
|
bottom_left + (bottom_right - bottom_left) * static_cast<T>(point_x - left);
|
||||||
|
output[index] +=
|
||||||
|
top_interp + (bottom_interp - top_interp) * static_cast<T>(point_y - top);
|
||||||
}
|
}
|
||||||
c_in++;
|
c_in++;
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,7 @@ template <typename T>
|
|||||||
void reduce_l1(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_axes) {
|
void reduce_l1(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_axes) {
|
||||||
constexpr bool dont_keep_dims_in_output = false;
|
constexpr bool dont_keep_dims_in_output = false;
|
||||||
const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output);
|
const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output);
|
||||||
std::fill(out, out + shape_size(out_shape), 0);
|
std::fill(out, out + shape_size(out_shape), T(0));
|
||||||
|
|
||||||
const auto in_strides = row_major_strides(in_shape);
|
const auto in_strides = row_major_strides(in_shape);
|
||||||
const auto out_strides = row_major_strides(out_shape);
|
const auto out_strides = row_major_strides(out_shape);
|
||||||
@ -26,8 +26,10 @@ void reduce_l1(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduc
|
|||||||
for (const Coordinate& input_coord : input_transform) {
|
for (const Coordinate& input_coord : input_transform) {
|
||||||
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
||||||
|
|
||||||
const size_t in_idx = std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), 0);
|
const size_t in_idx =
|
||||||
const size_t out_idx = std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), 0);
|
std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), uint64_t(0));
|
||||||
|
const size_t out_idx =
|
||||||
|
std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), uint64_t(0));
|
||||||
|
|
||||||
out[out_idx] = out[out_idx] + std::abs(arg[in_idx]);
|
out[out_idx] = out[out_idx] + std::abs(arg[in_idx]);
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,7 @@ template <typename T>
|
|||||||
void reduce_l2(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_axes) {
|
void reduce_l2(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_axes) {
|
||||||
constexpr bool dont_keep_dims_in_output = false;
|
constexpr bool dont_keep_dims_in_output = false;
|
||||||
const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output);
|
const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output);
|
||||||
std::fill(out, out + shape_size(out_shape), 0);
|
std::fill(out, out + shape_size(out_shape), T(0));
|
||||||
|
|
||||||
const auto in_strides = row_major_strides(in_shape);
|
const auto in_strides = row_major_strides(in_shape);
|
||||||
const auto out_strides = row_major_strides(out_shape);
|
const auto out_strides = row_major_strides(out_shape);
|
||||||
@ -26,8 +26,10 @@ void reduce_l2(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduc
|
|||||||
for (const Coordinate& input_coord : input_transform) {
|
for (const Coordinate& input_coord : input_transform) {
|
||||||
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
const Coordinate output_coord = reduce(input_coord, reduction_axes, dont_keep_dims_in_output);
|
||||||
|
|
||||||
const size_t in_idx = std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), 0);
|
const size_t in_idx =
|
||||||
const size_t out_idx = std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), 0);
|
std::inner_product(input_coord.begin(), input_coord.end(), in_strides.begin(), uint64_t(0));
|
||||||
|
const size_t out_idx =
|
||||||
|
std::inner_product(output_coord.begin(), output_coord.end(), out_strides.begin(), uint64_t(0));
|
||||||
|
|
||||||
out[out_idx] = out[out_idx] + arg[in_idx] * arg[in_idx];
|
out[out_idx] = out[out_idx] + arg[in_idx] * arg[in_idx];
|
||||||
}
|
}
|
||||||
|
@ -37,7 +37,8 @@ static inline void softmax_generic(const T* src_data, T* dst_data, int batches,
|
|||||||
|
|
||||||
T sum = 0;
|
T sum = 0;
|
||||||
for (int channel_idx = 0; channel_idx < channels; channel_idx++) {
|
for (int channel_idx = 0; channel_idx < channels; channel_idx++) {
|
||||||
dst_data[offset + channel_idx * area + i] = std::exp(src_data[offset + channel_idx * area + i] - max);
|
dst_data[offset + channel_idx * area + i] =
|
||||||
|
static_cast<T>(std::exp(src_data[offset + channel_idx * area + i] - max));
|
||||||
sum += dst_data[offset + channel_idx * area + i];
|
sum += dst_data[offset + channel_idx * area + i];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,15 +60,15 @@ void region_yolo(const T* input,
|
|||||||
const std::vector<int64_t>& mask) {
|
const std::vector<int64_t>& mask) {
|
||||||
NGRAPH_CHECK(input_shape.size() == 4);
|
NGRAPH_CHECK(input_shape.size() == 4);
|
||||||
|
|
||||||
const int batches = input_shape[0];
|
const int batches = static_cast<int>(input_shape[0]);
|
||||||
const int height = input_shape[2];
|
const int height = static_cast<int>(input_shape[2]);
|
||||||
const int width = input_shape[3];
|
const int width = static_cast<int>(input_shape[3]);
|
||||||
|
|
||||||
const auto mask_size = mask.size();
|
const auto mask_size = mask.size();
|
||||||
|
|
||||||
int num_regions = 0;
|
size_t num_regions = 0;
|
||||||
int end_index = 0;
|
size_t end_index = 0;
|
||||||
int output_size = 0;
|
size_t output_size = 0;
|
||||||
|
|
||||||
if (do_softmax) {
|
if (do_softmax) {
|
||||||
// Region layer (Yolo v2)
|
// Region layer (Yolo v2)
|
||||||
@ -83,18 +84,18 @@ void region_yolo(const T* input,
|
|||||||
|
|
||||||
std::copy(input, input + output_size, output);
|
std::copy(input, input + output_size, output);
|
||||||
|
|
||||||
const int inputs_size = width * height * num_regions * (classes + coords + 1);
|
const int inputs_size = width * height * static_cast<int>(num_regions) * (classes + coords + 1);
|
||||||
|
|
||||||
for (int batch_idx = 0; batch_idx < batches; batch_idx++) {
|
for (int batch_idx = 0; batch_idx < batches; batch_idx++) {
|
||||||
for (int n = 0; n < num_regions; n++) {
|
for (int n = 0; n < static_cast<int>(num_regions); n++) {
|
||||||
int index = entry_index(width, height, coords, classes, inputs_size, batch_idx, n * width * height, 0);
|
int index = entry_index(width, height, coords, classes, inputs_size, batch_idx, n * width * height, 0);
|
||||||
std::transform(output + index, output + index + 2 * width * height, output + index, [](T elem) {
|
std::transform(output + index, output + index + 2 * width * height, output + index, [](T elem) {
|
||||||
return sigmoid<T>(elem);
|
return sigmoid<T>(static_cast<float>(elem));
|
||||||
});
|
});
|
||||||
|
|
||||||
index = entry_index(width, height, coords, classes, inputs_size, batch_idx, n * width * height, coords);
|
index = entry_index(width, height, coords, classes, inputs_size, batch_idx, n * width * height, coords);
|
||||||
std::transform(output + index, output + index + end_index, output + index, [](T elem) {
|
std::transform(output + index, output + index + end_index, output + index, [](T elem) {
|
||||||
return sigmoid<T>(elem);
|
return sigmoid<T>(static_cast<float>(elem));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -114,7 +115,5 @@ void region_yolo(const T* input,
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace reference
|
} // namespace reference
|
||||||
|
|
||||||
} // namespace runtime
|
} // namespace runtime
|
||||||
|
|
||||||
} // namespace ngraph
|
} // namespace ngraph
|
||||||
|
@ -40,8 +40,8 @@ void reverse_sequence(const T* arg,
|
|||||||
Coordinate out_coord = in_coord;
|
Coordinate out_coord = in_coord;
|
||||||
out_coord[sequence_axis] = sequence_index;
|
out_coord[sequence_axis] = sequence_index;
|
||||||
|
|
||||||
const size_t in_idx = std::inner_product(in_coord.begin(), in_coord.end(), strides.begin(), 0);
|
const size_t in_idx = std::inner_product(in_coord.begin(), in_coord.end(), strides.begin(), size_t(0));
|
||||||
const size_t out_idx = std::inner_product(out_coord.begin(), out_coord.end(), strides.begin(), 0);
|
const size_t out_idx = std::inner_product(out_coord.begin(), out_coord.end(), strides.begin(), size_t(0));
|
||||||
out[out_idx] = arg[in_idx];
|
out[out_idx] = arg[in_idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -126,14 +126,14 @@ void roi_align(const T* feature_maps,
|
|||||||
unsigned int sample_x_high;
|
unsigned int sample_x_high;
|
||||||
|
|
||||||
if (sample_y_low >= feature_map_height - 1) {
|
if (sample_y_low >= feature_map_height - 1) {
|
||||||
sample_y_high = sample_y_low = feature_map_height - 1;
|
sample_y_high = sample_y_low = static_cast<unsigned int>(feature_map_height - 1);
|
||||||
sample_y = static_cast<T>(sample_y_low);
|
sample_y = static_cast<T>(sample_y_low);
|
||||||
} else {
|
} else {
|
||||||
sample_y_high = sample_y_low + 1;
|
sample_y_high = sample_y_low + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sample_x_low >= feature_map_width - 1) {
|
if (sample_x_low >= feature_map_width - 1) {
|
||||||
sample_x_high = sample_x_low = feature_map_width - 1;
|
sample_x_high = sample_x_low = static_cast<unsigned int>(feature_map_width - 1);
|
||||||
sample_x = static_cast<T>(sample_x_low);
|
sample_x = static_cast<T>(sample_x_low);
|
||||||
} else {
|
} else {
|
||||||
sample_x_high = sample_x_low + 1;
|
sample_x_high = sample_x_low + 1;
|
||||||
|
@ -19,35 +19,35 @@ void roi_pooling(const T* feature_maps,
|
|||||||
const float spatial_scale,
|
const float spatial_scale,
|
||||||
const std::string& pooling_method) {
|
const std::string& pooling_method) {
|
||||||
// Feature maps input shape: {N, C, H, W}
|
// Feature maps input shape: {N, C, H, W}
|
||||||
const int batches = feature_maps_shape[0];
|
const int batches = static_cast<int>(feature_maps_shape[0]);
|
||||||
const int channels = feature_maps_shape[1];
|
const int channels = static_cast<int>(feature_maps_shape[1]);
|
||||||
const int height = feature_maps_shape[2];
|
const int height = static_cast<int>(feature_maps_shape[2]);
|
||||||
const int width = feature_maps_shape[3];
|
const int width = static_cast<int>(feature_maps_shape[3]);
|
||||||
|
|
||||||
// Output shape: {NUM_ROIS, C, pooled_h, pooled_w}
|
// Output shape: {NUM_ROIS, C, pooled_h, pooled_w}
|
||||||
const int pooled_h = output_shape[2];
|
const int pooled_h = static_cast<int>(output_shape[2]);
|
||||||
const int pooled_w = output_shape[3];
|
const int pooled_w = static_cast<int>(output_shape[3]);
|
||||||
|
|
||||||
// ROIs shape: {NUM_ROIS, 5}
|
// ROIs shape: {NUM_ROIS, 5}
|
||||||
const int num_rois = rois_shape[0];
|
const size_t num_rois = rois_shape[0];
|
||||||
|
|
||||||
for (int roi_num = 0; roi_num < num_rois; roi_num++) {
|
for (size_t roi_num = 0; roi_num < num_rois; roi_num++) {
|
||||||
// ROI tuple: [roi_batch_id, roi_w_start, roi_h_start, roi_w_end, roi_h_end]
|
// ROI tuple: [roi_batch_id, roi_w_start, roi_h_start, roi_w_end, roi_h_end]
|
||||||
// ROI index
|
// ROI index
|
||||||
int roi_idx = rois_shape[1] * roi_num;
|
size_t roi_idx = rois_shape[1] * roi_num;
|
||||||
|
|
||||||
// ROI batch id
|
// ROI batch id
|
||||||
int roi_batch_id = rois[roi_idx + 0];
|
int roi_batch_id = static_cast<int>(rois[roi_idx + 0]);
|
||||||
|
|
||||||
// ROI batch id must be in the range of [0, N-1]
|
// ROI batch id must be in the range of [0, N-1]
|
||||||
NGRAPH_CHECK(0 <= roi_batch_id && roi_batch_id < batches, "ROI batch id must be in the range of [0, N-1]");
|
NGRAPH_CHECK(0 <= roi_batch_id && roi_batch_id < batches, "ROI batch id must be in the range of [0, N-1]");
|
||||||
|
|
||||||
if (pooling_method == "max") {
|
if (pooling_method == "max") {
|
||||||
// ROI coordinates scaled to input feature maps
|
// ROI coordinates scaled to input feature maps
|
||||||
int roi_w_start = std::round(rois[roi_idx + 1] * spatial_scale);
|
int roi_w_start = static_cast<int>(std::round(rois[roi_idx + 1] * spatial_scale));
|
||||||
int roi_h_start = std::round(rois[roi_idx + 2] * spatial_scale);
|
int roi_h_start = static_cast<int>(std::round(rois[roi_idx + 2] * spatial_scale));
|
||||||
int roi_w_end = std::round(rois[roi_idx + 3] * spatial_scale);
|
int roi_w_end = static_cast<int>(std::round(rois[roi_idx + 3] * spatial_scale));
|
||||||
int roi_h_end = std::round(rois[roi_idx + 4] * spatial_scale);
|
int roi_h_end = static_cast<int>(std::round(rois[roi_idx + 4] * spatial_scale));
|
||||||
|
|
||||||
// Force malformed ROIs to be 1x1
|
// Force malformed ROIs to be 1x1
|
||||||
int roi_height = std::max(roi_h_end - roi_h_start + 1, 1);
|
int roi_height = std::max(roi_h_end - roi_h_start + 1, 1);
|
||||||
@ -123,13 +123,13 @@ void roi_pooling(const T* feature_maps,
|
|||||||
in_y = ((ph == pooled_h - 1) ? (height - 1) * roi_h_end
|
in_y = ((ph == pooled_h - 1) ? (height - 1) * roi_h_end
|
||||||
: (ph * roi_height_scale + roi_h_start * (height - 1)));
|
: (ph * roi_height_scale + roi_h_start * (height - 1)));
|
||||||
} else {
|
} else {
|
||||||
in_y = 0.5 * (roi_h_start + roi_h_end) * (height - 1);
|
in_y = static_cast<T>(0.5 * (roi_h_start + roi_h_end) * (height - 1));
|
||||||
}
|
}
|
||||||
if (pooled_w > 1) {
|
if (pooled_w > 1) {
|
||||||
in_x = ((pw == pooled_w - 1) ? (width - 1) * roi_w_end
|
in_x = ((pw == pooled_w - 1) ? (width - 1) * roi_w_end
|
||||||
: (pw * roi_width_scale + roi_w_start * (width - 1)));
|
: (pw * roi_width_scale + roi_w_start * (width - 1)));
|
||||||
} else {
|
} else {
|
||||||
in_x = 0.5 * (roi_w_end + roi_w_start) * (width - 1);
|
in_x = static_cast<T>(0.5 * (roi_w_end + roi_w_start) * (width - 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
const size_t pool_index =
|
const size_t pool_index =
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user