Add cnpy fuzz test and fix issues (#6109)

This commit is contained in:
Andrey Somsikov 2021-06-14 17:00:49 +03:00 committed by GitHub
parent 17b5240065
commit fb7435a647
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 160 additions and 10 deletions

View File

@ -18,14 +18,72 @@ To run fuzzing you will need [LLVM](https://apt.llvm.org/) components:
- lld (linker) - lld (linker)
- libc++ - libc++
## Reproducing Failure Found by Fuzzing
1. Build `fuzz` test target: ## Building fuzz tests
1. Build openvino
Build openvino with options `ENABLE_FUZZING` and `ENABLE_SANITIZER` enabled. It
is recommended to use clang compiler.
```bash ```bash
cmake -DENABLE_TESTS=ON .. && ninja fuzz (\
mkdir -p build && cd build && \
CC=clang CXX=clang++ cmake .. -DENABLE_FUZZING=ON -DENABLE_SANITIZER=ON -DTREAT_WARNING_AS_ERROR=OFF && \
cmake --build . \
)
``` ```
2. Run fuzzing test passing a failure reproducer as a command-line argument: 2. Build fuzz tests
Build fuzz tests with options `ENABLE_FUZZING` and `ENABLE_SANITIZER` enabled.
You should use the same compiler as was used for the openvino build.
```bash ```bash
./read_network-fuzzer crash-reproducer (\
mkdir -p tests/fuzz/build && cd tests/fuzz/build && \
CC=clang CXX=clang++ cmake .. -DENABLE_FUZZING=ON -DENABLE_SANITIZER=ON -DTREAT_WARNING_AS_ERROR=OFF -DInferenceEngine_DIR=$(pwd)/../../../build && \
cmake --build . \
)
``` ```
## Running fuzz tests
1. Prepare fuzzing corpus
The fuzzing engine needs a set of valid inputs to start fuzzing from. Those files
are called a fuzzing corpus. Place valid inputs for the fuzzing test into a
directory.
Intel employees can get the corpus as described here
https://wiki.ith.intel.com/x/2N42bg.
2. Run fuzzing
```bash
./read_network-fuzzer -max_total_time=600 ./read_network-corpus
```
Consider adding these useful command-line options:
- `-jobs=$(nproc)` runs multiple fuzzing jobs in parallel.
- `-rss_limit_mb=0` to ignore out-of-memory issues.
## Analyzing fuzzing quality
### Explore code coverage
To build a coverage report after fuzz test execution, run:
```
llvm-profdata merge -sparse *.profraw -o default.profdata && \
llvm-cov show ./read_network-fuzzer -instr-profile=default.profdata -format=html -output-dir=read_network-coverage
```
## Reproducing findings
A fuzzing run halts on the first issue identified, prints the issue details to stdout and saves the data needed to reproduce the issue as a file in the current folder. To debug the issue, pass the reproducer as a command-line argument to the fuzz test:
```bash
./read_network-fuzzer crash-409b5eeed46a8445b7f7b7a2ce5b60a9ad895e3b
```
It is recommended but not required to use binaries built for fuzzing to debug the issues. Binaries built without the `ENABLE_FUZZING` option can also be used to reproduce and debug the issues.

View File

@ -6,7 +6,7 @@ set(TARGET_NAME fuzz-testhelper)
file( file(
GLOB SRC_FILES GLOB SRC_FILES
${CMAKE_CURRENT_SOURCE_DIR}/*-testhelper.cc) ${CMAKE_CURRENT_SOURCE_DIR}/*.cc)
add_library( add_library(
${TARGET_NAME} STATIC ${TARGET_NAME} STATIC

View File

@ -0,0 +1,40 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "fuzz-utils.h"
#include <stdexcept>
#include <stdlib.h>
#include <string.h>
#include <string>
#ifndef _WIN32
#include <unistd.h>
#endif // _WIN32
/// Create a temporary file under /dev/shm and fill it with `size` bytes
/// copied from `data`.
///
/// On success `m_name` holds the malloc'ed path of the created file.
///
/// @param data  bytes to store in the file; not read when `size` is 0
/// @param size  number of bytes to write
/// @throws std::bad_alloc      if the path template cannot be duplicated
/// @throws std::runtime_error  if the file cannot be created or fully
///                             written, and always on Windows where this
///                             helper is not implemented
MemoryFile::MemoryFile(const void *data, size_t size) {
#ifdef _WIN32
    // std::exception(const char*) is an MSVC extension; runtime_error is
    // portable and still derives from std::exception.
    throw std::runtime_error("MemoryFile is not implemented for Windows");
#else // _WIN32
    m_name = strdup("/dev/shm/fuzz-XXXXXX");
    if (!m_name)
        throw std::bad_alloc();

    // mkstemp replaces the XXXXXX suffix in place and returns an open
    // descriptor, or -1 when no file could be created.
    const int fd = mkstemp(m_name);
    if (fd < 0) {
        // The destructor does not run when the constructor throws, so the
        // path buffer has to be released here.
        free(m_name);
        throw std::runtime_error("Failed to create temporary file in /dev/shm");
    }

    if (size) {
        const ssize_t nbytes = write(fd, data, size);
        if (nbytes < 0 || static_cast<size_t>(nbytes) != size) {
            // Compose the message while m_name is still valid, then remove
            // the partially written file and release everything.
            const std::string message = "Failed to write " +
                                        std::to_string(size) + " bytes to " +
                                        m_name;
            close(fd);
            unlink(m_name);
            free(m_name);
            throw std::runtime_error(message);
        }
    }
    close(fd);
#endif // _WIN32
}
/// Delete the backing file and release the buffer holding its path.
/// The body is compiled out on Windows.
MemoryFile::~MemoryFile() {
#if !defined(_WIN32)
    // The path is still needed by unlink, so the buffer is freed last.
    unlink(m_name);
    free(m_name);
#endif // !defined(_WIN32)
}

View File

@ -0,0 +1,19 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <stddef.h>
/// Temporary file that exposes an in-memory buffer through a filesystem path,
/// for APIs that only accept file names.
///
/// The object owns both the file and the buffer holding its path, so it is
/// deliberately non-copyable: a copy would release the same resources twice.
class MemoryFile {
public:
    /// Create a memory backed file
    MemoryFile(const void *data, size_t size);
    /// Delete memory backed file
    ~MemoryFile();

    // Copying would duplicate ownership of m_name and of the file on disk.
    MemoryFile(const MemoryFile &) = delete;
    MemoryFile &operator=(const MemoryFile &) = delete;

    /// Get path to a file.
    const char *name() const { return m_name; }

private:
    /// Path of the backing file; owned by this object.
    char *m_name = nullptr;
};

View File

@ -9,11 +9,14 @@ add_custom_target(fuzz)
# Fuzz test target name is source file name without extension. # Fuzz test target name is source file name without extension.
FILE(GLOB tests "*-fuzzer.cc") FILE(GLOB tests "*-fuzzer.cc")
add_subdirectory(../../../thirdparty/cnpy ${CMAKE_CURRENT_BINARY_DIR}/cnpy)
add_subdirectory(../../../thirdparty/zlib ${CMAKE_CURRENT_BINARY_DIR}/zlib)
foreach(test_source ${tests}) foreach(test_source ${tests})
get_filename_component(test_name ${test_source} NAME_WE) get_filename_component(test_name ${test_source} NAME_WE)
add_fuzzer(${test_name} ${test_source}) add_fuzzer(${test_name} ${test_source})
target_link_libraries(${test_name} PRIVATE IE::inference_engine) target_link_libraries(${test_name} PRIVATE IE::inference_engine cnpy zlib)
add_dependencies(fuzz ${test_name}) add_dependencies(fuzz ${test_name})
endforeach() endforeach()

View File

@ -0,0 +1,21 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <stdio.h>
#include <cnpy.h>
#include "fuzz-utils.h"
/// libFuzzer entry point: stores the fuzzer-provided bytes in a temporary
/// file and asks cnpy to load it as an .npy array.
///
/// Always returns 0. Exceptions derived from std::exception raised while
/// loading are swallowed as expected rejections of malformed input.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * inputData, size_t inputSize) {
    // cnpy::npy_load takes a path, so the input has to live in a file.
    MemoryFile file(inputData, inputSize);

    try {
        cnpy::NpyArray loaded = cnpy::npy_load(file.name());
    } catch (const std::exception&) {
        // fail gracefully on expected exceptions
    }

    return 0;
}

View File

@ -90,7 +90,9 @@ void cnpy::parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector
//byte order code | stands for not applicable. //byte order code | stands for not applicable.
//not sure when this applies except for byte array //not sure when this applies except for byte array
loc1 = header.find("descr")+9; loc1 = header.find("descr")+9;
bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); bool littleEndian = false;
if (loc1 < header.size())
littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
assert(littleEndian); assert(littleEndian);
//char type = header[loc1+1]; //char type = header[loc1+1];
@ -148,7 +150,9 @@ void cnpy::parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& sh
if (loc1 == std::string::npos) if (loc1 == std::string::npos)
throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'"); throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'");
loc1 += 9; loc1 += 9;
bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); bool littleEndian = false;
if (loc1 < header.size())
littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
assert(littleEndian); assert(littleEndian);
//char type = header[loc1+1]; //char type = header[loc1+1];

View File

@ -27,6 +27,11 @@ namespace cnpy {
{ {
num_vals = 1; num_vals = 1;
for(size_t i = 0;i < shape.size();i++) num_vals *= shape[i]; for(size_t i = 0;i < shape.size();i++) num_vals *= shape[i];
if (word_size &&
num_vals > std::vector<char>().max_size() / word_size)
throw std::length_error("NpyArray of " + std::to_string(num_vals) +
"*" + std::to_string(word_size) +
" elements is too big.");
data_holder = std::shared_ptr<std::vector<char>>( data_holder = std::shared_ptr<std::vector<char>>(
new std::vector<char>(num_vals * word_size)); new std::vector<char>(num_vals * word_size));
} }