Add input image scale option in benchmark App (#5848)

* Add input image scale flag in benchmark app. - The user sets the input image scale with -iscale; the input is divided by the scale. Signed-off-by: hyunback <hyunback.kim@intel.com> * Apply image scale and mean parameters in benchmark app. Mean and scale values are given per channel. Signed-off-by: hyunback <hyunback.kim@intel.com> * Fix clang-format. Signed-off-by: hyunback <hyunback.kim@intel.com> * Fix clang-format issue 2. Signed-off-by: hyunback <hyunback.kim@intel.com> * Update benchmark tool to align the format of mean and scale values with MO arguments. Signed-off-by: hyunback <hyunback.kim@intel.com> * Remove debug print. Signed-off-by: hyunback <hyunback.kim@intel.com>
2021-08-05 01:51:46 +09:00
parent 585f211e0a
commit f00dabc184
10 changed files with 161 additions and 11 deletions
--- a/inference-engine/samples/benchmark_app/benchmark_app.hpp
+++ b/inference-engine/samples/benchmark_app/benchmark_app.hpp
@@ -147,6 +147,14 @@ static constexpr char iop_message[] = "Optional. Specifies precision for input a
                                      "                                             Overwrites precision from ip and op options for "
                                      "specified layers.";

+static constexpr char input_image_scale_message[] = "Optional. Scale values to be used for the input image per channel.\n"  // help text of the -iscale flag
+                                                    "Values to be provided in the [R, G, B] format. Can be defined for desired input of the model.\n"
+                                                    "Example: -iscale data[255,255,255],info[255,255,255]\n";  // the text already ends with a newline and showUsage() appends std::endl, so a blank line follows the entry
+
+static constexpr char input_image_mean_message[] = "Optional. Mean values to be used for the input image per channel.\n"  // help text of the -imean flag
+                                                   "Values to be provided in the [R, G, B] format. Can be defined for desired input of the model,\n"  // NOTE(review): ends with a comma where the -iscale text ends with a period; looks like a typo
+                                                   "Example: -imean data[255,255,255],info[255,255,255]\n";
+
 /// @brief Define flag for showing help message <br>
 DEFINE_bool(h, false, help_message);

@@ -259,6 +267,12 @@ DEFINE_string(cache_dir, "", cache_dir_message);
 /// @brief Define flag for load network from model file by name without ReadNetwork <br>
 DEFINE_bool(load_from_file, false, load_from_file_message);

+/// @brief Define flag for using input image scale <br>
+DEFINE_string(iscale, "", input_image_scale_message);
+
+/// @brief Define flag for using input image mean <br>
+DEFINE_string(imean, "", input_image_mean_message);
+
 /**
 * @brief This function show a help message
 */
@@ -304,4 +318,6 @@ static void showUsage() {
    std::cout << "    -ip                          <value>     " << inputs_precision_message << std::endl;
    std::cout << "    -op                          <value>     " << outputs_precision_message << std::endl;
    std::cout << "    -iop                        \"<value>\"    " << iop_message << std::endl;
+    std::cout << "    -iscale                    " << input_image_scale_message << std::endl;
+    std::cout << "    -imean                     " << input_image_mean_message << std::endl;
 }
--- a/inference-engine/samples/benchmark_app/inputs_filling.cpp
+++ b/inference-engine/samples/benchmark_app/inputs_filling.cpp
@@ -91,7 +91,9 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector<std::string>& filePat
                    size_t offset = imageId * numChannels * width * height + (((app_info.layout == "NCHW") || (app_info.layout == "CHW"))
                                                                                  ? (ch * width * height + h * width + w)
                                                                                  : (h * width * numChannels + w * numChannels + ch));
-                    inputBlobData[offset] = static_cast<T>(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]);
+                    inputBlobData[offset] =
+                        (static_cast<T>(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]) - static_cast<T>(app_info.mean[ch])) /
+                        static_cast<T>(app_info.scale[ch]);
                }
            }
        }
--- a/inference-engine/samples/benchmark_app/inputs_filling.hpp
+++ b/inference-engine/samples/benchmark_app/inputs_filling.hpp
@@ -12,4 +12,4 @@
 #include "utils.hpp"

 void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSize, benchmark_app::InputsInfo& app_inputs_info,
-               std::vector<InferReqWrap::Ptr> requests);
+               std::vector<InferReqWrap::Ptr> requests);
--- a/inference-engine/samples/benchmark_app/main.cpp
+++ b/inference-engine/samples/benchmark_app/main.cpp
@@ -380,7 +380,7 @@ int main(int argc, char* argv[]) {
            batchSize = cnnNetwork.getBatchSize();
            // Parse input shapes if specified
            bool reshape = false;
-            app_inputs_info = getInputsInfo<InputInfo::Ptr>(FLAGS_shape, FLAGS_layout, FLAGS_b, inputInfo, reshape);
+            app_inputs_info = getInputsInfo<InputInfo::Ptr>(FLAGS_shape, FLAGS_layout, FLAGS_b, FLAGS_iscale, FLAGS_imean, inputInfo, reshape);
            if (reshape) {
                InferenceEngine::ICNNNetwork::InputShapes shapes = {};
                for (auto& item : app_inputs_info)
@@ -441,7 +441,7 @@ int main(int argc, char* argv[]) {
            slog::info << "Import network took " << duration_ms << " ms" << slog::endl;
            if (statistics)
                statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"import network time (ms)", duration_ms}});
-            app_inputs_info = getInputsInfo<InputInfo::CPtr>(FLAGS_shape, FLAGS_layout, FLAGS_b, exeNetwork.GetInputsInfo());
+            app_inputs_info = getInputsInfo<InputInfo::CPtr>(FLAGS_shape, FLAGS_layout, FLAGS_b, FLAGS_iscale, FLAGS_imean, exeNetwork.GetInputsInfo());
            if (batchSize == 0) {
                batchSize = 1;
            }
--- a/inference-engine/samples/benchmark_app/utils.cpp
+++ b/inference-engine/samples/benchmark_app/utils.cpp
@@ -88,6 +88,17 @@ std::vector<std::string> split(const std::string& s, char delim) {
    return result;
 }

+std::vector<float> splitFloat(const std::string& s, char delim) {  // splits s on delim and converts each piece to float, keeping the order
+    std::vector<float> result;  // stays empty when s is empty
+    std::stringstream ss(s);
+    std::string item;
+
+    while (getline(ss, item, delim)) {
+        result.push_back(std::stof(item));  // std::stof throws std::invalid_argument or std::out_of_range on a piece it cannot convert; not caught here
+    }
+    return result;
+}
+
 std::vector<std::string> parseDevices(const std::string& device_string) {
    std::string comma_separated_devices = device_string;
    if (comma_separated_devices.find(":") != std::string::npos) {
@@ -161,6 +172,44 @@ std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& sha
    return ss.str();
 }

+std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean, const benchmark_app::InputsInfo& inputs_info) {
+    // Parses a per-input value list. Format: data[255,255,255],info[255,255,255] or [255,255,255] for every image input.
+    std::map<std::string, std::vector<float>> return_value;  // input name -> parsed values; stays empty for an empty string
+
+    std::string search_string = scale_mean;  // unparsed remainder, consumed from the front
+    auto start_pos = search_string.find_first_of('[');
+    while (start_pos != std::string::npos) {
+        auto end_pos = search_string.find_first_of(']');
+        if (end_pos == std::string::npos)
+            break;  // unterminated bracket: the remainder is non-empty, so the check after the loop throws
+        auto input_name = search_string.substr(0, start_pos);  // text before the bracket, may be empty
+        auto input_value_string = search_string.substr(start_pos + 1, end_pos - start_pos - 1);  // text between the brackets
+        auto input_value = splitFloat(input_value_string, ',');  // the number of values is not validated here
+
+        if (!input_name.empty()) {
+            if (inputs_info.count(input_name)) {
+                return_value[input_name] = input_value;
+            }
+            // A name that is not a key of inputs_info is dropped silently, without an error.
+        } else {
+            for (auto& item : inputs_info) {  // unnamed entry: apply the values to every input reporting isImage()
+                if (item.second.isImage())
+                    return_value[item.first] = input_value;
+            }
+            search_string.clear();  // anything following an unnamed entry is discarded without being parsed
+            break;
+        }
+        search_string = search_string.substr(end_pos + 1);
+        if (search_string.empty() || search_string.front() != ',')
+            break;  // end of input, or unexpected text after the closing bracket
+        search_string = search_string.substr(1);  // skip the comma separating entries
+        start_pos = search_string.find_first_of('[');
+    }
+    if (!search_string.empty())
+        throw std::logic_error("Can't parse input parameter string: " + scale_mean);  // leftover text means malformed input
+    return return_value;
+}
+
 #ifdef USE_OPENCV
 void dump_config(const std::string& filename, const std::map<std::string, std::map<std::string, std::string>>& config) {
    cv::FileStorage fs(filename, cv::FileStorage::WRITE);
--- a/inference-engine/samples/benchmark_app/utils.hpp
+++ b/inference-engine/samples/benchmark_app/utils.hpp
@@ -13,6 +13,8 @@ struct InputInfo {
    InferenceEngine::Precision precision;
    InferenceEngine::SizeVector shape;
    std::string layout;
+    std::vector<float> scale;
+    std::vector<float> mean;
    bool isImage() const;
    bool isImageInfo() const;
    size_t getDimentionByLayout(char character) const;
@@ -31,6 +33,7 @@ std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector
 std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes);
 size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info);
 std::vector<std::string> split(const std::string& s, char delim);
+std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean, const benchmark_app::InputsInfo& inputs_info);

 template <typename T>
 std::map<std::string, std::string> parseInputParameters(const std::string parameter_string, const std::map<std::string, T>& input_info) {
@@ -65,9 +68,11 @@ std::map<std::string, std::string> parseInputParameters(const std::string parame

 template <typename T>
 benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size,
-                                        const std::map<std::string, T>& input_info, bool& reshape_required) {
+                                        const std::string& scale_string, const std::string& mean_string, const std::map<std::string, T>& input_info,
+                                        bool& reshape_required) {
    std::map<std::string, std::string> shape_map = parseInputParameters(shape_string, input_info);
    std::map<std::string, std::string> layout_map = parseInputParameters(layout_string, input_info);
+
    reshape_required = false;
    benchmark_app::InputsInfo info_map;
    for (auto& item : input_info) {
@@ -106,14 +111,33 @@ benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const s
        }
        info_map[name] = info;
    }
+
+    // Update scale and mean
+    std::map<std::string, std::vector<float>> scale_map = parseScaleOrMean(scale_string, info_map);
+    std::map<std::string, std::vector<float>> mean_map = parseScaleOrMean(mean_string, info_map);
+
+    for (auto& item : info_map) {
+        if (item.second.isImage()) {
+            item.second.scale.assign({1, 1, 1});
+            item.second.mean.assign({0, 0, 0});
+
+            if (scale_map.count(item.first)) {
+                item.second.scale = scale_map.at(item.first);
+            }
+            if (mean_map.count(item.first)) {
+                item.second.mean = mean_map.at(item.first);
+            }
+        }
+    }
+
    return info_map;
 }

 template <typename T>
 benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size,
-                                        const std::map<std::string, T>& input_info) {
+                                        const std::string& scale_string, const std::string& mean_string, const std::map<std::string, T>& input_info) {
    bool reshape_required = false;
-    return getInputsInfo<T>(shape_string, layout_string, batch_size, input_info, reshape_required);
+    return getInputsInfo<T>(shape_string, layout_string, batch_size, scale_string, mean_string, input_info, reshape_required);
 }

 #ifdef USE_OPENCV
--- a/tools/benchmark_tool/openvino/tools/benchmark/main.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/main.py
@@ -201,7 +201,7 @@ def run(args):
                                          [
                                              ('load network time (ms)', duration_ms)
                                          ])
-            app_inputs_info, _ = get_inputs_info(args.shape, args.layout, args.batch_size, exe_network.input_info)
+            app_inputs_info, _ = get_inputs_info(args.shape, args.layout, args.batch_size, args.input_scale, args.input_mean, exe_network.input_info)
            if batch_size == 0:
                batch_size = 1
        elif not is_network_compiled:
@@ -222,7 +222,7 @@ def run(args):
            # --------------------- 5. Resizing network to match image sizes and given batch ---------------------------
            next_step()

-            app_inputs_info, reshape = get_inputs_info(args.shape, args.layout, args.batch_size, ie_network.input_info)
+            app_inputs_info, reshape = get_inputs_info(args.shape, args.layout, args.batch_size, args.input_scale, args.input_mean,  ie_network.input_info)
            if reshape:
                start_time = datetime.utcnow()
                shapes = { k : v.shape for k,v in app_inputs_info.items() }
@@ -280,7 +280,7 @@ def run(args):
                                          [
                                              ('import network time (ms)', duration_ms)
                                          ])
-            app_inputs_info, _ = get_inputs_info(args.shape, args.layout, args.batch_size, exe_network.input_info)
+            app_inputs_info, _ = get_inputs_info(args.shape, args.layout, args.batch_size, args.input_scale, args.input_mean, exe_network.input_info)
            if batch_size == 0:
                batch_size = 1

--- a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
@@ -130,6 +130,12 @@ def parse_args():
                      help="Optional. Enable model caching to specified directory")
    args.add_argument('-lfile', '--load_from_file', required=False, nargs='?', default=argparse.SUPPRESS,
                      help="Optional. Loads model from file directly without read_network.")
+    args.add_argument('-iscale', '--input_scale', type=str, required=False, default='',
+                      help="Optional. Scale values to be used for the input image per channel.\n Values to be provided in the [R, G, B] format. Can be defined for desired input of the model.\n"
+                           "Example: -iscale data[255,255,255],info[255,255,255]\n")
+    args.add_argument('-imean', '--input_mean', type=str, required=False, default='',
+                      help="Optional. Mean values to be used for the input image per channel.\n Values to be provided in the [R, G, B] format. Can be defined for desired input of the model.\n"
+                           "Example: -imean data[255,255,255],info[255,255,255]\n")
    parsed_args = parser.parse_args()

    return parsed_args
--- a/tools/benchmark_tool/openvino/tools/benchmark/utils/inputs_filling.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/inputs_filling.py
@@ -126,6 +126,9 @@ def fill_blob_with_image(image_paths, request_id, batch_size, input_id, input_si
    shape = info.shape
    images = np.ndarray(shape)
    image_index = request_id * batch_size * input_size + input_id
+
+    scale_mean = (not np.array_equal(info.scale, (1.0, 1.0, 1.0)) or not np.array_equal(info.mean, (0.0, 0.0, 0.0)))
+
    for b in range(batch_size):
        image_index %= len(image_paths)
        image_filename = image_paths[image_index]
@@ -135,8 +138,20 @@ def fill_blob_with_image(image_paths, request_id, batch_size, input_id, input_si
        if image.shape[:-1] != new_im_size:
            logger.warning(f"Image is resized from ({image.shape[:-1]}) to ({new_im_size})")
            image = cv2.resize(image, new_im_size)
+
+        if scale_mean:
+            blue, green, red = cv2.split(image)
+            blue = np.subtract(blue, info.mean[0])
+            blue = np.divide(blue, info.scale[0])
+            green = np.subtract(green, info.mean[1])
+            green = np.divide(green, info.scale[1])
+            red = np.subtract(red, info.mean[2])
+            red = np.divide(red, info.scale[2])
+            image = cv2.merge([blue, green, red])
+
        if info.layout in ['NCHW', 'CHW']:
            image = image.transpose((2, 0, 1))
+
        images[b] = image

        image_index += input_size
--- a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py
@@ -9,6 +9,7 @@ from .logging import logger

 import json
 import re
+import numpy as np

 def static_vars(**kwargs):
    def decorate(func):
@@ -291,11 +292,33 @@ def parse_input_parameters(parameter_string, input_info):
            raise Exception(f"Can't parse input parameter: {parameter_string}")
    return return_value

+def parse_scale_or_mean(parameter_string, input_info):
+    # Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to every input whose is_image is true)
+    return_value = {}  # input name -> numpy array of the parsed values; stays empty for an empty string
+    if parameter_string:
+        matches = re.findall(r'(.*?)\[(.*?)\],?', parameter_string)  # list of (name, comma-separated values) pairs
+        if matches:
+            for match in matches:
+                input_name, value = match
+                f_value = np.array(value.split(",")).astype(np.float)  # NOTE(review): np.float is a deprecated alias of builtin float (removed in NumPy 1.24)
+                if input_name != '':
+                    return_value[input_name] = f_value  # stored as given; the name is not checked against input_info
+                else:
+                    print("input_info: ", input_info)  # NOTE(review): looks like a leftover debug print - confirm and remove
+                    for name, description in input_info.items():
+                        if description.is_image:
+                            return_value[name] = f_value
+        else:
+            raise Exception(f"Can't parse input parameter: {parameter_string}")  # no name[values] group found at all
+    return return_value
+
 class InputInfo:
    def __init__(self):
        self.precision = None
        self.layout = ""
        self.shape = []
+        self.scale = []
+        self.mean = []

    @property
    def is_image(self):
@@ -334,7 +357,7 @@ class InputInfo:
    def depth(self):
        return self.getDimentionByLayout("D")

-def get_inputs_info(shape_string, layout_string, batch_size, input_info):
+def get_inputs_info(shape_string, layout_string, batch_size, scale_string, mean_string, input_info):
    shape_map = parse_input_parameters(shape_string, input_info)
    layout_map = parse_input_parameters(layout_string, input_info)
    reshape = False
@@ -359,6 +382,21 @@ def get_inputs_info(shape_string, layout_string, batch_size, input_info):
                info.shape[batch_index] = batch_size
                reshape = True
        info_map[name] = info
+
+    # Update scale, mean
+    scale_map = parse_scale_or_mean(scale_string, info_map)
+    mean_map = parse_scale_or_mean(mean_string, info_map)
+
+    for name, descriptor in info_map.items():
+        if descriptor.is_image:
+            descriptor.scale = np.ones(3)
+            descriptor.mean = np.zeros(3)
+
+            if name in scale_map:
+                descriptor.scale = scale_map[name]
+            if name in mean_map:
+                descriptor.mean = mean_map[name]
+
    return info_map, reshape

 def get_batch_size(inputs_info):