#!/usr/bin/env python3
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Sample usage:
#   ./scripts/init_corpus.py ./paddle_layer_models/**/*.pdmodel --join pdiparams
#   mkdir -p ./import_paddle-corpus && find ./paddle_layer_models/ -name "*.fuzz" -exec cp \{\} ./import_paddle-corpus \;

import argparse
import glob
import os
from pathlib import Path
import shutil
import sys


def globber(paths):
    """Yield every entry of ``paths``, expanding the ones that contain wildcards.

    Entries with glob magic characters are replaced by the paths they match
    (``**`` recurses into sub-directories); a pattern matching nothing yields
    nothing. Entries without wildcards are passed through untouched, whether or
    not they exist on disk.
    """
    for candidate in paths:
        # XXX: `glob.has_magic` is non-public, but it is what lets us tell a
        #  wildcard pattern from a plain path, so that a plain path which does
        #  not exist is still handed to the caller instead of vanishing.
        if not glob.has_magic(candidate):
            yield candidate
            continue
        yield from glob.iglob(candidate, recursive=True)


def main():
    """Main entrypoint.

    Parses the command line and, for every input path (wildcards are expanded
    by ``globber``), copies the input to ``<base>.fuzz`` where ``<base>`` is the
    input path without its extension. When ``--join`` is given, every existing
    ``<base>.<ext>`` file for the listed extensions is appended to the ``.fuzz``
    file, each one preceded by the ``FUZZ_NEXT_FIELD`` separator.

    Inputs that are not existing regular files are reported on stderr and
    skipped instead of aborting the whole run. An input that already is its own
    ``.fuzz`` output is skipped as well, since it cannot be copied onto itself.

    Returns:
        0 if every input was a regular file, 1 if at least one input was
        skipped because it is missing or not a regular file.
    """
    parser = argparse.ArgumentParser(
        description="Join multiple files of the same name to a single *.fuzz file"
    )
    parser.add_argument("input", nargs="+", help="A file to add to the corpus")
    parser.add_argument(
        "--join",
        help="Colon separated list of file extensions to concatenate to corpus entry",
    )
    args = parser.parse_args()

    # The extension list does not depend on the input, so split it only once.
    join_extensions = args.join.split(":") if args.join else []
    status = 0

    for input_path in globber(args.input):
        # globber passes plain paths through unchecked and recursive patterns
        # may match directories, so validate before touching the file system.
        if not os.path.isfile(input_path):
            print(f"error: '{input_path}' is not an existing file, skipping", file=sys.stderr)
            status = 1
            continue

        base = os.path.splitext(input_path)[0]
        output = f"{base}.fuzz"

        # shutil.copyfile raises SameFileError when source and destination are
        # the same file; appending to it would also grow it on every run.
        if os.path.exists(output) and os.path.samefile(input_path, output):
            print(f"warning: '{input_path}' is already a corpus entry, skipping", file=sys.stderr)
            continue

        shutil.copyfile(input_path, output)
        if not join_extensions:
            continue

        with open(output, "ab") as output_file:
            for extension in join_extensions:
                join_path = f"{base}.{extension}"
                # Companion files are optional: silently ignore missing ones.
                if not os.path.isfile(join_path):
                    continue
                with open(join_path, "rb") as join_file:
                    output_file.write(b"FUZZ_NEXT_FIELD")
                    # Stream the content instead of loading it all into memory.
                    shutil.copyfileobj(join_file, output_file)

    return status

# Run only when executed as a script; main()'s return value becomes the
# process exit status (None is treated as success by sys.exit).
if __name__ == "__main__":
    sys.exit(main())