mirror of https://github.com/libvirt/libvirt.git
scripts: check-html-references: Detect pages that are not linked to
Prevent sub-pages without a way to reach them.

Signed-off-by: Peter Krempa <pkrempa@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
commit 61dcca1b23 (parent 5f3a897b93)
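For context, this is how the checker would be invoked once this change lands. The --webroot and --entrypoint options are taken from the diff below; the script path and the webroot value are illustrative:

    $ python3 scripts/check-html-references.py --webroot build/docs

--entrypoint defaults to index.html, so it only needs to be passed when the site's landing page has a different name.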
@@ -40,6 +40,8 @@ def get_file_list(prefix):
 
             filelist.append(os.path.join(root, file))
 
+    filelist.sort()
+
     return filelist
 
 
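(The new filelist.sort() call is presumably about determinism: os.walk() yields files in filesystem-dependent order, so sorting keeps the unreachable-page report added below stable across runs.)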
@@ -118,9 +120,54 @@ def check_targets(targets, anchors):
 
     return False
 
 
+def check_usage_crawl(page, targets, visited):
+    visited.append(page)
+
+    tocrawl = []
+
+    for filename, docname, target, _ in targets:
+        if page != filename:
+            continue
+
+        targetpage = target.split("#", 1)[0]
+
+        if targetpage not in visited and targetpage not in tocrawl:
+            tocrawl.append(targetpage)
+
+    for crawl in tocrawl:
+        check_usage_crawl(crawl, targets, visited)
+
+
+# crawls the document references starting from entrypoint and tries to find
+# unreachable pages
+def check_usage(targets, files, entrypoint):
+    visited = []
+    fail = False
+
+    check_usage_crawl(entrypoint, targets, visited)
+
+    for file in files:
+        if file not in visited:
+            brokendoc = file
+
+            for filename, docname, _, _ in targets:
+                if filename != file:
+                    continue
+                if docname:
+                    brokendoc = docname
+                    break
+
+            print(f'ERROR: \'{brokendoc}\': is not referenced from anywhere')
+            fail = True
+
+    return fail
+
+
 parser = argparse.ArgumentParser(description='HTML reference checker')
 parser.add_argument('--webroot', required=True,
                     help='path to the web root')
+parser.add_argument('--entrypoint', default="index.html",
+                    help='file name of web entry point relative to --webroot')
 parser.add_argument('--external', action="store_true",
                     help='print external references instead')
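To make the new reachability check concrete, here is a minimal, self-contained sketch of the same depth-first crawl. The toy relative page names and the crawl() helper are illustrative only; the real script recurses via check_usage_crawl() over the absolute paths returned by get_file_list():

    # Toy re-statement of the crawl added above (names are hypothetical).
    def crawl(page, targets, visited):
        visited.append(page)
        tocrawl = []
        for filename, _docname, target, _anchor in targets:
            if page != filename:
                continue
            # drop any '#anchor' fragment so whole pages are compared
            targetpage = target.split("#", 1)[0]
            if targetpage not in visited and targetpage not in tocrawl:
                tocrawl.append(targetpage)
        for nxt in tocrawl:
            crawl(nxt, targets, visited)

    # (filename, docname, target, anchor) tuples: "filename links to target"
    targets = [
        ('index.html', 'Index', 'docs.html#intro', None),
        ('docs.html', 'Documentation', 'api.html', None),
    ]
    files = ['index.html', 'docs.html', 'api.html', 'orphan.html']

    visited = []
    crawl('index.html', targets, visited)
    print([f for f in files if f not in visited])   # -> ['orphan.html']

Anything left out of visited after the crawl from the entry point is exactly what check_usage() reports as unreachable.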
@@ -128,8 +175,12 @@ args = parser.parse_args()
 
 files = get_file_list(os.path.abspath(args.webroot))
 
+entrypoint = os.path.join(os.path.abspath(args.webroot), args.entrypoint)
+
 targets, anchors = process_all(files)
 
+fail = False
+
 if args.external:
     prev = None
     externallinks.sort()
@@ -140,6 +191,12 @@ if args.external:
         prev = ext
 else:
     if check_targets(targets, anchors):
-        sys.exit(1)
+        fail = True
+
+    if check_usage(targets, files, entrypoint):
+        fail = True
+
+if fail:
+    sys.exit(1)
 
 sys.exit(0)
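With the check wired into the exit code, an orphaned page now fails the run. A hypothetical session (the page name is made up; the ERROR line follows the format string added above):

    $ python3 scripts/check-html-references.py --webroot build/docs
    ERROR: 'unreachable-page': is not referenced from anywhere
    $ echo $?
    1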