pgadmin4/tools/sql_keywords.py

# -*- coding: utf-8 -*-

##########################################################################
#
# pgAdmin 4 - PostgreSQL Tools
#
# Copyright (C) 2013 - 2024, The pgAdmin Development Team
# This software is released under the PostgreSQL Licence
#
##########################################################################

# This utility extracts SQL keywords from the PostgreSQL documentation website
# and pl/pgsql keywords from the source files listed in PG_CODES_URLS.
# Note that PG_CODES_URLS may need to be updated manually for each new version.

import re
import requests
import argparse

# Source files that are downloaded and scanned for PG_KEYWORD(...) entries.
PG_CODES_URLS = [
    (
        "https://raw.githubusercontent.com/postgres/postgres/master/"
        "src/pl/plpgsql/src/pl_scanner.c"
    ),
]
# Captures the quoted lower-case keyword of one PG_KEYWORD(...) entry.
PG_CODES_REGEX = r"PG_KEYWORD\(\"([a-z]*)\"[A-Z_, ]*\)"

# Documentation page listing the SQL keywords.
PG_SQL_DOCS_URL = (
    "https://www.postgresql.org/docs/current/sql-keywords-appendix.html"
)
# Captures the upper-case text that directly follows an opening HTML tag.
PG_SQL_DOCS_REGEX = "<[a-z =\"]*>([A-Z_]*)"

# Page whose text contains the version number of the current release.
PG_CURRENT_VERSION_URL = "https://www.postgresql.org/docs/current/index.html"
# Captures the dotted version number, e.g. the "16.1" of "PostgreSQL 16.1".
PG_CURRENT_VERSION_REGEX = "PostgreSQL ([0-9.]+) Documentation"


def apply_regex(text, regex):
    """Return all non-overlapping matches of ``regex`` found in ``text``.

    The result has the shape produced by ``re.findall``: a list of strings
    when the pattern has at most one capture group.
    """
    pattern = re.compile(regex)
    matches = pattern.findall(text)
    return matches


def get_file_from_url(url, timeout=30):
    """Download ``url`` and return the response body as text.

    :param url: address to fetch with an HTTP GET request.
    :param timeout: seconds to wait for the server before giving up.
        Without a timeout ``requests`` waits indefinitely, which would
        make this command-line tool hang on an unresponsive server.
    :return: the decoded body of the response.
    """
    response = requests.get(url, timeout=timeout)
    return response.text


def extract_keywords(text, regex):
    """Return the lower-cased keywords that ``regex`` captures in ``text``.

    :param text: the document to scan.
    :param regex: pattern whose single capture group is the keyword.
    :return: list of lower-cased keywords, in order of appearance.
    """
    # The capture groups of the keyword patterns use ``*`` and can therefore
    # match the empty string (e.g. an HTML tag followed by lower-case text).
    # Such empty matches are not keywords and must not be returned/counted.
    return [match.lower() for match in re.findall(regex, text) if match]


def get_release_tag(current_url=PG_CURRENT_VERSION_URL,
                    version_regex=PG_CURRENT_VERSION_REGEX):
    """Build the release tag name for the version found at ``current_url``.

    :param current_url: page whose text contains the version number.
    :param version_regex: pattern whose capture group is the dotted version.
    :return: "REL_" followed by the version with dots replaced by
        underscores, e.g. "16.1" becomes "REL_16_1".
    :raises IndexError: if ``version_regex`` matches nothing in the page.
    """
    resp_text = get_file_from_url(current_url)
    versions = apply_regex(resp_text, version_regex)
    if not versions:
        # Indexing an empty match list would also raise IndexError; raising
        # it explicitly keeps the exception type and explains the cause.
        raise IndexError(
            "No version matching %r found at %s" % (version_regex, current_url)
        )

    # Only the first match is used.
    return "REL_" + versions[0].replace(".", "_")


def get_keywords_pg_code(file_urls=PG_CODES_URLS,
                         keyword_regex=PG_CODES_REGEX):
    """Collect keywords from the source files listed in ``file_urls``.

    Each URL is pinned to the tag returned by ``get_release_tag`` before it
    is downloaded, then scanned with ``keyword_regex``.

    :param file_urls: iterable of source file URLs to scan.
    :param keyword_regex: pattern whose capture group is the keyword.
    :return: list of lower-cased keywords from all files (may contain
        duplicates).
    """
    keywords = []

    # Look up the release tag once; it is the same for every file.
    rel_tag = get_release_tag()
    for file_url in file_urls:
        if "raw.githubusercontent.com/" in file_url:
            # GitHub raw URLs carry the git ref as a path segment, so a
            # gitweb style ";hb=<tag>" suffix would become part of the file
            # name and the download would fail. Swap the branch for the tag
            # instead; URLs without a "/master/" segment are left untouched.
            file_url = file_url.replace("/master/", "/" + rel_tag + "/", 1)
        elif "hb" not in file_url:
            # gitweb URLs select the revision with the "hb" argument; add it
            # only when the URL does not already specify one.
            file_url = file_url + ";hb=" + rel_tag
        resp_text = get_file_from_url(file_url)

        # Sample entry - PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
        keywords.extend(extract_keywords(resp_text, keyword_regex))

    return keywords


def get_keywords_pg_docs(docs_url=PG_SQL_DOCS_URL,
                         keyword_regex=PG_SQL_DOCS_REGEX):
    """Download ``docs_url`` and return the keywords found in the page.

    :param docs_url: documentation page to download.
    :param keyword_regex: pattern whose capture group is the keyword.
    :return: the list produced by ``extract_keywords`` for that page.
    """
    # Sample entry - <code class="token">ABORT</code>
    return extract_keywords(get_file_from_url(docs_url), keyword_regex)


def get_all_keywords():
    """Merge the keywords from the source code and the documentation.

    :return: a tuple ``(count, text)`` where ``count`` is the number of
        distinct keywords and ``text`` is the sorted keywords joined by
        single spaces, with surrounding whitespace stripped.
    """
    # The source code is queried first, then the documentation page.
    unique_keywords = set(get_keywords_pg_code()) | set(get_keywords_pg_docs())

    joined = " ".join(sorted(unique_keywords))
    return len(unique_keywords), joined.strip()


if __name__ == '__main__':
    # Command-line entry point: print the extracted keywords and, when
    # --total is given, also the number of keywords.
    args_parser = argparse.ArgumentParser(description="SQL Keywords extractor")
    args_parser.add_argument(
        '--total',
        help="Print with total number of keywords",
        action="store_true"
    )
    args = args_parser.parse_args()

    total, keywords = get_all_keywords()
    if args.total:
        # The keyword list is already interpolated through %s; it must not be
        # prepended as well, otherwise it is printed twice.
        print("%s\n\n%d keywords extracted." % (keywords, total))
    else:
        print(keywords)
Add a small utility for extracting PostgreSQL SQL and pl/pgsql keywords from the interwebs. 2019-03-20 08:53:37 -05:00			`# -*- coding: utf-8 -*-`

			`##########################################################################`
			`#`
			`# pgAdmin 4 - PostgreSQL Tools`
			`#`
Update copyright notices for 2024 2024-01-01 02:43:48 -06:00			`# Copyright (C) 2013 - 2024, The pgAdmin Development Team`
Add a small utility for extracting PostgreSQL SQL and pl/pgsql keywords from the interwebs. 2019-03-20 08:53:37 -05:00			`# This software is released under the PostgreSQL Licence`
			`#`
			`##########################################################################`

			`# This utility will extract SQL keywords from postgres website and`
			`# pgsql keywords from the code git paths mentioned in PG_CODES_URLS`
			`# Note that, PG_CODES_URLS may need to be changed manually per version change`

			`import re`
			`import requests`
			`import argparse`

			`PG_CODES_URLS = [`
Update to use GitHub for source repos. 2022-09-21 04:56:03 -05:00			`"https://raw.githubusercontent.com/postgres/postgres/master/src/pl/"`
			`"plpgsql/src/pl_scanner.c",`
Add a small utility for extracting PostgreSQL SQL and pl/pgsql keywords from the interwebs. 2019-03-20 08:53:37 -05:00			`]`
1) Ensure that eventlet's subprocess is used for Python versions up to 3.11 and await the issue resolution for Python versions 3.12. 2) Fixed unescape sequence for Python 3.12 2023-11-02 05:33:50 -05:00			`PG_CODES_REGEX = r"PG_KEYWORD\(\"([a-z]*)\"[A-Z_, ]*\)"`
Add a small utility for extracting PostgreSQL SQL and pl/pgsql keywords from the interwebs. 2019-03-20 08:53:37 -05:00
			`PG_SQL_DOCS_URL = \`
			`"https://www.postgresql.org/docs/current/sql-keywords-appendix.html"`
			`PG_SQL_DOCS_REGEX = "<[a-z =\"]*>([A-Z_]*)"`

			`PG_CURRENT_VERSION_URL = "https://www.postgresql.org/docs/current/index.html"`
			`PG_CURRENT_VERSION_REGEX = "PostgreSQL ([0-9.]+) Documentation"`


			`def apply_regex(text, regex):`
			`return re.findall(regex, text)`


			`def get_file_from_url(url):`
			`req = requests.get(url)`
			`return req.text`


			`def extract_keywords(text, regex):`
			`keywords = apply_regex(text, regex)`
			`return [k.lower() for k in keywords]`


			`def get_release_tag(current_url=PG_CURRENT_VERSION_URL,`
			`version_regex=PG_CURRENT_VERSION_REGEX):`
			`resp_text = get_file_from_url(current_url)`
			`version = apply_regex(resp_text, version_regex)`
Change the following to replace Python 2 code with Python 3: 1) Replace the deprecated unit test method. 2) Wraps filter usage in a list call. 3) Converts the old metaclass syntax to new. 4) Use range instead of xrange method. 5) Change Unicode to str. 6) Several other transformations. 7) Fixed change password test cases. 8) Use simplejson instead of plain JSON. 2020-08-31 06:15:31 -05:00			`if isinstance(version, list):`
Add a small utility for extracting PostgreSQL SQL and pl/pgsql keywords from the interwebs. 2019-03-20 08:53:37 -05:00			`version = version[0]`

			`return "REL_" + version.replace(".", "_")`


			`def get_keywords_pg_code(file_urls=PG_CODES_URLS,`
			`keyword_regex=PG_CODES_REGEX):`
			`keywords = []`

			`# Lets get the latest version first`
			`rel_tag = get_release_tag()`
			`for file_url in file_urls:`
			`if "hb" not in file_url:`
			`file_url = file_url + ";hb=" + rel_tag`
			`resp_text = get_file_from_url(file_url)`

			`# Sample entry - PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)`
			`keywords.extend(extract_keywords(resp_text, keyword_regex))`

			`return keywords`


			`def get_keywords_pg_docs(docs_url=PG_SQL_DOCS_URL,`
			`keyword_regex=PG_SQL_DOCS_REGEX):`
			`resp_text = get_file_from_url(docs_url)`
			`# Sample entry - <code class="token">ABORT</code>`
			`keywords = extract_keywords(resp_text, keyword_regex)`

			`return keywords`


			`def get_all_keywords():`
			`final_keywords = set()`

			`final_keywords.update(get_keywords_pg_code())`
			`final_keywords.update(get_keywords_pg_docs())`

			`return len(final_keywords), " ".join(sorted(list(final_keywords))).strip()`


			`if __name__ == '__main__':`
			`args_parser = argparse.ArgumentParser(description="SQL Keywords extractor")`
			`args_parser.add_argument(`
			`'--total',`
			`help="Print with total number of keywords",`
			`action="store_true"`
			`)`
			`args = args_parser.parse_args()`

			`total, keywords = get_all_keywords()`
			`if args.total:`
			`print(keywords + "%s\n\n%d keywords extracted." % (keywords, total))`
			`else:`
			`print(keywords)`