pgadmin4/tools/sql_keywords.py
Cyril Jouve 5a253f9053 Change the following to replace Python 2 code with Python 3:
1) Replace the deprecated unit test method.
2) Wraps filter usage in a list call.
3) Converts the old metaclass syntax to new.
4) Use range instead of xrange method.
5) Change Unicode to str.
6) Several other transformations.
7) Fixed change password test cases.
8) Use simplejson instead of plain JSON.
2020-08-31 16:45:31 +05:30

107 lines
3.1 KiB
Python

# -*- coding: utf-8 -*-
##########################################################################
#
# pgAdmin 4 - PostgreSQL Tools
#
# Copyright (C) 2013 - 2020, The pgAdmin Development Team
# This software is released under the PostgreSQL Licence
#
##########################################################################
# This utility will extract SQL keywords from postgres website and
# pgsql keywords from the code git paths mentioned in PG_CODES_URLS
# Note that, PG_CODES_URLS may need to be changed manually per version change
import re
import requests
import argparse
PG_CODES_URLS = [
"https://git.postgresql.org/gitweb/?p=postgresql.git;a=blob_plain;"
"f=src/pl/plpgsql/src/pl_scanner.c",
]
PG_CODES_REGEX = "PG_KEYWORD\(\"([a-z]*)\"[A-Z_, ]*\)"
PG_SQL_DOCS_URL = \
"https://www.postgresql.org/docs/current/sql-keywords-appendix.html"
PG_SQL_DOCS_REGEX = "<[a-z =\"]*>([A-Z_]*)"
PG_CURRENT_VERSION_URL = "https://www.postgresql.org/docs/current/index.html"
PG_CURRENT_VERSION_REGEX = "PostgreSQL ([0-9.]+) Documentation"
def apply_regex(text, regex):
return re.findall(regex, text)
def get_file_from_url(url):
req = requests.get(url)
return req.text
def extract_keywords(text, regex):
keywords = apply_regex(text, regex)
return [k.lower() for k in keywords]
def get_release_tag(current_url=PG_CURRENT_VERSION_URL,
version_regex=PG_CURRENT_VERSION_REGEX):
resp_text = get_file_from_url(current_url)
version = apply_regex(resp_text, version_regex)
if isinstance(version, list):
version = version[0]
return "REL_" + version.replace(".", "_")
def get_keywords_pg_code(file_urls=PG_CODES_URLS,
keyword_regex=PG_CODES_REGEX):
keywords = []
# Lets get the latest version first
rel_tag = get_release_tag()
for file_url in file_urls:
if "hb" not in file_url:
file_url = file_url + ";hb=" + rel_tag
resp_text = get_file_from_url(file_url)
# Sample entry - PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
keywords.extend(extract_keywords(resp_text, keyword_regex))
return keywords
def get_keywords_pg_docs(docs_url=PG_SQL_DOCS_URL,
keyword_regex=PG_SQL_DOCS_REGEX):
resp_text = get_file_from_url(docs_url)
# Sample entry - <code class="token">ABORT</code>
keywords = extract_keywords(resp_text, keyword_regex)
return keywords
def get_all_keywords():
final_keywords = set()
final_keywords.update(get_keywords_pg_code())
final_keywords.update(get_keywords_pg_docs())
return len(final_keywords), " ".join(sorted(list(final_keywords))).strip()
if __name__ == '__main__':
args_parser = argparse.ArgumentParser(description="SQL Keywords extractor")
args_parser.add_argument(
'--total',
help="Print with total number of keywords",
action="store_true"
)
args = args_parser.parse_args()
total, keywords = get_all_keywords()
if args.total:
print(keywords + "%s\n\n%d keywords extracted." % (keywords, total))
else:
print(keywords)