def main():
    """Check the GitHub organization against the developer emails list.

    With ``--teams`` only the organization teams are printed. Otherwise the
    developer emails are compared with the emails of the organization members
    and of the pending invitations, the differences are printed, and the
    GitHub users found for the not-yet-invited emails are invited.

    Command-line arguments unknown to argparse are forwarded to ``Config``
    as configuration overrides.
    """
    arg_parser = ArgumentParser()
    arg_parser.add_argument("--cfg-file", metavar="PATH", default=Config.default_cfg_path,
                            help=f"Path to json configuration file, e.g. {Config.default_cfg_path}")
    arg_parser.add_argument("--teams", action="store_true", help="Check GitHub teams")
    args, unknown_args = arg_parser.parse_known_args()

    # Config is a singleton: initialize it once here so that the GitHub API
    # wrapper picks up the same settings.
    Config(args.cfg_file, unknown_args)
    gh_api = github_api.GithubOrgApi()

    if args.teams:
        gh_api.get_org_teams()
        return

    # All email collections are sets; they are sorted before printing so that
    # the output is stable between runs and can be diffed.
    dev_emails = github_api.get_dev_emails()
    print(f'\nDeveloper emails {len(dev_emails)}:', '; '.join(sorted(dev_emails)))

    org_emails = gh_api.get_org_emails()
    print(f'\nOrg emails {len(org_emails)}:', '; '.join(sorted(org_emails)))

    org_pending_invitation_emails = gh_api.get_org_invitation_emails()

    # Invite only developers who are neither members nor already invited
    invite_emails = dev_emails - org_emails - org_pending_invitation_emails
    print(f'\nInvite emails {len(invite_emails)}:', '; '.join(sorted(invite_emails)))

    no_in_dev_emails = org_emails - dev_emails
    print(f'\nOrg members - no in developers list {len(no_in_dev_emails)}:',
          '; '.join(sorted(no_in_dev_emails)))

    valid_github_users = gh_api.get_valid_github_users(invite_emails)

    gh_api.invite_users(valid_github_users)


if __name__ == '__main__':
    main()
# ---------------------------------------------------------------------------
# .github/org_control/check_pr.py
# Check GitHub PRs and set labels by type and categories,
# e.g. 'ExternalPR', 'category: ci'
# ---------------------------------------------------------------------------


class PrType(Enum):
    """Types of GitHub pull requests by author membership.

    Each value is the name of the GitHub label that marks the type.
    ``BAD`` is reported when a pull request carries more than one type label.
    """
    EXTERNAL = 'ExternalPR'
    INTEL = 'IntelDevPR'
    ORG = 'OpenvinoDevPR'
    BAD = 'BadPR'


def get_pr_labels(pull):
    """Gets the names of the PR labels as a set"""
    return {label.name for label in pull.labels}


def set_pr_label(pull, labels):
    """Sets PR labels; does nothing for empty labels or in dry-run mode"""
    if not labels or Config().DRY_RUN:
        return
    print('Set PR labels:', labels)
    # TODO: Review labels and enable. Check setting existing labels
    # pull.set_labels(labels)


def get_pr_type(pull):
    """Gets PR type using labels

    :return: None when the PR has no type label, PrType.BAD when it has more
             than one type label, otherwise the PrType of the single label
    """
    type_label_names = {pr_type.value for pr_type in PrType}
    pr_types_labels = get_pr_labels(pull) & type_label_names
    if not pr_types_labels:
        return None
    if len(pr_types_labels) > 1:
        print(f'Duplicated labels: {pr_types_labels}')
        return PrType.BAD
    return PrType(pr_types_labels.pop())


def get_label_by_team_name(team_name):
    """Generates a category label by the name of a PR reviewer team

    Teams with 'admins' in the name map to 'category: ci'. A team named
    '<repo>-<name>-maintainers' maps to 'category: <name>'. Any other team
    maps to 'category: <team name>'.
    """
    if 'admins' in team_name:
        return 'category: ci'
    cfg = Config()
    label = team_name
    # The repository name is plain text, not a pattern: escape it
    re_label = re.match(rf'{re.escape(cfg.GITHUB_REPO)}-(.+)-maintainers', team_name)
    if re_label:
        label = re_label.group(1).strip()
    return f'category: {label}'


def main():
    """Check pull requests and report the type/category labels they miss.

    Command-line arguments unknown to argparse are forwarded to ``Config``
    as configuration overrides.
    """
    arg_parser = ArgumentParser()
    arg_parser.add_argument("--cfg-file", metavar="PATH", default=Config.default_cfg_path,
                            help=f"Path to json configuration file, e.g. {Config.default_cfg_path}")
    arg_parser.add_argument("--pr", metavar="NUMBER",
                            help="Get GitHub pull request with the number")
    arg_parser.add_argument("--pr-state", default="open", choices=["open", "closed"],
                            help="Set GitHub pull request state")
    args, unknown_args = arg_parser.parse_known_args()

    Config(args.cfg_file, unknown_args)
    gh_api = github_api.GithubOrgApi()

    if args.pr:
        pulls = [gh_api.repo.get_pull(int(args.pr))]
    else:
        pulls = gh_api.repo.get_pulls(state=args.pr_state)
        print(f'PRs count ({args.pr_state}):', pulls.totalCount)
    non_org_intel_pr_users = set()
    non_org_pr_users = set()
    for pull in pulls:
        # Labels are collected per pull request. A list shared by the whole
        # loop would leak the labels of earlier PRs into the later ones.
        set_labels = []
        pr_lables = get_pr_labels(pull)
        pr_type = get_pr_type(pull)
        print('\n', pull, f'- Labels: {pr_lables} -', f'Type: {pr_type}', end='')
        if gh_api.is_org_user(pull.user):
            print(' - Org user')
            expected_type = PrType.ORG
        elif github_api.is_intel_email(pull.user.email) or \
                github_api.is_intel_company(pull.user.company):
            print(' - Non org user with Intel email or company')
            non_org_intel_pr_users.add(pull.user)
            expected_type = PrType.INTEL
        else:
            print(' - Non org user with NO Intel email or company')
            non_org_pr_users.add(pull.user)
            expected_type = PrType.EXTERNAL
        if pr_type is not expected_type:
            print(f'NO "{expected_type.value}" label - ', end='')
            github_api.print_users(pull.user)
            set_labels.append(expected_type.value)
        print('Add category labels: ', end='')
        # get_review_requests() returns a pair; the second item holds the teams
        for reviewer_team in pull.get_review_requests()[1]:
            reviewer_label = get_label_by_team_name(reviewer_team.name)
            if reviewer_label and reviewer_label not in pr_lables \
                    and reviewer_label not in set_labels:
                print(reviewer_label, '| ', end='')
                set_labels.append(reviewer_label)
        print()

        set_pr_label(pull, set_labels)

    print('\nNon org user with Intel email or company:')
    github_api.print_users(non_org_intel_pr_users)
    print('\nNon org user with NO Intel email or company:')
    github_api.print_users(non_org_pr_users)


if __name__ == '__main__':
    main()


# ---------------------------------------------------------------------------
# .github/org_control/configs.py
# Configurations management
# ---------------------------------------------------------------------------

if sys.hexversion < 0x3060000:
    raise Exception('Python version must be >= 3.6')


class ConfigException(Exception):
    """Base configuration exception"""


class Config:
    """Configuration wrapper (singleton)

    The names of the supported properties and their default values come from
    a json file. Each property may be overridden by a command-line argument
    or, when there is no such argument, by an environment variable with the
    same name. Every property is available both as an instance attribute and
    as an item of ``Config.properties``.
    """
    _instance = None
    properties = None
    default_cfg_path = Path(__file__).resolve().parent / 'config.json'

    def __new__(cls, *_args, **_kwargs):
        if Config._instance is None:
            Config._instance = super(Config, cls).__new__(cls)
        return Config._instance

    def __init__(self, file_path=None, cli_args=None):
        """
        :param file_path: Path to json configuration file
        :type file_path: String

        :param cli_args: List of argparse arguments with patterns: 'name=value' or 'name'
        :type cli_args: list

        :raises ConfigException: on an unsupported argument, on a property
                                 name that clashes with an existing attribute
                                 or on an override value of a wrong type
        """
        # __init__ runs on every Config() call; only the first one loads data.
        # Compare with None: an empty configuration is still a loaded one.
        if Config.properties is not None:
            return

        self._file_path = file_path or Config.default_cfg_path
        self._cli_args = cli_args or []

        self._json_cfg = {}
        self._args = {}

        self._load_cfg()
        self._parse_cli_args()

        # Validate everything first so that a failure leaves no half-set state
        properties = {}
        for name, value in self._json_cfg.items():
            if hasattr(self, name):
                raise ConfigException(f'Duplicating property: {name}')
            properties[name] = self._resolve_value(name, value)

        for name, property_value in properties.items():
            setattr(self, name, property_value)
        Config.properties = properties

        self.set_proxy()

    def _resolve_value(self, name, default):
        """Get the property value: CLI argument, then environment, then json default"""
        override = self._args.get(name) or os.getenv(name)
        if not override:
            return default
        # Text overrides of non-string properties are parsed as Python
        # literals, e.g. DRY_RUN=False. String properties are taken verbatim,
        # otherwise a token like "12345" would turn into an int.
        if isinstance(override, str) and not isinstance(default, str):
            try:
                override = ast.literal_eval(override)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                pass
        if not isinstance(override, type(default)):
            raise ConfigException(f'Python type of {name} parameter must be {type(default)}')
        return override

    def _load_cfg(self):
        """Load the json configuration file"""
        try:
            with open(self._file_path, encoding='utf-8') as conf:
                self._json_cfg = json.load(conf)
        except (OSError, ValueError):
            # json.JSONDecodeError is a ValueError
            print('Failed to load configuration from:', self._file_path)
            raise

    def _parse_cli_args(self):
        """Parse argparse arguments with patterns: 'name=value' or 'name'"""
        for cli_arg in self._cli_args:
            name, separator, value = cli_arg.partition('=')
            if name not in self._json_cfg:
                raise ConfigException(f'Unsupported argument: {cli_arg}')
            # A bare 'name' is a flag and means True
            self._args[name] = value if separator else True

    def get_properties(self):
        """Get all properties as Dict"""
        return self.properties

    def set_proxy(self):
        """Export the configured proxies as environment variables"""
        proxies = (self.properties or {}).get('PROXIES') or {}
        for proxy_name, url in proxies.items():
            if url is not None:
                print(f'Set proxy: {proxy_name}={url}')
                os.environ[proxy_name] = url


def _test():
    """Test and debug"""
    print('Config.default_cfg_path:', Config.default_cfg_path)
    cfg = Config(cli_args=['DRY_RUN=True'])
    print('Config.properties:', cfg.get_properties())


if __name__ == '__main__':
    _test()
# ---------------------------------------------------------------------------
# .github/org_control/github_api.py
# GitHub API for controlling organization
# ---------------------------------------------------------------------------


def is_valid_user(user):
    """Checks that user is a usable GitHub user object with a login"""
    try:
        return bool(user and user.login)
    except IncompletableObject:
        return False


def is_user_ignored(user):
    """Checks that user should be ignored

    Invalid users and users whose login is listed in the IGNORE_LOGINS
    configuration property (case-insensitive) are ignored.
    """
    cfg = Config()
    if not is_valid_user(user):
        return True
    ignore_logins = {login.lower() for login in cfg.properties['IGNORE_LOGINS']}
    return user.login.lower() in ignore_logins


def is_valid_name(name):
    """Checks that GitHub user's name is valid: at least 3 characters with a space"""
    return bool(name) and len(name) >= 3 and ' ' in name


def is_intel_email(email):
    """Checks that email is valid Intel email"""
    return (bool(email) and len(email) > 10 and ' ' not in email
            and email.lower().endswith('@intel.com'))


def is_intel_company(company):
    """Checks that company contains intel"""
    return bool(company) and 'intel' in company.lower()


def is_valid_intel_user(user):
    """Checks that user is valid GitHub and Intel user

    A valid GitHub user is accepted when it has both a valid name and an
    Intel email, or when its login is in the ignore list.
    """
    if not is_valid_user(user):
        return False
    has_intel_identity = is_valid_name(user.name) and is_intel_email(user.email)
    return bool(has_intel_identity or is_user_ignored(user))


def print_users(users):
    """Print a single user or users in different formats: list, set, PaginatedList"""
    if isinstance(users, (list, set, PaginatedList)):
        users_count = users.totalCount if isinstance(users, PaginatedList) else len(users)
        print(f'\nGitHub users {users_count} (login - name - company - email - valid):')
    else:
        # A single user is printed without the header
        users = [users]
    for user in users:
        if not is_valid_user(user):
            print('WRONG GitHub user: ???')
            continue
        valid_check = 'OK' if is_valid_intel_user(user) else 'FIX'
        if not is_intel_email(user.email):
            valid_check += ' email'
        if not is_valid_name(user.name):
            valid_check += ' name'
        print(f'{user.login} - "{user.name}" - "{user.company}" - {user.email} - {valid_check}')


def get_dev_emails():
    """
    Read a file with developer emails. Supported email formats
    first_name.last_name@intel.com
    Import from Outlook: Last_name, First_name <first_name.last_name@intel.com>

    Empty lines and lines starting with '#' are skipped. Lines that do not
    hold an Intel email are reported and skipped.

    :return: Lower-cased developer emails
    :rtype: set
    """
    re_email = re.compile(r'.+<(.+)>')
    emails = set()
    cfg = Config()
    emails_file_path = cfg.properties['EMAILS_FILE_PATH']
    with open(emails_file_path, encoding='utf-8') as file_obj:
        for line in file_obj:
            line = line.strip().lower()
            if not line or line.startswith('#'):
                continue
            re_outlook_email = re_email.match(line)
            if re_outlook_email:
                line = re_outlook_email.group(1).strip()
            if not is_intel_email(line):
                print(f'Wrong email in {emails_file_path}: {line}')
                continue
            emails.add(line)
    return emails


def _github_error_message(exc):
    """Get a readable message from GithubException without failing on its data shape"""
    data = exc.data if isinstance(exc.data, dict) else {}
    errors = data.get('errors') or []
    if errors and isinstance(errors[0], dict) and errors[0].get('message'):
        return errors[0]['message']
    return data.get('message') or str(exc)


class GithubOrgApi:
    """Common API for GitHub organization"""

    def __init__(self):
        self._cfg = Config()
        self.github = Github(self._cfg.GITHUB_TOKEN)
        self.github_org = self.github.get_organization(self._cfg.GITHUB_ORGANIZATION)
        self.repo = self.github.get_repo(f'{self._cfg.GITHUB_ORGANIZATION}/'
                                         f'{self._cfg.GITHUB_REPO}')

    def is_org_user(self, user):
        """Checks that user is a member of GitHub organization"""
        if not is_valid_user(user):
            return False
        try:
            membership = user.get_organization_membership(self.github_org)
        except UnknownObjectException:
            return False
        # membership.role can be 'member' or 'admin'
        return bool(membership.state == 'active' and membership.role)

    def get_org_emails(self):
        """Gets and prints all emails of GitHub organization members

        :return: Lower-cased Intel emails of the not ignored members
        :rtype: set
        """
        org_members = self.github_org.get_members()
        org_emails = set()
        org_members_fix = set()
        org_emails_fix_name = set()
        org_logins_fix_intel_email = set()

        print(f'\nOrg members {org_members.totalCount} (login - name - company - email - valid):')
        for org_member in org_members:
            print_users(org_member)
            if is_user_ignored(org_member):
                continue
            if is_intel_email(org_member.email):
                org_emails.add(org_member.email.lower())
                if not is_valid_name(org_member.name):
                    org_members_fix.add(org_member)
                    org_emails_fix_name.add(org_member.email.lower())
            else:
                org_members_fix.add(org_member)
                org_logins_fix_intel_email.add(org_member.login.lower())

        print_users(org_members_fix)
        print(f'\nOrg members - no Intel emails {len(org_logins_fix_intel_email)}:',
              '; '.join(sorted(org_logins_fix_intel_email)))
        print(f'\nOrg members - no real name {len(org_emails_fix_name)}:',
              '; '.join(sorted(org_emails_fix_name)))
        return org_emails

    def get_org_invitation_emails(self):
        """Gets and prints Intel emails of pending GitHub organization invitations

        :return: Lower-cased Intel emails of the not ignored invitations
        :rtype: set
        """
        org_invitations = self.github_org.invitations()
        org_invitation_emails = set()

        print(f'\nOrg invitations {org_invitations.totalCount} (login - name - email - valid):')
        for org_invitation in org_invitations:
            # TODO: investigate GithubException while access to user name and enable print_users()
            # github.GithubException.IncompletableObject: 400 "Returned object contains no URL"
            print(f'{org_invitation.login} - ??? - {org_invitation.email} - ???')
            if is_user_ignored(org_invitation):
                continue
            if is_intel_email(org_invitation.email):
                org_invitation_emails.add(org_invitation.email.lower())
            else:
                print('Strange org invitation:', org_invitation)

        print(f'\nOrg invitation emails {len(org_invitation_emails)}:',
              '; '.join(sorted(org_invitation_emails)))
        return org_invitation_emails

    def get_org_teams(self):
        """Gets GitHub organization teams and prints their repos and members

        :return: Names of the organization teams
        :rtype: list
        """
        teams = []
        org_teams = self.github_org.get_teams()
        print('\nOrg teams count:', org_teams.totalCount)
        for team in org_teams:
            teams.append(team.name)
            print(f'\nTeam: {team.name} - parent: {team.parent}')

            repos = team.get_repos()
            print('Repos:')
            for repo in repos:
                print(f' {repo.name} -', team.get_repo_permission(repo))

            team_maintainer_logins = {user.login for user in team.get_members(role='maintainer')}
            team_member_logins = {user.login for user in team.get_members(role='member')}
            member_emails = []
            print('Members (role - login - name - company - email - valid):')
            for user in team.get_members(role='all'):
                if user.login in team_maintainer_logins:
                    print(' Maintainer - ', end='')
                elif user.login in team_member_logins:
                    print(' Member - ', end='')
                else:
                    # It is not possible to check child teams members
                    print(' ??? - ', end='')
                print_users(user)
                if is_intel_email(user.email) and not is_user_ignored(user):
                    member_emails.append(user.email.lower())
            print(f'Intel emails {len(member_emails)}:', '; '.join(member_emails))
        return teams

    def get_valid_github_users(self, emails):
        """Gets valid GitHub users by email and prints status

        :param emails: Emails to search GitHub accounts for
        :return: GitHub users whose public email matches one of the emails
        :rtype: set
        """
        valid_users = set()
        no_account_emails = set()
        print(f'\nGitHub users from {len(emails)} invite emails (email - status):')
        for email in emails:
            if not is_intel_email(email):
                print(f'{email} - Non Intel email')
                continue

            # You can make up to 30 requests per minute; https://developer.github.com/v3/search/
            # Sleep 2.4 sec is about 25 requests per minute
            time.sleep(2.4)
            try:
                users = self.github.search_users(f'{email} in:email')
            except RateLimitExceededException:
                # One retry after a short pause; a second failure is raised
                time.sleep(5)
                users = self.github.search_users(f'{email} in:email')

            if users.totalCount == 0:
                print(f'{email} - No valid GitHub account')
                no_account_emails.add(email)
                continue
            if users.totalCount > 1:
                print(f'{email} - Found {users.totalCount} GitHub accounts')
            for user in users:
                # Compare case-insensitively on both sides
                if user.email and user.email.lower() == email.lower():
                    print(f'{email} - OK')
                    valid_users.add(user)
                else:
                    print(f'{email} - Non public or wrong email - login: {user.login} - '
                          f'email: {user.email}')
        print('Valid users count:', len(valid_users))
        print_users(valid_users)
        print(f'\nIntel emails - No valid GitHub account {len(no_account_emails)}:',
              '; '.join(sorted(no_account_emails)))
        return valid_users

    def invite_users(self, users):
        """Invites users and prints status

        Nothing is sent in dry-run mode, both for user objects and for emails.

        :param users: A single GitHub user or email, or a list/set of them
        """
        if isinstance(users, (list, set)):
            print(f'\nInvite {len(users)} users:')
        else:
            users = [users]

        for user in users:
            by_email = isinstance(user, str)
            if by_email:
                print(f'Email: {user} - ', end='')
            else:
                print(f'{user.login} - "{user.name}" - {user.email} - ', end='')
            try:
                if not by_email and is_user_ignored(user):
                    print('Ignored')
                    continue
                if self._cfg.DRY_RUN:
                    print('Dry run')
                    continue
                if by_email:
                    self.github_org.invite_user(email=user)
                else:
                    self.github_org.invite_user(user=user)
                print('OK')
            except GithubException as exc:
                print(f'FAIL: {_github_error_message(exc)}')


def _test():
    """Test and debug"""
    Config(cli_args=['DRY_RUN=True'])
    dev_emails = get_dev_emails()
    print('dev_emails:', dev_emails)

    gh_api = GithubOrgApi()
    gh_api.get_org_emails()


if __name__ == '__main__':
    _test()