GitHub CI: Add Python scripts for controlling organization (#1437)

This commit is contained in:
Alexander Zhogov 2020-07-23 17:29:18 +03:00 committed by GitHub
parent 8a6bd1dba3
commit c1dec4ad42
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 613 additions and 0 deletions

0
.github/org_control/__init__.py vendored Normal file
View File

51
.github/org_control/check_org.py vendored Normal file
View File

@ -0,0 +1,51 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""
Check GitHub organization and invite members
"""
# pylint: disable=fixme,no-member
from argparse import ArgumentParser
import github_api
from configs import Config
def main():
"""The main entry point function"""
arg_parser = ArgumentParser()
arg_parser.add_argument("--cfg-file", metavar="PATH", default=Config.default_cfg_path,
help=f"Path to json configuration file, e.g. {Config.default_cfg_path}")
arg_parser.add_argument("--teams", action="store_true", help="Check GitHub teams")
args, unknown_args = arg_parser.parse_known_args()
Config(args.cfg_file, unknown_args)
gh_api = github_api.GithubOrgApi()
if args.teams:
gh_api.get_org_teams()
else:
dev_emails = github_api.get_dev_emails()
print(f'\nDeveloper emails {len(dev_emails)}:', '; '.join(dev_emails))
org_emails = gh_api.get_org_emails()
print(f'\nOrg emails {len(org_emails)}:', '; '.join(org_emails))
org_pendig_invitation_emails = gh_api.get_org_invitation_emails()
invite_emails = dev_emails.difference(org_emails).difference(org_pendig_invitation_emails)
print(f'\nInvite emails {len(invite_emails)}:', '; '.join(invite_emails))
no_in_dev_emails = org_emails.difference(dev_emails)
print(f'\nOrg members - no in developers list {len(no_in_dev_emails)}:',
'; '.join(no_in_dev_emails))
valid_github_users = gh_api.get_valid_github_users(invite_emails)
gh_api.invite_users(valid_github_users)
if __name__ == '__main__':
main()

133
.github/org_control/check_pr.py vendored Normal file
View File

@ -0,0 +1,133 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""
Check GitHub PRs and set labels by type and categories, e.g. 'ExternalPR', 'category: ci'
"""
# pylint: disable=fixme,no-member
import re
from argparse import ArgumentParser
from enum import Enum
import github_api
from configs import Config
class PrType(Enum):
"""Constants for type of GitHub pull request by author membership"""
EXTERNAL = 'ExternalPR'
INTEL = 'IntelDevPR'
ORG = 'OpenvinoDevPR'
BAD = 'BadPR'
def get_pr_labels(pull):
"""Gets PR labels as set"""
pr_lables = set()
for label in pull.labels:
pr_lables.add(label.name)
return pr_lables
def set_pr_label(pull, labels):
"""Sets PR labels"""
if not labels or Config().DRY_RUN:
return
print(f'Set PR labels:', labels)
# TODO: Review labels and enable. Check setting existing labels
#pull.set_labels(labels)
def get_pr_type(pull):
"""Gets PR type using labels"""
pr_lables = get_pr_labels(pull)
pr_types = set(type.value for type in PrType)
pr_types_labels = pr_lables & pr_types
if not pr_types_labels:
return None
if len(pr_types_labels) > 1:
print(f'Duplicated labels: {pr_types_labels}')
return PrType.BAD
return PrType(PrType(pr_types_labels.pop()))
def get_label_by_team_name(team_name):
"""Generates labels by PR reviwer teams"""
if 'admins' in team_name:
return 'category: ci'
cfg = Config()
label = team_name
re_compile_label = re.compile(rf'{cfg.GITHUB_REPO}-(.+)-maintainers')
re_label = re_compile_label.match(team_name)
if re_label:
label = re_label.group(1).strip()
return f'category: {label}'
def main():
"""The main entry point function"""
arg_parser = ArgumentParser()
arg_parser.add_argument("--cfg-file", metavar="PATH", default=Config.default_cfg_path,
help=f"Path to json configuration file, e.g. {Config.default_cfg_path}")
arg_parser.add_argument("--pr", metavar="NUMBER",
help="Get GitHub pull request with the number")
arg_parser.add_argument("--pr-state", default="open", choices=["open", "closed"],
help="Set GitHub pull request state")
args, unknown_args = arg_parser.parse_known_args()
Config(args.cfg_file, unknown_args)
gh_api = github_api.GithubOrgApi()
if args.pr:
pulls = [gh_api.repo.get_pull(int(args.pr))]
else:
pulls = gh_api.repo.get_pulls(state=args.pr_state)
print(f'PRs count ({args.pr_state}):', pulls.totalCount)
non_org_intel_pr_users = set()
non_org_pr_users = set()
set_labels = []
for pull in pulls:
pr_lables = get_pr_labels(pull)
pr_type = get_pr_type(pull)
print('\n', pull, f'- Labels: {pr_lables} -', f'Type: {pr_type}', end='')
if gh_api.is_org_user(pull.user):
print(' - Org user')
if pr_type is not PrType.ORG:
print(f'NO "{PrType.ORG.value}" label - ', end='')
github_api.print_users(pull.user)
set_labels.append(PrType.ORG.value)
elif github_api.is_intel_email(pull.user.email) or \
github_api.is_intel_company(pull.user.company):
print(' - Non org user with Intel email or company')
non_org_intel_pr_users.add(pull.user)
if pr_type is not PrType.INTEL:
print(f'NO "{PrType.INTEL.value}" label - ', end='')
github_api.print_users(pull.user)
set_labels.append(PrType.INTEL.value)
else:
print(f' - Non org user with NO Intel email or company')
non_org_pr_users.add(pull.user)
if pr_type is not PrType.EXTERNAL:
print(f'NO "{PrType.EXTERNAL.value}" label - ', end='')
github_api.print_users(pull.user)
set_labels.append(PrType.EXTERNAL.value)
print('Add category labels: ', end='')
for reviewer_team in pull.get_review_requests()[1]:
reviewer_label = get_label_by_team_name(reviewer_team.name)
if reviewer_label and reviewer_label not in pr_lables:
print(get_label_by_team_name(reviewer_team.name), '| ', end='')
set_labels.append(reviewer_label)
print()
set_pr_label(pull, set_labels)
print(f'\nNon org user with Intel email or company:')
github_api.print_users(non_org_intel_pr_users)
print(f'\nNon org user with NO Intel email or company:')
github_api.print_users(non_org_pr_users)
if __name__ == '__main__':
main()

18
.github/org_control/config.json vendored Normal file
View File

@ -0,0 +1,18 @@
{
"GITHUB_TOKEN": "<Put token here or set as arg or as env variable>",
"GITHUB_ORGANIZATION": "openvinotoolkit",
"GITHUB_REPO": "openvino",
"IGNORE_LOGINS": [
"openvino-ci",
"openvino-pushbot",
"lab-nerval",
"lab-nerval-onnx-ci"
],
"EMAILS_FILE_PATH": "dev_emails-test.txt",
"PROXIES": {
"HTTP_PROXY": null,
"HTTPS_PROXY": null,
"NO_PROXY": "localhost,127.0.0.1,.intel.com"
},
"DRY_RUN": false
}

113
.github/org_control/configs.py vendored Normal file
View File

@ -0,0 +1,113 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""
Configurations management
"""
# pylint: disable=fixme,broad-except
import os
import sys
import ast
import json
from pathlib import Path
if sys.hexversion < 0x3060000:
raise Exception('Python version must be >= 3.6')
class ConfigException(Exception):
"""Base configuration exception"""
class Config:
"""Configuration wrapper"""
_instance = None
properties = None
default_cfg_path = Path(__file__).resolve().parent / 'config.json'
def __new__(cls, *_args, **_kwargs):
if not Config._instance:
Config._instance = super(Config, cls).__new__(cls)
return Config._instance
def __init__(self, file_path=None, cli_args=None):
"""
:param file_path: Path to json configuration file
:type file_path: String
:param args: List of argparse arguments with patterns: 'name=value' or 'name'
:type args: list
"""
if Config.properties:
return
self._file_path = file_path or Config.default_cfg_path
self._cli_args = cli_args or []
self._json_cfg = {}
self._args = {}
self._load_cfg()
self._parse_cli_args()
Config.properties = {}
for name, value in self._json_cfg.items():
if hasattr(self, name):
raise ConfigException(f'Duplicating prosperity: {name}')
prosperity_value = self._args.get(name) or os.getenv(name)
if prosperity_value:
# Try to set prosperity_value as Python literal structures, e.g. DRY_RUN=False
try:
prosperity_value = ast.literal_eval(prosperity_value)
except Exception:
pass
if not isinstance(prosperity_value, type(value)):
raise ConfigException(f'Python type of {name} parameter must be {type(value)}')
else:
prosperity_value = value
setattr(self, name, prosperity_value)
Config.properties[name] = prosperity_value
self.set_proxy()
def _load_cfg(self):
"""Load the json configuration file"""
try:
with open(self._file_path) as conf:
self._json_cfg = json.load(conf)
except:
print('Failed to load configuration from:', self._file_path)
raise
def _parse_cli_args(self):
"""Parse argparse arguments with patterns: 'name=value' or 'name'"""
for cli_arg in self._cli_args:
arg = cli_arg.split('=')
if arg[0] not in self._json_cfg:
raise ConfigException(f'Unsupported argument: {arg}')
self._args[arg[0]] = True if len(arg) == 1 else '='.join(arg[1:])
def get_properties(self):
"""Get all properties as Dict"""
return self.properties
def set_proxy(self):
"""Set proxies"""
for proxy_name, url in self.properties['PROXIES'].items():
if url is not None:
print(f'Set proxy: {proxy_name}={url}')
os.environ[proxy_name] = url
def _test():
"""Test and debug"""
print('Config.default_cfg_path:', Config.default_cfg_path)
cfg = Config(cli_args=['DRY_RUN=True'])
print('Config.properties:', cfg.get_properties())
if __name__ == '__main__':
_test()

View File

@ -0,0 +1,9 @@
# good comment
Last_name, First_name <first_name.last_name@intel.com>
first_name.last_name@intel.com
openvino_pushbot@intel.com
# Wrong emails
foo@foo.com
foo1 foo2
foo1 foo2@intel.com

287
.github/org_control/github_api.py vendored Normal file
View File

@ -0,0 +1,287 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""
GitHub API for controlling organization
"""
# pylint: disable=fixme,no-member
import re
import time
from github import Github, GithubException, RateLimitExceededException, IncompletableObject
from github import UnknownObjectException
from github.PaginatedList import PaginatedList
from configs import Config
def is_valid_user(user):
"""Checks that user is valid github.Github object"""
try:
return user and user.login
except IncompletableObject:
return False
def is_user_ignored(user):
"""Checks that user should be ignored"""
cfg = Config()
if is_valid_user(user) and user.login.lower() not in cfg.properties['IGNORE_LOGINS']:
return False
return True
def is_valid_name(name):
"""Checks that GitHub user's name is valid"""
return name and len(name) >= 3 and ' ' in name
def is_intel_email(email):
"""Checks that email is valid Intel email"""
return email and len(email) > 10 and ' ' not in email and email.lower().endswith('@intel.com')
def is_intel_company(company):
"""Checks that company contains intel"""
return company and 'intel' in company.lower()
def is_valid_intel_user(user):
"""Checks that user is valid GitHub and Intel user"""
return is_valid_user(user) and (is_valid_name(user.name) and is_intel_email(user.email) or
is_user_ignored(user))
def print_users(users):
"""Print list of users in different formats: list, set, PaginatedList"""
if isinstance(users, (list, set, PaginatedList)):
users_count = users.totalCount if isinstance(users, PaginatedList) else len(users)
print(f'\nGitHub users {users_count} (login - name - company - email - valid):')
else:
users = [users]
for user in users:
if not is_valid_user(user):
print('WRONG GitHub user: ???')
continue
valid_check = 'OK' if is_valid_intel_user(user) else 'FIX'
if not is_intel_email(user.email):
valid_check += ' email'
if not is_valid_name(user.name):
valid_check += ' name'
print(f'{user.login} - "{user.name}" - "{user.company}" - {user.email} - {valid_check}')
def get_dev_emails():
"""
Read a file with developer emails. Supported email formats
first_name.last_name@intel.com
Import from Outlook: Last_name, First_name <first_name.last_name@intel.com>
"""
re_email = re.compile(r'.+<(.+)>')
emails = set()
cfg = Config()
with open(cfg.properties['EMAILS_FILE_PATH']) as file_obj:
for line in file_obj:
line = line.strip().lower()
if not line or line.startswith('#'):
continue
re_outlook_email = re_email.match(line)
if re_outlook_email:
line = re_outlook_email.group(1).strip()
if not is_intel_email(line):
print(f'Wrong email in {cfg.properties["EMAILS_FILE_PATH"]}: {line}')
continue
emails.add(line)
return emails
class GithubOrgApi:
"""Common API for GitHub organization"""
def __init__(self):
self._cfg = Config()
self.github = Github(self._cfg.GITHUB_TOKEN)
self.github_org = self.github.get_organization(self._cfg.GITHUB_ORGANIZATION)
self.repo = self.github.get_repo(f'{self._cfg.GITHUB_ORGANIZATION}/'
f'{self._cfg.GITHUB_REPO}')
def is_org_user(self, user):
"""Checks that user is a member of GitHub organization"""
if is_valid_user(user):
try:
membership = user.get_organization_membership(self.github_org)
# membership.role can be 'member' or 'admin'
if membership.state == 'active' and membership.role:
return True
except UnknownObjectException:
pass
return False
def get_org_emails(self):
"""Gets and prints all emails of GitHub organization members"""
org_members = self.github_org.get_members()
org_emails = set()
org_members_fix = set()
org_emails_fix_name = set()
org_logins_fix_intel_email = set()
print(f'\nOrg members {org_members.totalCount} (login - name - company - email - valid):')
for org_member in org_members:
print_users(org_member)
if is_user_ignored(org_member):
continue
if is_intel_email(org_member.email):
org_emails.add(org_member.email.lower())
if not is_valid_name(org_member.name):
org_members_fix.add(org_member)
org_emails_fix_name.add(org_member.email.lower())
else:
org_members_fix.add(org_member)
org_logins_fix_intel_email.add(org_member.login.lower())
print_users(org_members_fix)
print(f'\nOrg members - no Intel emails {len(org_logins_fix_intel_email)}:',
'; '.join(org_logins_fix_intel_email))
print(f'\nOrg members - no real name {len(org_emails_fix_name)}:',
'; '.join(org_emails_fix_name))
return org_emails
def get_org_invitation_emails(self):
"""Gets GitHub organization teams prints info"""
org_invitations = self.github_org.invitations()
org_invitation_emails = set()
print(f'\nOrg invitations {org_invitations.totalCount} (login - name - email - valid):')
for org_invitation in org_invitations:
# TODO: investigate GithubException while access to user name and enable print_users()
# github.GithubException.IncompletableObject: 400 "Returned object contains no URL"
#print_users(org_invitation)
print(f'{org_invitation.login} - ??? - {org_invitation.email} - ???')
if is_user_ignored(org_invitation):
continue
if is_intel_email(org_invitation.email):
org_invitation_emails.add(org_invitation.email.lower())
else:
print('Strange org invitation:', org_invitation)
print(f'\nOrg invitation emails {len(org_invitation_emails)}:',
'; '.join(org_invitation_emails))
return org_invitation_emails
def get_org_teams(self):
"""Gets GitHub organization teams prints info"""
teams = []
org_teams = self.github_org.get_teams()
print('\nOrg teams count:', org_teams.totalCount)
for team in org_teams:
teams.append(team.name)
print(f'\nTeam: {team.name} - parent: {team.parent}')
repos = team.get_repos()
print('Repos:')
for repo in repos:
print(f' {repo.name} -', team.get_repo_permission(repo))
team_maintainers = team.get_members(role='maintainer')
team_maintainer_logins = set()
for maintainer in team_maintainers:
team_maintainer_logins.add(maintainer.login)
team_members = team.get_members(role='member')
team_member_logins = set()
for member in team_members:
team_member_logins.add(member.login)
members = team.get_members(role='all')
member_emails = []
print('Members (role - login - name - company - email - valid):')
for user in members:
if user.login in team_maintainer_logins:
print(' Maintainer - ', end='')
elif user.login in team_member_logins:
print(' Member - ', end='')
else:
# It is not possible to check child teams members
print(' ??? - ', end='')
print_users(user)
if is_intel_email(user.email) and not is_user_ignored(user):
member_emails.append(user.email.lower())
print(f'Intel emails {len(member_emails)}:', '; '.join(member_emails))
return teams
def get_valid_github_users(self, emails):
"""Gets valid GitHub users by email and prints status"""
valid_users = set()
no_account_emails = set()
print(f'\nGitHub users from {len(emails)} invite emails (email - status):')
for email in emails:
if not is_intel_email(email):
print(f'{email} - Non Intel email')
continue
# You can make up to 30 requests per minute; https://developer.github.com/v3/search/
# Sleep 2.4 sec is about 25 requests per minute
time.sleep(2.4)
try:
users = self.github.search_users(f'{email} in:email')
except RateLimitExceededException:
time.sleep(5)
users = self.github.search_users(f'{email} in:email')
if users.totalCount == 0:
print(f'{email} - No valid GitHub account')
no_account_emails.add(email)
continue
if users.totalCount > 1:
print(f'{email} - Found {users.totalCount} GitHub accounts')
for user in users:
if user.email and user.email.lower() == email:
print(f'{email} - OK')
valid_users.add(user)
else:
print(f'{email} - Non public or wrong email - login: {user.login} - '
f'email: {user.email}')
print('Valid users count:', len(valid_users))
print_users(valid_users)
print(f'\nIntel emails - No valid GitHub account {len(no_account_emails)}:',
'; '.join(no_account_emails))
return valid_users
def invite_users(self, users):
"""Invites users and prints status"""
if isinstance(users, (list, set)):
print(f'\nInvite {len(users)} users:')
else:
users = [users]
for user in users:
if isinstance(user, str):
print(f'Email: {user}')
self.github_org.invite_user(email=user)
else:
print(f'{user.login} - "{user.name}" - {user.email} - ', end='')
try:
if is_user_ignored(user):
print('Ignored')
continue
if not self._cfg.DRY_RUN:
self.github_org.invite_user(user=user)
print('OK')
else:
print('Dry run')
except GithubException as exc:
print(f'FAIL: {exc.data["errors"][0]["message"]}')
def _test():
"""Test and debug"""
Config(cli_args=['DRY_RUN=True'])
dev_emails = get_dev_emails()
print('dev_emails:', dev_emails)
gh_api = GithubOrgApi()
gh_api.get_org_emails()
if __name__ == '__main__':
_test()

1
.github/org_control/requirements.txt vendored Normal file
View File

@ -0,0 +1 @@
PyGithub==1.51

View File

@ -0,0 +1 @@
pylint==2.3.0