CI: Update org control scripts (#6035)

This commit is contained in:
Alexander Zhogov
2021-06-04 13:28:19 +03:00
committed by GitHub
parent 563a095bc1
commit 126d1a649c
10 changed files with 354 additions and 57 deletions

View File

@@ -1,3 +0,0 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

View File

@@ -5,12 +5,13 @@
Check GitHub organization and invite members
"""
# pylint: disable=fixme,no-member
# pylint: disable=fixme,no-member,too-many-locals
from argparse import ArgumentParser
import github_api
from configs import Config
from github_api import GithubOrgApi, get_dev_emails
from ldap_api import LdapApi, print_user_info, InfoLevel
def main():
@@ -19,32 +20,74 @@ def main():
arg_parser.add_argument("--cfg-file", metavar="PATH", default=Config.default_cfg_path,
help=f"Path to json configuration file, e.g. {Config.default_cfg_path}")
arg_parser.add_argument("--teams", action="store_true", help="Check GitHub teams")
arg_parser.add_argument("--no-ldap", action="store_true", help="Don't use LDAP info")
args, unknown_args = arg_parser.parse_known_args()
Config(args.cfg_file, unknown_args)
gh_api = github_api.GithubOrgApi()
gh_api = GithubOrgApi()
if args.teams:
gh_api.get_org_teams()
else:
dev_emails = github_api.get_dev_emails()
print(f'\nDeveloper emails {len(dev_emails)}:', '; '.join(dev_emails))
return
org_emails = gh_api.get_org_emails()
print(f'\nOrg emails {len(org_emails)}:', '; '.join(org_emails))
cfg_emails = get_dev_emails()
print(f'\nCfg developer emails {len(cfg_emails)}:', '; '.join(sorted(cfg_emails)))
org_pendig_invitation_emails = gh_api.get_org_invitation_emails()
dev_emails = set()
dev_emails.update(cfg_emails)
invite_emails = dev_emails.difference(org_emails).difference(org_pendig_invitation_emails)
print(f'\nInvite emails {len(invite_emails)}:', '; '.join(invite_emails))
if not args.no_ldap:
ldap_api = LdapApi()
ldap_emails = ldap_api.get_user_emails()
dev_emails.update(ldap_emails)
print(f'\nLDAP developer emails {len(ldap_emails)}:', '; '.join(sorted(ldap_emails)))
no_in_dev_emails = org_emails.difference(dev_emails)
print(f'\nOrg members - no in developers list {len(no_in_dev_emails)}:',
'; '.join(no_in_dev_emails))
cfg_emails_no_in_ldap = ldap_api.get_absent_emails(cfg_emails)
print(f'\nCfg developer emails - absent in LDAP at all {len(cfg_emails_no_in_ldap)}:',
'; '.join(sorted(cfg_emails_no_in_ldap)))
valid_github_users = gh_api.get_valid_github_users(invite_emails)
cfg_ldap_inters = cfg_emails.intersection(ldap_emails)
print(f'\nCfg developer emails - present in LDAP developers {len(cfg_ldap_inters)}:',
'; '.join(sorted(cfg_ldap_inters)))
gh_api.invite_users(valid_github_users)
org_emails, org_logins_no_intel_email = gh_api.get_org_emails()
print(f'\nOrg emails {len(org_emails)}:', '; '.join(sorted(org_emails)))
org_emails_no_in_ldap = set()
if not args.no_ldap:
org_ldap_diff = org_emails.difference(ldap_emails)
print(f'\nOrg member emails - absent in LDAP developers {len(org_ldap_diff)}:',
'; '.join(sorted(org_ldap_diff)))
for email in org_ldap_diff:
user_info = ldap_api.get_user_info_by_email(email)
if user_info:
print_user_info(user_info, InfoLevel.PDL)
else:
org_emails_no_in_ldap.add(email)
org_pendig_invitation_emails = gh_api.get_org_invitation_emails()
invite_emails = dev_emails.difference(org_emails).difference(org_pendig_invitation_emails)
print(f'\nInvite emails {len(invite_emails)}:', '; '.join(sorted(invite_emails)))
valid_github_users = gh_api.get_valid_github_users(invite_emails)
gh_api.invite_users(valid_github_users)
print('\nCheck accounts below and remove from the GitHub organization and cfg list')
cfg_emails_no_in_org = sorted(cfg_emails.difference(org_emails))
print(f'\nCfg developer emails - absent in GitHub organization {len(cfg_emails_no_in_org)}:',
'; '.join(cfg_emails_no_in_org))
org_emails_no_in_dev = sorted(org_emails.difference(dev_emails))
print(f'\nOrg member emails - absent in cfg and LDAP developers {len(org_emails_no_in_dev)}:',
'; '.join(org_emails_no_in_dev))
print(f'\nOrg member emails - absent in LDAP at all {len(org_emails_no_in_ldap)}:',
'; '.join(sorted(org_emails_no_in_ldap)))
print(f'\nOrg member logins - absent Intel email {len(org_logins_no_intel_email)}:',
'; '.join(sorted(org_logins_no_intel_email)))
if __name__ == '__main__':

View File

@@ -33,13 +33,23 @@ def get_pr_labels(pull):
def set_pr_labels(pull, labels):
"""Sets PR labels"""
"""Sets new PR labels (all previously set labels are removed)"""
if not labels or Config().DRY_RUN:
return
print(f'Set PR labels:', labels)
print('Set PR labels:', labels)
# set_labels() should accept list but fails with empty "AssertionError:"
pull.set_labels(labels)
def add_pr_labels(pull, labels):
    """Adds each of the given labels to the PR; no-op for empty labels or in DRY_RUN mode"""
    # labels is checked first, so Config() is consulted only when there is something to add
    if not labels:
        return
    if Config().DRY_RUN:
        return
    print('Add PR labels:', labels)
    for new_label in labels:
        pull.add_to_labels(new_label)
def get_pr_type_by_labels(pull):
"""Gets PR type using labels"""
pr_lables = get_pr_labels(pull)
@@ -80,6 +90,17 @@ def get_category_labels(pull):
return labels
def get_pr_info_str(pull):
    """Builds a one-line PR summary: number, title, creation time, labels and type"""
    # Non-ASCII characters are dropped from the title
    ascii_title = pull.title.encode("ASCII", "ignore").decode()
    # Workaround for PyGithub issue: https://github.com/PyGithub/PyGithub/issues/512
    # created_at is tagged as UTC and then converted to the local timezone
    created_local = pull.created_at.replace(tzinfo=datetime.timezone.utc).astimezone()
    summary_parts = (
        f'PR: {pull.number}',
        ascii_title,
        f'Created: {created_local}',
        f'Labels: {get_pr_labels(pull)}',
        f'Type: {get_pr_type_by_labels(pull)}',
    )
    return ' - '.join(summary_parts)
def main():
"""The main entry point function"""
arg_parser = ArgumentParser()
@@ -103,19 +124,19 @@ def main():
print(f'\nPRs count ({args.pr_state}):', pulls.totalCount)
if args.newer:
pr_created_after = datetime.datetime.now() - datetime.timedelta(minutes=int(args.newer))
print('PRs created after:', pr_created_after)
pr_created_after = (datetime.datetime.now() -
datetime.timedelta(minutes=int(args.newer))).astimezone()
print('Checking PRs created after:', pr_created_after)
non_org_intel_pr_users = set()
non_org_pr_users = set()
for pull in pulls:
if args.newer and pull.created_at <= pr_created_after:
print(f'\nIGNORE: {pull} - Created: {pull.created_at}')
pr_created_at = pull.created_at.replace(tzinfo=datetime.timezone.utc).astimezone()
if args.newer and pr_created_at <= pr_created_after:
print(f'\nIGNORE: {get_pr_info_str(pull)}')
continue
pr_lables = get_pr_labels(pull)
pr_type_by_labels = get_pr_type_by_labels(pull)
set_labels = []
print(f'\n{pull} - Created: {pull.created_at} - Labels: {pr_lables} -',
f'Type: {pr_type_by_labels}', end='')
add_labels = []
print(f'\n{get_pr_info_str(pull)}', end='')
# Checks PR source type
if gh_api.is_org_user(pull.user):
@@ -127,21 +148,23 @@ def main():
if pr_type_by_labels is not PrType.INTEL:
print(f'NO "{PrType.INTEL.value}" label: ', end='')
github_api.print_users(pull.user)
set_labels.append(PrType.INTEL.value)
add_labels.append(PrType.INTEL.value)
elif github_api.is_user_ignored(pull.user):
print(' - IGNORED non org user with NO Intel email or company')
else:
print(f' - Non org user with NO Intel email or company')
print(' - Non org user with NO Intel email or company')
non_org_pr_users.add(pull.user)
if pr_type_by_labels is not PrType.EXTERNAL:
print(f'NO "{PrType.EXTERNAL.value}" label: ', end='')
github_api.print_users(pull.user)
set_labels.append(PrType.EXTERNAL.value)
add_labels.append(PrType.EXTERNAL.value)
set_labels += get_category_labels(pull)
set_pr_labels(pull, set_labels)
add_labels += get_category_labels(pull)
add_pr_labels(pull, add_labels)
print(f'\nNon org user with Intel email or company:')
print('\nNon org user with Intel email or company:')
github_api.print_users(non_org_intel_pr_users)
print(f'\nNon org user with NO Intel email or company:')
print('\nNon org user with NO Intel email or company:')
github_api.print_users(non_org_pr_users)

View File

@@ -6,7 +6,9 @@
"openvino-ci",
"openvino-pushbot",
"lab-nerval",
"lab-nerval-onnx-ci"
"lab-nerval-onnx-ci",
"onnx-watchdog-agent",
"dependabot"
],
"EMAILS_FILE_PATH": "dev_emails-test.txt",
"PROXIES": {

View File

@@ -57,19 +57,19 @@ class Config:
for name, value in self._json_cfg.items():
if hasattr(self, name):
raise ConfigException(f'Duplicating prosperity: {name}')
prosperity_value = self._args.get(name) or os.getenv(name)
if prosperity_value:
property_value = self._args.get(name) or os.getenv(name)
if property_value:
# Try to parse property_value as a Python literal structure, e.g. DRY_RUN=False
try:
prosperity_value = ast.literal_eval(prosperity_value)
property_value = ast.literal_eval(property_value)
except Exception:
pass
if not isinstance(prosperity_value, type(value)):
if not isinstance(property_value, type(value)):
raise ConfigException(f'Python type of {name} parameter must be {type(value)}')
else:
prosperity_value = value
setattr(self, name, prosperity_value)
Config.properties[name] = prosperity_value
property_value = value
setattr(self, name, property_value)
Config.properties[name] = property_value
self.set_proxy()
@@ -78,7 +78,7 @@ class Config:
try:
with open(self._file_path) as conf:
self._json_cfg = json.load(conf)
except:
except Exception:
print('Failed to load configuration from:', self._file_path)
raise
@@ -105,7 +105,7 @@ class Config:
def _test():
"""Test and debug"""
print('Config.default_cfg_path:', Config.default_cfg_path)
cfg = Config(cli_args=['DRY_RUN=True'])
cfg = Config(cli_args=['DRY_RUN', 'PROXIES={"NO_PROXY": "localhost"}'])
print('Config.properties:', cfg.get_properties())

View File

@@ -11,7 +11,6 @@ import re
import time
from github import Github, GithubException, RateLimitExceededException, IncompletableObject
from github import UnknownObjectException
from github.PaginatedList import PaginatedList
from configs import Config
@@ -110,17 +109,13 @@ class GithubOrgApi:
def is_org_user(self, user):
"""Checks that user is a member of GitHub organization"""
if is_valid_user(user):
try:
membership = user.get_organization_membership(self.github_org)
# membership.role can be 'member' or 'admin'
if membership.state == 'active' and membership.role:
return True
except UnknownObjectException:
pass
# user.get_organization_membership(self.github_org) doesn't work with org members
# permissions, GITHUB_TOKEN must be org owner now
return self.github_org.has_in_members(user)
return False
def get_org_emails(self):
"""Gets and prints all emails of GitHub organization members"""
"""Gets and prints emails of all GitHub organization members"""
org_members = self.github_org.get_members()
org_emails = set()
org_members_fix = set()
@@ -146,7 +141,7 @@ class GithubOrgApi:
'; '.join(org_logins_fix_intel_email))
print(f'\nOrg members - no real name {len(org_emails_fix_name)}:',
'; '.join(org_emails_fix_name))
return org_emails
return (org_emails, org_logins_fix_intel_email)
def get_org_invitation_emails(self):
"""Gets GitHub organization teams prints info"""

236
.github/org_control/ldap_api.py vendored Normal file
View File

@@ -0,0 +1,236 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""
Gets info about users and groups via LDAP
"""
# pylint: disable=fixme,no-member
from enum import Enum

from ldap3 import Server, Connection, ALL, SUBTREE
from ldap3.utils.conv import escape_filter_chars

from configs import Config
class LdapApiException(Exception):
    """Base exception raised by the LDAP API helpers of this module"""
class InfoLevel(Enum):
    """Levels of detail accepted by print_user_info for LDAP user info"""

    # Public Distribution List (group of e-mail addresses)
    PDL = 'PDL'
    # Every attribute of the user record
    FULL = 'Full'
def print_user_info(info, info_level=None):
    """Pretty-print of a user info data structure (dict).

    :param info: dict of LDAP user attributes; must contain a non-empty 'mail'
    :param info_level: InfoLevel Enum member or None. InfoLevel.PDL additionally prints
                       the 'OU=Delegated' groups, InfoLevel.FULL prints every attribute
    :raises LdapApiException: if info is empty, has no mail, or lacks an attribute that
                              the summary needs
    """
    if not info or not info.get('mail'):
        raise LdapApiException('ERROR: No info or absent mail')

    def _pdl_sort_key(group_dn):
        # Order by the first DN component only
        return group_dn.split(',', 1)[0].lower()

    def _full_sort_key(group_dn):
        # Order by the rest of the DN first, so groups with the same parent stay together
        head, _, tail = group_dn.partition(',')
        return tail + head.lower()

    def get_membership():
        if info_level == InfoLevel.PDL:
            membership_info = ' PDLs:'
            sort_key = _pdl_sort_key
        elif info_level == InfoLevel.FULL:
            membership_info = ' memberOf :'
            sort_key = _full_sort_key
        else:
            return ''

        # A user without any group has no 'memberOf' attribute at all
        groups = info.get('memberOf') or []
        # Grouping groups by purpose
        for item in sorted(groups, key=sort_key):
            if info_level == InfoLevel.PDL and 'OU=Delegated' not in item:
                continue
            membership_info += f'\n {item}'
        return membership_info

    try:
        text_info = (
            f'\n{info["cn"]} <{info["mail"]}>; {info["sAMAccountName"]}; {info["employeeID"]}'
            f'\n Org group: {info["intelSuperGroupDescr"]}'
            f' ({info["intelSuperGroupShortName"]}) /'
            f' {info["intelGroupDescr"]} ({info["intelGroupShortName"]}) /'
            f' {info["intelDivisionDescr"]} ({info["intelDivisionShortName"]}) /'
            f' {info["intelOrgUnitDescr"]}'
            f'\n Manager: {info["manager"]}'
            f'\n Location: {info["intelRegionCode"]} / {info["co"]} / {info["intelSiteCode"]} /'
            f' {info["intelBldgCode"]} ({info["intelSiteName"]}) /'
            f' {info["physicalDeliveryOfficeName"]}'
            f'\n Other: {info["employeeType"]} | {info["intelExportCountryGroup"]} |'
            f' {info["whenCreated"]} | {info["intelCostCenterDescr"]} | {info["jobDescription"]}'
        )
    except Exception as exc:
        # Typically a missing attribute; re-raised as the module exception with the cause kept
        raise LdapApiException(f'ERROR: Failed to get info about "{info["mail"]}". '
                               f'Exception occurred:\n{repr(exc)}') from exc
    print(text_info)

    membership = get_membership()
    if info_level == InfoLevel.PDL and membership:
        print(membership)
    elif info_level == InfoLevel.FULL:
        for key in sorted(info):
            if isinstance(info[key], list):
                if key == 'memberOf':
                    # Printed in the grouped order prepared by get_membership()
                    print(membership)
                else:
                    print(f' {key} :')
                    for item in info[key]:
                        print(' ', item)
            else:
                print(f' {key} : {info[key]}')
class LdapApi:
    """LDAP API for getting user info and emails"""

    # Attributes blanked in returned user info to keep the output short
    _binary_blobs = ['thumbnailPhoto', 'msExchUMSpokenName', 'msExchBlockedSendersHash']
    # Attributes replaced by the `null` placeholder when absent or empty
    _check_existing = [
        'intelExportCountryGroup',
        'physicalDeliveryOfficeName',
        'intelSuperGroupShortName',
        'intelGroupShortName',
        'intelDivisionShortName',
    ]
    null = '<null>'

    def __init__(self):
        self._cfg = Config()
        self.server = Server(self._cfg.LDAP_SERVER, get_info=ALL)
        self.connection = Connection(self.server,
                                     user=self._cfg.LDAP_USER,
                                     password=self._cfg.LDAP_PASSWORD,
                                     auto_bind=True)
        # NOTE(review): auto_bind=True presumably binds already; the explicit bind() is kept
        # as it was - confirm whether it is still needed
        self.connection.bind()

    def get_user_emails(self, groups=None):
        """Gets emails of LDAP groups and sub-groups

        Recursively walks the members of every root group and collects the lower-cased
        mail of each member located under 'OU=Workers'.

        :param groups: root group DNs; LDAP_PDLs from the config is used when empty or None
        :return: set of emails
        :raises LdapApiException: on an empty search response, a worker without mail, or
                                  several entries returned for a group
        """
        print('\nGet emails from LDAP groups:')
        processed_ldap_members = {}

        def process_group_members(member, parent_group):
            if member in processed_ldap_members:
                processed_ldap_members[member]['parent_groups'].append(parent_group)
                print('\nWARNING: Ignore LDAP member to avoid duplication and recursive cycling '
                      f'of PDLs: {member}\n '
                      f'email: {processed_ldap_members[member].get("email")}\n parent_groups:')
                for group in processed_ldap_members[member].get('parent_groups', []):
                    print(7 * ' ', group)
                return

            processed_ldap_members[member] = {'email': None, 'parent_groups': [parent_group]}

            # AD moves terminated users to the boneyard OU in case the user returns,
            # so it can be reactivated with little effort.
            # After 30 days it is removed and the unix personality becomes unlinked.
            if 'OU=Boneyard' in member:
                return

            self.connection.search(member, r'(objectClass=*)', SUBTREE,
                                   attributes=['cn', 'member', 'mail'])

            if not self.connection.response:
                raise LdapApiException(f'ERROR: empty response. LDAP member: {member}')

            attributes = self.connection.response[0]['attributes']

            # Check that the member is worker.
            # The response can contain several items, but the first item is valid only
            if 'OU=Workers' in member:
                if attributes['mail']:
                    processed_ldap_members[member]['email'] = attributes['mail'].lower()
                    return
                raise LdapApiException(f'ERROR: no mail. LDAP worker: {member}\n'
                                       f'{self.connection.entries}')

            if len(self.connection.response) > 1:
                raise LdapApiException(f'ERROR: multiple responses for {member}: '
                                       f'{len(self.connection.response)}\n'
                                       f'{self.connection.entries}')

            if attributes['member']:
                # The member list is taken before recursing: nested searches replace
                # self.connection.response
                for group_member in attributes['member']:
                    process_group_members(group_member, member)
            else:
                print(f'\nERROR: no members in LDAP group: {member}\n{self.connection.entries}')

        for group in groups or self._cfg.LDAP_PDLs:
            print('\nProcess ROOT LDAP group:', group)
            process_group_members(group, 'ROOT')

        return {
            member.get('email') for member in processed_ldap_members.values() if member.get('email')
        }

    def _get_user_info(self, query):
        """Gets user info from LDAP as dict matching key and values pairs from query

        Domains from LDAP_DOMAINS are searched in order and the first hit is returned.

        :param query: dict of attribute name -> exact value to match
        :return: dict of user attributes, or {} when no domain has a matching user
        :raises LdapApiException: if one domain returns several entries for the query
        """
        # Values may come from outside (e.g. emails from GitHub profiles), so filter
        # metacharacters such as '*', '(' and ')' are escaped to keep the match exact
        query_filter = ''.join(
            f'({key}={escape_filter_chars(str(value))})' for key, value in query.items())

        for domain in self._cfg.LDAP_DOMAINS:
            search_base = f'OU=Workers,DC={domain},DC=corp,DC=intel,DC=com'
            self.connection.search(
                search_base,
                f'(&(objectcategory=person)(objectclass=user)(intelflags=1){query_filter})',
                SUBTREE,
                attributes=['*'])

            if self.connection.response:
                if len(self.connection.response) > 1:
                    raise LdapApiException(f'ERROR: multiple responses for {query_filter}: '
                                           f'{len(self.connection.response)}\n'
                                           f'{self.connection.entries}')
                info = self.connection.response[0]['attributes']

                # remove long binary blobs
                for blob in LdapApi._binary_blobs:
                    info[blob] = b''

                for key in LdapApi._check_existing:
                    if not info.get(key):
                        info[key] = LdapApi.null

                return info
        return {}

    def get_user_info_by_idsid(self, idsid):
        """Gets user info from LDAP as dict using account name for searching"""
        return self._get_user_info({'sAMAccountName': idsid})

    def get_user_info_by_name(self, name):
        """Gets user info from LDAP as dict using common name for searching"""
        return self._get_user_info({'cn': name})

    def get_user_info_by_email(self, email):
        """Gets user info from LDAP as dict using emails for searching"""
        return self._get_user_info({'mail': email})

    def get_absent_emails(self, emails):
        """Checks users by email in LDAP and returns the set of absent emails"""
        return {email for email in emails if not self.get_user_info_by_email(email)}
def _test():
    """Manual check: prints emails of the LDAP groups and info about sample users"""
    api = LdapApi()
    group_emails = api.get_user_emails()
    print(f'\nLDAP emails count: {len(group_emails)}\n{"; ".join(group_emails)}')

    for address in ['foo@intel.com']:
        user_info = api.get_user_info_by_email(address)
        if not user_info:
            print(f'\n{address} - not found')
            continue
        print_user_info(user_info, InfoLevel.PDL)
# Run the manual test/debug routine only when the module is executed as a script
if __name__ == '__main__':
    _test()

View File

@@ -0,0 +1 @@
pylint==2.5.3

View File

@@ -1 +1,2 @@
PyGithub==1.51
ldap3==2.7

View File

@@ -1 +0,0 @@
pylint==2.3.0