Skip to content

Commit 10ae845

Browse files
authored
Merge pull request #2908 from hoffie/tools-release-contributors-weblate
tools/get_release_contributors: Improve Weblate compatibility (lookup by email, merge commit support)
2 parents 61e3d12 + cb9dba0 commit 10ae845

1 file changed

Lines changed: 89 additions & 29 deletions

File tree

tools/get_release_contributors.py

Lines changed: 89 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ class UnexpectedGithubStatus(RuntimeError):
3434
pass
3535

3636

37-
# List of user names which should be ignored (such as bots):
38-
ignore_list = ['github-actions[bot]', 'imgbot[bot]', 'actions-bot', 'actions-user', 'ImgBotApp']
37+
# List of contributor names which should be ignored (such as bots):
38+
ignore_list = ['@github-actions[bot]', '@imgbot[bot]', '@actions-bot', '@actions-user', '@ImgBotApp', '@dependabot[bot]', '@weblate']
3939

4040
CHARSET = 'utf-8'
4141

@@ -62,7 +62,7 @@ def set_repo(self, repo):
6262
def set_github_token(self, token):
6363
self.token = token
6464

65-
def get_login(self, key, commit_hash):
65+
def _get_login(self, key, commit_hash):
6666
"""
6767
Returns the Github login associated with the given name+email key.
6868
A related commit hash is required in order to have a reference
@@ -71,24 +71,36 @@ def get_login(self, key, commit_hash):
7171
Once looked up, the results are cached in a local file.
7272
"""
7373
if key not in self.keys_to_user:
74-
if commit_hash:
75-
self.keys_to_user[key] = self.get_user_by_commit(commit_hash)
74+
user = self.get_user_by_email(key)
75+
if not user and commit_hash:
76+
user = self.get_user_by_commit(commit_hash)
77+
78+
if user:
79+
self.keys_to_user[key] = user
7680
self.save()
77-
else:
78-
return None
79-
return self.keys_to_user[key]
81+
82+
return self.keys_to_user.get(key, None)
83+
84+
def get_login_or_realname(self, key, commit_hash):
    """
    Returns the Github login (@-prefixed) or, if not found, the real name from
    the given name+email key.

    key is expected to look like 'Real Name <email>'; commit_hash is passed
    through to self._get_login() and may be None.

    Returns None (after logging a warning) if neither a login nor a real
    name can be determined.
    """
    login = self._get_login(key, commit_hash)
    if login:
        return f'@{login}'
    # Raw string: \A and \Z are regex anchors, not Python string escapes.
    # The name part must not contain '@', '<' or '>'.
    m = re.match(r'\A([^@<>]+) <.*>\Z', key)
    if m:
        return m.group(1)
    logger.warning(f'unable to extract github login or real name from {repr(key)}')
    return None
8097

8198
def get_user_by_commit(self, hash):
8299
"""
83100
Retrieves the associated Github user name for the given commit
84101
hash.
85102
"""
86-
headers = {
87-
'Accept': 'application/vnd.github.v3+json',
88-
}
89-
if self.token:
90-
headers['Authorization'] = 'token %s' % self.token
91-
r = requests.get('https://api.github.com/repos/jamulussoftware/%s/commits/%s' % (self.repo, hash), headers=headers)
103+
r = self._github_api_get(f'repos/jamulussoftware/{self.repo}/commits/{hash}')
92104
if 200 <= r.status_code < 300:
93105
try:
94106
return r.json()['author']['login']
@@ -100,6 +112,38 @@ def get_user_by_commit(self, hash):
100112
return ''
101113
raise UnexpectedGithubStatus('status was %d' % r.status_code)
102114

115+
def _github_api_get(self, path, *args, **kwargs):
    """
    Sends a GET request to the Github REST API and returns the requests
    response object; status handling is left to the caller.

    path is appended to https://api.github.com/. Additional positional and
    keyword arguments are forwarded to requests.get(). The Accept header is
    always set; an Authorization header is added if self.token is set.
    """
    headers = {
        'Accept': 'application/vnd.github.v3+json',
    }
    if self.token:
        headers['Authorization'] = 'token %s' % self.token
    # Merge caller-supplied headers; passing `headers` twice to
    # requests.get() would raise a TypeError.
    headers.update(kwargs.pop('headers', None) or {})
    # requests does not time out by default, so an unresponsive server
    # would block the script forever. Callers may still override this.
    kwargs.setdefault('timeout', 30)
    return requests.get(f'https://api.github.com/{path}', *args, headers=headers, **kwargs)
123+
124+
def get_user_by_email(self, key):
    """
    Looks up the Github login for the email address contained in the given
    'Name <email>' key.

    Github-generated noreply addresses are resolved statically without any
    API request. Other addresses are looked up via the search/users API;
    a candidate is only accepted if its user profile reports exactly this
    email address.

    Returns the login (without @ prefix), or None if the key contains no
    email address, the search request fails or no profile matches.
    """
    m = re.match(r'\A[^<]+<([^<> ]+@[^<> ]+)>\Z', key)
    if not m:
        return None
    email = m.group(1)
    # Handle Github-generated email addresses via static matching
    # (optionally prefixed with '<digits>+'):
    m = re.match(r'\A(\d+\+)?([^+@]+)\@users\.noreply\.github\.com\Z', email)
    if m:
        return m.group(2)
    r = self._github_api_get('search/users', params={'q': f'{email} in:email'})
    if r.status_code < 200 or r.status_code >= 300:
        logger.warning(f'search/users for {email} failed with code {r.status_code}')
        return None
    items = r.json().get('items', [])
    for item in items:
        login = item['login']
        profile = self._github_api_get(f'users/{login}')
        if profile.status_code < 200 or profile.status_code >= 300:
            # A failed profile lookup must not abort the whole search (its
            # body may not even be JSON); try the remaining candidates.
            continue
        if profile.json().get('email', '') == email:
            return login

    logger.warning(f'unable to find a github profile with public email {email}')
    return None
146+
103147
def save(self):
104148
"""
105149
Saves the cache to disk.
@@ -149,8 +193,8 @@ def print_website_contributors(from_, to):
149193

150194

151195
def print_contributors(title, git_log_selector, from_, to):
    """
    Prints a single '<title>: <contributors>' line for the given range.

    The contributors come from find_contributors(); empty entries and
    entries listed in ignore_list are left out, the rest is joined
    with ', '.
    """
    names = []
    for name in find_contributors(git_log_selector, from_, to):
        if not name or name in ignore_list:
            continue
        names.append(name)
    print('%s: %s' % (title, ', '.join(names)))
155199

156200

@@ -164,37 +208,40 @@ def find_contributors(git_log_selector, from_, to):
164208
"""
165209
contributors = set()
166210
co_author_keys = set()
167-
commits = subprocess.check_output(['git', 'log', '-z', '--format=format:%H %an <%ae>%n%b', '%s..%s' % (from_, to), '--'] + git_log_selector)
211+
commits = subprocess.check_output(['git', 'log', '-z', '--show-pulls', '--format=format:%H %an <%ae>%n%b', '%s..%s' % (from_, to), '--'] + git_log_selector)
168212
commits = commits.decode(CHARSET)
169213
for commit in commits.split('\0'):
170214
if not commit:
171215
continue
172216
hash, author_key = commit.split('\n', 1)[0].split(' ', 1)
173-
login = authors.get_login(author_key, hash)
174-
contributors.add(login)
175-
co_authors = re.findall('Co-authored-by:\s*(\S.*(<[^ >]+>))\s*\n', commit, re.I)
217+
contributor = authors.get_login_or_realname(author_key, hash)
218+
contributors.add(contributor)
219+
co_authors = re.findall('Co-authored-by:\s*(\S.*(<[^ >]+>))\s*(?:$|\n)', commit, re.I)
176220
for co_author_full, co_author_email in co_authors:
177-
login = authors.get_login(co_author_full, None)
178-
if not login:
221+
logger.debug(f'checking co author {co_author_full}')
222+
contributor = authors.get_login_or_realname(co_author_full, None)
223+
if not contributor or not contributor.startswith('@'):
179224
# try to find a previous commit by this mail address
180-
# and pass this commit id to get_login() to retrieve the
225+
# and pass this commit id to get_login_or_realname() to retrieve the
181226
# associated handle from the github API.
182227
commit = subprocess.check_output(['git', 'log', '--format=%H', '--max-count=1', '--author=%s' % re.escape(co_author_email)]).strip().decode(CHARSET)
183228
if commit:
184-
login = authors.get_login(co_author_full, commit)
185-
if login:
186-
contributors.add(login)
229+
contributor = authors.get_login_or_realname(co_author_full, commit)
230+
if contributor:
231+
contributors.add(contributor)
187232

188233
# Resolve co-authors last because we have to rely on having seen the
189234
# email-to-login mapping via some other commit.
190235
for co_author in co_author_keys:
191-
login = authors.get_login(co_author, None)
192-
contributors.add(login)
236+
contributor = authors.get_login_or_realname(co_author, None)
237+
if contributor:
238+
contributors.add(contributor)
239+
else:
240+
contributors.add(realname)
193241
return sorted(contributors, key=str.casefold)
194242

195243

196244
if __name__ == '__main__':
197-
logging.basicConfig(format='%(levelname)s %(message)s')
198245
p = argparse.ArgumentParser(
199246
description='Generates a list of Github user names who contributed to a specific release.')
200247
p.add_argument('--from', dest='from_', required=True,
@@ -205,7 +252,20 @@ def find_contributors(git_log_selector, from_, to):
205252
help='the path to the git repository to be analyzed, e.g. ./jamuluswebsite')
206253
p.add_argument('--github-token',
207254
help='a Github Personal Access Token; optional, but might be needed if we exceed the anonymous API requests per hour limit')
255+
p.add_argument('--verbose', '-v', action='store_true',
256+
help='enable verbose output')
257+
p.add_argument('--quiet', '-q', action='store_true',
258+
help='only log errors')
208259
args = p.parse_args()
260+
if args.verbose and args.quiet:
261+
p.error('--verbose and --quiet are mutually exclusive')
262+
if args.verbose:
263+
level = logging.DEBUG
264+
elif args.quiet:
265+
level = logging.ERROR
266+
else:
267+
level = logging.WARNING
268+
logging.basicConfig(format='%(levelname)s %(message)s', level=level)
209269
os.chdir(args.repo)
210270
authors.set_github_token(args.github_token)
211271
main(args.from_, args.to)

0 commit comments

Comments
 (0)