@@ -34,8 +34,8 @@ class UnexpectedGithubStatus(RuntimeError):
3434 pass
3535
3636
# Contributor names which should be ignored (such as bots).
# print_contributors() drops every resolved contributor that is listed here.
# Github logins are compared in their '@'-prefixed form, so each entry must be
# spelled exactly like that ('@' directly followed by the login, without any
# surrounding whitespace) or it will never match.
ignore_list = [
    '@github-actions[bot]',
    '@imgbot[bot]',
    '@actions-bot',
    '@actions-user',
    '@ImgBotApp',
    '@dependabot[bot]',
    '@weblate',
]
3939
4040CHARSET = 'utf-8'
4141
@@ -62,7 +62,7 @@ def set_repo(self, repo):
6262 def set_github_token (self , token ):
6363 self .token = token
6464
65- def get_login (self , key , commit_hash ):
65+ def _get_login (self , key , commit_hash ):
6666 """
6767 Returns the Github login associated with the given name+email key.
6868 A related commit hash is required in order to have a reference
@@ -71,24 +71,36 @@ def get_login(self, key, commit_hash):
7171 Once looked up, the results are cached in a local file.
7272 """
7373 if key not in self .keys_to_user :
74- if commit_hash :
75- self .keys_to_user [key ] = self .get_user_by_commit (commit_hash )
74+ user = self .get_user_by_email (key )
75+ if not user and commit_hash :
76+ user = self .get_user_by_commit (commit_hash )
77+
78+ if user :
79+ self .keys_to_user [key ] = user
7680 self .save ()
77- else :
78- return None
79- return self .keys_to_user [key ]
81+
82+ return self .keys_to_user .get (key , None )
83+
def get_login_or_realname(self, key, commit_hash):
    """
    Returns the Github login (@-prefixed) or, if not found, the real name from
    the given name+email key.

    key is expected to look like "Real Name <email>".  commit_hash is
    passed on to the login lookup and may be None.

    Returns None (and logs a warning) if neither a login nor a real name
    can be determined.
    """
    login = self._get_login(key, commit_hash)
    if login:
        return f'@{login}'
    # Fall back to the part in front of " <...>".  The name must not contain
    # '@', '<' or '>' in order to be accepted as a real name.
    m = re.match(r'\A([^@<>]+) <.*>\Z', key)
    if m:
        return m.group(1)
    logger.warning('unable to extract github login or real name from %r', key)
    return None
8097
8198 def get_user_by_commit (self , hash ):
8299 """
83100 Retrieves the associated Github user name for the given commit
84101 hash.
85102 """
86- headers = {
87- 'Accept' : 'application/vnd.github.v3+json' ,
88- }
89- if self .token :
90- headers ['Authorization' ] = 'token %s' % self .token
91- r = requests .get ('https://api.github.com/repos/jamulussoftware/%s/commits/%s' % (self .repo , hash ), headers = headers )
103+ r = self ._github_api_get (f'repos/jamulussoftware/{ self .repo } /commits/{ hash } ' )
92104 if 200 <= r .status_code < 300 :
93105 try :
94106 return r .json ()['author' ]['login' ]
@@ -100,6 +112,38 @@ def get_user_by_commit(self, hash):
100112 return ''
101113 raise UnexpectedGithubStatus ('status was %d' % r .status_code )
102114
def _github_api_get(self, path, *args, **kwargs):
    """
    Performs a GET request against the Github API and returns the response.

    path is appended to https://api.github.com/.  Any further positional
    and keyword arguments are passed on to requests.get().

    The request always asks for the v3 JSON format and is authenticated
    if a token has been set.  Headers supplied by the caller are merged
    into (and may override) these defaults.  Unless the caller specifies a
    timeout, a default of 30 seconds is used so that a stalled connection
    cannot block forever; in that case requests raises a timeout
    exception.

    The status code is not checked here; that is up to the caller.
    """
    headers = {
        'Accept': 'application/vnd.github.v3+json',
    }
    if self.token:
        headers['Authorization'] = 'token %s' % self.token
    # Merge instead of passing both: a "headers" entry in kwargs would
    # otherwise collide with the explicit headers= keyword below.
    headers.update(kwargs.pop('headers', None) or {})
    kwargs.setdefault('timeout', 30)
    return requests.get(f'https://api.github.com/{path}', *args, headers=headers, **kwargs)
123+
def get_user_by_email(self, key):
    """
    Tries to find the Github login for the email address contained in the
    given name+email key ("Real Name <email>").

    Github-generated noreply addresses ("login@users.noreply.github.com",
    optionally prefixed with "digits+") are resolved locally without any
    API request.  For all other addresses, the Github user search is
    queried and each candidate's profile is fetched; a candidate is only
    accepted if its profile email equals the wanted address (ignoring
    case).

    Returns the login, or None if the key contains no email address, a
    request fails or no matching profile is found.
    """
    m = re.match(r'\A[^<]+<([^<> ]+@[^<> ]+)>\Z', key)
    if not m:
        return None
    email = m.group(1)
    # Handle Github-generated email addresses via static matching:
    m = re.match(r'\A(\d+\+)?([^+@]+)@users\.noreply\.github\.com\Z', email)
    if m:
        return m.group(2)
    r = self._github_api_get('search/users', params={'q': f'{email} in:email'})
    if not 200 <= r.status_code < 300:
        logger.warning('search/users for %s failed with code %s', email, r.status_code)
        return None
    wanted = email.casefold()
    for item in r.json().get('items', []):
        login = item.get('login')
        if not login:
            continue
        # The search is only a hint; confirm against the profile itself.
        u = self._github_api_get(f'users/{login}')
        if not 200 <= u.status_code < 300:
            # Do not mistake an error payload for a profile.
            logger.warning('users/%s failed with code %s', login, u.status_code)
            continue
        # The email field may be present but null, hence "or ''".
        profile_email = u.json().get('email') or ''
        if profile_email.casefold() == wanted:
            return login

    logger.warning('unable to find a github profile with public email %s', email)
    return None
146+
103147 def save (self ):
104148 """
105149 Saves the cache to disk.
@@ -149,8 +193,8 @@ def print_website_contributors(from_, to):
149193
150194
def print_contributors(title, git_log_selector, from_, to):
    """
    Prints one line of the form "<title>: <name>, <name>, ..." to stdout.

    The names are taken from find_contributors() for the given selector
    and revision range.  Empty results and names which are listed in
    ignore_list are left out.
    """
    names = []
    for name in find_contributors(git_log_selector, from_, to):
        if not name or name in ignore_list:
            continue
        names.append(name)
    print('%s: %s' % (title, ', '.join(names)))
155199
156200
def find_contributors(git_log_selector, from_, to):
    """
    Returns the contributors of all commits in the range from_..to which
    are matched by git_log_selector (a list of arguments which is appended
    to "git log ... --").

    Both commit authors and people mentioned in "Co-authored-by:" lines of
    the commit message are considered.  Each contributor is given as
    @-prefixed Github login or, if no login can be determined, as real
    name.  Contributors for which neither can be determined are left out.

    The result is a list without duplicates, sorted case-insensitively.
    """
    contributors = set()
    log_output = subprocess.check_output(
        ['git', 'log', '-z', '--show-pulls', '--format=format:%H %an <%ae>%n%b',
         '%s..%s' % (from_, to), '--'] + git_log_selector)
    # -z separates the individual commits by NUL bytes.
    for commit in log_output.decode(CHARSET).split('\0'):
        if not commit:
            continue
        # First line is "<hash> <name> <email>", the rest is the body.
        commit_hash, author_key = commit.split('\n', 1)[0].split(' ', 1)
        contributor = authors.get_login_or_realname(author_key, commit_hash)
        # None must not end up in the set, sorting would fail on it.
        if contributor:
            contributors.add(contributor)
        co_authors = re.findall(r'Co-authored-by:\s*(\S.*(<[^ >]+>))\s*(?:$|\n)', commit, re.I)
        for co_author_full, co_author_email in co_authors:
            logger.debug('checking co author %s', co_author_full)
            contributor = authors.get_login_or_realname(co_author_full, None)
            if not contributor or not contributor.startswith('@'):
                # try to find a previous commit by this mail address
                # and pass this commit id to get_login_or_realname() to retrieve the
                # associated handle from the github API.
                previous_commit = subprocess.check_output(
                    ['git', 'log', '--format=%H', '--max-count=1',
                     '--author=%s' % re.escape(co_author_email)]).strip().decode(CHARSET)
                if previous_commit:
                    contributor = authors.get_login_or_realname(co_author_full, previous_commit)
            if contributor:
                contributors.add(contributor)

    return sorted(contributors, key=str.casefold)
194242
195243
196244if __name__ == '__main__' :
197- logging .basicConfig (format = '%(levelname)s %(message)s' )
198245 p = argparse .ArgumentParser (
199246 description = 'Generates a list of Github user names who contributed to a specific release.' )
200247 p .add_argument ('--from' , dest = 'from_' , required = True ,
@@ -205,7 +252,20 @@ def find_contributors(git_log_selector, from_, to):
205252 help = 'the path to the git repository to be analyzed, e.g. ./jamuluswebsite' )
206253 p .add_argument ('--github-token' ,
207254 help = 'a Github Personal Access Token; optional, but might be needed if we exceed the anonymous API requests per hour limit' )
255+ p .add_argument ('--verbose' , '-v' , action = 'store_true' ,
256+ help = 'enable verbose output' )
257+ p .add_argument ('--quiet' , '-q' , action = 'store_true' ,
258+ help = 'only log errors' )
208259 args = p .parse_args ()
260+ if args .verbose and args .quiet :
261+ p .error ('--verbose and --quiet are mutually exclusive' )
262+ if args .verbose :
263+ level = logging .DEBUG
264+ elif args .quiet :
265+ level = logging .ERROR
266+ else :
267+ level = logging .WARNING
268+ logging .basicConfig (format = '%(levelname)s %(message)s' , level = level )
209269 os .chdir (args .repo )
210270 authors .set_github_token (args .github_token )
211271 main (args .from_ , args .to )
0 commit comments