@@ -331,27 +331,27 @@ def extract_license_from_xml(record_xml):
331331 """
332332 try :
333333 root = ET .fromstring (record_xml )
334-
334+
335335 # Find license element in arXiv namespace
336336 license_element = root .find (".//{http://arxiv.org/OAI/arXiv/}license" )
337-
337+
338338 if license_element is not None and license_element .text :
339339 license_url = license_element .text .strip ()
340-
340+
341341 # Check exact mapping first
342342 if license_url in LICENSE_MAPPING :
343343 return LICENSE_MAPPING [license_url ]
344-
344+
345345 # Validate CC URLs more strictly
346346 if "creativecommons.org/licenses/" in license_url .lower ():
347347 return f"CC (unmapped): { license_url } "
348348 elif "creativecommons.org" in license_url .lower ():
349349 return f"CC (ambiguous): { license_url } "
350-
350+
351351 return f"Non-CC: { license_url } "
352-
352+
353353 return "No license field"
354-
354+
355355 except ET .ParseError as e :
356356 LOGGER .error (f"XML parsing failed: { e } " )
357357 return "XML parse error"
@@ -440,8 +440,12 @@ def save_count_data(
440440 for license_name , count in license_counts .items ():
441441 data .append ({"TOOL_IDENTIFIER" : license_name , "COUNT" : count })
442442 data .sort (key = itemgetter ("TOOL_IDENTIFIER" ))
443- with open (FILE_ARXIV_COUNT , "w" , encoding = "utf-8" , newline = "\n " ) as file_handle :
444- writer = csv .DictWriter (file_handle , fieldnames = HEADER_COUNT , dialect = "unix" )
443+ with open (
444+ FILE_ARXIV_COUNT , "w" , encoding = "utf-8" , newline = "\n "
445+ ) as file_handle :
446+ writer = csv .DictWriter (
447+ file_handle , fieldnames = HEADER_COUNT , dialect = "unix"
448+ )
445449 writer .writeheader ()
446450 for row in data :
447451 writer .writerow (row )
@@ -474,10 +478,16 @@ def save_count_data(
474478 data = []
475479 for license_name , years in year_counts .items ():
476480 for year , count in years .items ():
477- data .append ({"TOOL_IDENTIFIER" : license_name , "YEAR" : year , "COUNT" : count })
481+ data .append (
482+ {"TOOL_IDENTIFIER" : license_name , "YEAR" : year , "COUNT" : count }
483+ )
478484 data .sort (key = itemgetter ("TOOL_IDENTIFIER" , "YEAR" ))
479- with open (FILE_ARXIV_YEAR , "w" , encoding = "utf-8" , newline = "\n " ) as file_handle :
480- writer = csv .DictWriter (file_handle , fieldnames = HEADER_YEAR , dialect = "unix" )
485+ with open (
486+ FILE_ARXIV_YEAR , "w" , encoding = "utf-8" , newline = "\n "
487+ ) as file_handle :
488+ writer = csv .DictWriter (
489+ file_handle , fieldnames = HEADER_YEAR , dialect = "unix"
490+ )
481491 writer .writeheader ()
482492 for row in data :
483493 writer .writerow (row )
@@ -492,7 +502,11 @@ def save_count_data(
492502 bucket_counts [bucket ] += count
493503 for bucket , count in bucket_counts .items ():
494504 data .append (
495- {"TOOL_IDENTIFIER" : license_name , "AUTHOR_BUCKET" : bucket , "COUNT" : count }
505+ {
506+ "TOOL_IDENTIFIER" : license_name ,
507+ "AUTHOR_BUCKET" : bucket ,
508+ "COUNT" : count ,
509+ }
496510 )
497511 data .sort (key = itemgetter ("TOOL_IDENTIFIER" , "AUTHOR_BUCKET" ))
498512 with open (
@@ -656,8 +670,15 @@ def query_arxiv(args):
656670
657671 # Write provenance YAML for auditing
658672 try :
659- with open (FILE_PROVENANCE , "w" , encoding = "utf-8" , newline = "\n " ) as file_handle :
660- yaml .dump (provenance_data , file_handle , default_flow_style = False , indent = 2 )
673+ with open (
674+ FILE_PROVENANCE , "w" , encoding = "utf-8" , newline = "\n "
675+ ) as file_handle :
676+ yaml .dump (
677+ provenance_data ,
678+ file_handle ,
679+ default_flow_style = False ,
680+ indent = 2 ,
681+ )
661682 except Exception as e :
662683 LOGGER .error (f"Failed to write provenance file: { e } " )
663684 raise shared .QuantifyingException (
0 commit comments