4747 "TOOL_IDENTIFIER" ,
4848 "MEDIA_COUNT" ,
4949]
50+ OPENVERSE_LEGAL_TOOLS = [
51+ "by" ,
52+ "by-nc" ,
53+ "by-nc-nd" ,
54+ "by-nc-sa" ,
55+ "by-nd" ,
56+ "by-sa" ,
57+ "cc0" ,
58+ "nc-sampling+" ,
59+ "pdm" ,
60+ "sampling+" ,
61+ ]
5062
5163
5264def parse_arguments ():
@@ -89,21 +101,8 @@ def get_all_sources_and_licenses(session, media_type):
89101 """
90102 Fetch all available sources for a given media_type.
91103 """
92- LOGGER .info (f"Fetching all sources for { media_type } " )
104+ LOGGER .info (f"Fetching all sources for the / { media_type } / endpoint " )
93105 url = f"{ OPENVERSE_BASE_URL } /{ media_type } /stats/?format=json"
94- # Standard /stats/ license
95- OPENVERSE_LEGAL_TOOLS = [
96- "by" ,
97- "by-nc" ,
98- "by-nc-nd" ,
99- "by-nc-sa" ,
100- "by-nd" ,
101- "by-sa" ,
102- "cc0" ,
103- "nc-sampling+" ,
104- "pdm" ,
105- "sampling+" ,
106- ]
107106 try :
108107 response = session .get (url )
109108 response .raise_for_status ()
@@ -159,7 +158,12 @@ def query_openverse(session):
159158 f"license={ encoded_license } "
160159 "&format=json&page=1"
161160 )
162- LOGGER .info (f"Target URL: { url } " )
161+ LOGGER .info (
162+ "Fetching Openverse data: "
163+ f"media_type={ media_type } | "
164+ f"source={ source } | "
165+ f"license={ license } "
166+ )
163167 try :
164168 response = session .get (url )
165169 if response .status_code == 401 :
@@ -177,17 +181,17 @@ def query_openverse(session):
177181 tally [key ] = count
178182 else :
179183 LOGGER .warning (
180- f"Skipping { source } , { license } : count is 0"
184+ f"Skipping ( { source } , { license } ) : count is 0"
181185 )
182186 except (requests .HTTPError , requests .RequestException ) as e :
183187 raise shared .QuantifyingException (
184188 f"Openverse fetch failed: { e } " , exit_code = 1
185189 )
186190 LOGGER .info ("Aggregating the data" )
187191 aggregate = []
188- for field , count in tally .items ():
189- source_name = field [0 ]
190- media_type_name = field [1 ]
192+ for field , media_count in tally .items ():
193+ source = field [0 ]
194+ media_type = field [1 ]
191195 license_code = field [2 ]
192196 # Append prefix "cc" except for 'pdm' and 'cc0'
193197 if license_code not in ["pdm" , "cc0" ]:
@@ -196,12 +200,10 @@ def query_openverse(session):
196200 tool_identifier = license_code
197201 aggregate .append (
198202 {
199- OPENVERSE_FIELDS [0 ].lower (): source_name , # SOURCE
200- OPENVERSE_FIELDS [1 ].lower (): media_type_name , # MEDIA_TYPE
201- OPENVERSE_FIELDS [
202- 2
203- ].lower (): tool_identifier , # LEGAL_TOOL_IDENTIFIER
204- OPENVERSE_FIELDS [3 ].lower (): count , # MEDIA_COUNT
203+ OPENVERSE_FIELDS [0 ]: source ,
204+ OPENVERSE_FIELDS [1 ]: media_type ,
205+ OPENVERSE_FIELDS [2 ]: tool_identifier .upper (),
206+ OPENVERSE_FIELDS [3 ]: media_count ,
205207 }
206208 )
207209 return aggregate
@@ -219,7 +221,7 @@ def write_data(args, data):
219221 )
220222 writer .writeheader ()
221223 for row in data :
222- writer .writerow ({ key . upper (): value for key , value in row . items ()} )
224+ writer .writerow (row )
223225
224226
225227def main ():
0 commit comments