@@ -89,10 +89,9 @@ def get_all_sources_and_licenses(session, media_type):
8989 """
9090 Fetch all available sources for a given media_type.
9191 """
92- LOGGER .info (f"Fetching all sources and licenses for { media_type } " )
92+ LOGGER .info (f"Fetching all sources for { media_type } " )
9393 url = f"{ OPENVERSE_BASE_URL } /{ media_type } /stats/?format=json"
94- # encoded_nc_sampling = urllib.parse.quote("nc-sampling+", safe="")
95- # encoded_sampling = urllib.parse.quote("sampling+", safe="")
94+ # Hard-coded list of Openverse's standard license identifiers
9695 licenses = [
9796 "by" ,
9897 "by-nc" ,
@@ -117,8 +116,8 @@ def get_all_sources_and_licenses(session, media_type):
117116 ]
118117 )
119118 """
120- To ensure the sources in /stats/ endpoints are indexed in
121- Openverse's catalog.
119+ Keep only the sources from the /stats/ endpoint that are
120+ truly indexed in Openverse's catalog.
122121 """
123122 valid_sources = set ()
124123 for source in raw_sources :
@@ -130,8 +129,8 @@ def get_all_sources_and_licenses(session, media_type):
130129 valid_sources .add (source )
131130 else :
132131 LOGGER .info (
133- f"Skipping source { source } :"
134- f" not available in /{ media_type } / endpoint"
132+ f"Skipping source { source } : "
133+ f"not available in /{ media_type } / endpoint"
135134 )
136135 LOGGER .info (f"Found { len (valid_sources )} sources for { media_type } " )
137136 return valid_sources , set (licenses )
@@ -144,9 +143,8 @@ def get_all_sources_and_licenses(session, media_type):
144143
145144def query_openverse (session ):
146145 """
147- Fetch available sources given the
148- media_type and use standard list
149- of Openverse's standard licenses.
146+ Fetch the available sources for each media type and use
147+ Openverse's standard list of licenses.
150148 """
151149 tally = {}
152150 for media_type in MEDIA_TYPES :
@@ -186,7 +184,9 @@ def query_openverse(session):
186184 {
187185 OPENVERSE_FIELDS [0 ].lower (): field [0 ], # SOURCE
188186 OPENVERSE_FIELDS [1 ].lower (): field [1 ], # MEDIA_TYPE
189- OPENVERSE_FIELDS [2 ].lower (): field [2 ], # LICENSE
187+ OPENVERSE_FIELDS [2 ].lower (): (
188+ f"{ 'cc ' + field [2 ] if field [2 ] not in ['pdm' , 'cc0' ] else field [2 ]} " # noqa: E501
189+ ), # LICENSE
190190 OPENVERSE_FIELDS [3 ].lower (): count , # MEDIA_COUNT
191191 }
192192 for field , count in tally .items ()
@@ -214,7 +214,6 @@ def main():
214214 session = get_requests_session ()
215215 LOGGER .info ("Starting Openverse Fetch Script..." )
216216 records = query_openverse (session )
217- LOGGER .info (f"CHECKING: { records [0 ]} " )
218217 write_data (args , records )
219218 LOGGER .info (f"Fetched { len (records )} unique Openverse records" )
220219
0 commit comments