Skip to content

Commit ea1eb95

Browse files
committed
refactor code
1 parent b06b7af commit ea1eb95

1 file changed

Lines changed: 28 additions & 26 deletions

File tree

scripts/1-fetch/openverse_fetch.py

Lines changed: 28 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,18 @@
4747
"TOOL_IDENTIFIER",
4848
"MEDIA_COUNT",
4949
]
50+
OPENVERSE_LEGAL_TOOLS = [
51+
"by",
52+
"by-nc",
53+
"by-nc-nd",
54+
"by-nc-sa",
55+
"by-nd",
56+
"by-sa",
57+
"cc0",
58+
"nc-sampling+",
59+
"pdm",
60+
"sampling+",
61+
]
5062

5163

5264
def parse_arguments():
@@ -89,21 +101,8 @@ def get_all_sources_and_licenses(session, media_type):
89101
"""
90102
Fetch all available sources for a given media_type.
91103
"""
92-
LOGGER.info(f"Fetching all sources for {media_type}")
104+
LOGGER.info(f"Fetching all sources for the /{media_type}/ endpoint")
93105
url = f"{OPENVERSE_BASE_URL}/{media_type}/stats/?format=json"
94-
# Standard /stats/ license
95-
OPENVERSE_LEGAL_TOOLS = [
96-
"by",
97-
"by-nc",
98-
"by-nc-nd",
99-
"by-nc-sa",
100-
"by-nd",
101-
"by-sa",
102-
"cc0",
103-
"nc-sampling+",
104-
"pdm",
105-
"sampling+",
106-
]
107106
try:
108107
response = session.get(url)
109108
response.raise_for_status()
@@ -159,7 +158,12 @@ def query_openverse(session):
159158
f"license={encoded_license}"
160159
"&format=json&page=1"
161160
)
162-
LOGGER.info(f"Target URL: {url}")
161+
LOGGER.info(
162+
"Fetching Openverse data: "
163+
f"media_type={media_type} | "
164+
f"source={source} | "
165+
f"license={license}"
166+
)
163167
try:
164168
response = session.get(url)
165169
if response.status_code == 401:
@@ -177,17 +181,17 @@ def query_openverse(session):
177181
tally[key] = count
178182
else:
179183
LOGGER.warning(
180-
f"Skipping {source}, {license}: count is 0"
184+
f"Skipping ({source}, {license}): count is 0"
181185
)
182186
except (requests.HTTPError, requests.RequestException) as e:
183187
raise shared.QuantifyingException(
184188
f"Openverse fetch failed: {e}", exit_code=1
185189
)
186190
LOGGER.info("Aggregating the data")
187191
aggregate = []
188-
for field, count in tally.items():
189-
source_name = field[0]
190-
media_type_name = field[1]
192+
for field, media_count in tally.items():
193+
source = field[0]
194+
media_type = field[1]
191195
license_code = field[2]
192196
# Append prefix "cc" except for 'pdm' and 'cc0'
193197
if license_code not in ["pdm", "cc0"]:
@@ -196,12 +200,10 @@ def query_openverse(session):
196200
tool_identifier = license_code
197201
aggregate.append(
198202
{
199-
OPENVERSE_FIELDS[0].lower(): source_name, # SOURCE
200-
OPENVERSE_FIELDS[1].lower(): media_type_name, # MEDIA_TYPE
201-
OPENVERSE_FIELDS[
202-
2
203-
].lower(): tool_identifier, # LEGAL_TOOL_IDENTIFIER
204-
OPENVERSE_FIELDS[3].lower(): count, # MEDIA_COUNT
203+
OPENVERSE_FIELDS[0]: source,
204+
OPENVERSE_FIELDS[1]: media_type,
205+
OPENVERSE_FIELDS[2]: tool_identifier.upper(),
206+
OPENVERSE_FIELDS[3]: media_count,
205207
}
206208
)
207209
return aggregate
@@ -219,7 +221,7 @@ def write_data(args, data):
219221
)
220222
writer.writeheader()
221223
for row in data:
222-
writer.writerow({key.upper(): value for key, value in row.items()})
224+
writer.writerow(row)
223225

224226

225227
def main():

0 commit comments

Comments
 (0)