Skip to content

Commit 035c45b

Browse files
Remove datasets with multiple versions (#12)
Co-authored-by: Bouwe Andela <b.andela@esciencecenter.nl>
1 parent eaa9d59 commit 035c45b

8 files changed

Lines changed: 34 additions & 1 deletion

download_sample_data.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"""
1313
import datetime
1414
import warnings
15+
from itertools import groupby
1516
from pathlib import Path
1617

1718
import iris
@@ -107,6 +108,34 @@ def select_host(hosts, preferred_hosts, ignore_hosts):
107108
return hosts[0]
108109

109110

111+
def _version_sort_key(version):
    """Return a sort key that orders version strings naturally.

    The string is split into runs of decimal digits and runs of other
    characters. Digit runs are compared as integers, so that ``'v10'`` sorts
    after ``'v2'``. Each run is tagged with a leading 0 (text) or 1 (number)
    so that a text run and a number run at the same position can still be
    compared without raising a ``TypeError``.
    """
    return [
        (1, int(''.join(run))) if is_number else (0, ''.join(run))
        for is_number, run in groupby(version, key=str.isdecimal)
    ]


def select_latest_versions(datasets: dict) -> dict:
    """Return a dict with only the latest version of each dataset.

    Keys are expected to look like ``'<name>.<version>'``; everything after
    the last ``'.'`` is taken to be the version. Versions are compared
    naturally (numeric parts as numbers), not as plain strings, so ``'v10'``
    is considered newer than ``'v2'``.

    Parameters
    ----------
    datasets : dict
        A dict with dataset objects, keyed by ``'<name>.<version>'``.
        A key without a ``'.'`` is treated as a dataset name with an empty
        version, which ranks below every other version of that name.

    Returns
    -------
    most_recent_datasets : dict
        A dict containing only the most recent version of each dataset object,
        in case multiple versions have been passed. Entries are ordered by
        dataset name.
    """
    # Map each dataset name to the (rank, key) pair of its newest version.
    latest = {}

    for key in datasets:
        name, separator, version = key.rpartition('.')
        if not separator:
            # No version suffix present: the whole key is the name.
            name, version = key, ''

        # The raw version string breaks ties between versions whose natural
        # keys are equal (e.g. 'v01' and 'v1'), keeping the result
        # independent of the input order.
        rank = (_version_sort_key(version), version)
        if name not in latest or rank > latest[name][0]:
            latest[name] = (rank, key)

    most_recent_datasets = {}

    for name in sorted(latest):
        key = latest[name][1]
        most_recent_datasets[key] = datasets[key]

    return most_recent_datasets
137+
138+
110139
def search(connection, preferred_hosts, ignore_hosts, facets):
111140
"""Search for files on ESGF.
112141
@@ -139,7 +168,11 @@ def search(connection, preferred_hosts, ignore_hosts, facets):
139168
datasets[dataset_name] = {}
140169
datasets[dataset_name][host] = dataset
141170

142-
print("Found", len(datasets), "unique datasets")
171+
# For some datasets, multiple versions are returned
172+
# https://github.com/ESMValGroup/ESMValTool_sample_data/issues/5
173+
datasets = select_latest_versions(datasets)
174+
175+
print(f"Found {len(datasets)} datasets (only the latest versions)")
143176

144177
# Select host and find files on host
145178
files = {}

0 commit comments

Comments
 (0)