|
12 | 12 | """ |
13 | 13 | import datetime |
14 | 14 | import warnings |
| 15 | +from itertools import groupby |
15 | 16 | from pathlib import Path |
16 | 17 |
|
17 | 18 | import iris |
@@ -107,6 +108,34 @@ def select_host(hosts, preferred_hosts, ignore_hosts): |
107 | 108 | return hosts[0] |
108 | 109 |
|
109 | 110 |
|
def _version_sort_key(version: str) -> tuple:
    """Return a sort key that orders version strings numerically.

    A version such as ``v20190308`` or ``v10`` is ranked by its integer
    value, so ``v9`` correctly precedes ``v10``. Versions that are not a
    plain number (optionally prefixed with ``v``) rank below all numeric
    ones and are ordered lexicographically among themselves.
    """
    digits = version.lstrip('vV')
    if digits.isdecimal():
        # The raw string is kept as a deterministic tie-breaker.
        return (1, int(digits), version)
    return (0, 0, version)


def select_latest_versions(datasets: dict) -> dict:
    """Return a dict with only the latest version of each dataset.

    Keys are expected to look like ``<dataset name>.<version>``; the part
    after the last ``.`` is taken as the version. A key without any ``.``
    is treated as a dataset name with an empty version instead of raising.

    Parameters
    ----------
    datasets : dict
        A dict with dataset objects, keyed by ``<dataset name>.<version>``.

    Returns
    -------
    most_recent_datasets : dict
        A dict containing only the most recent version of each dataset object,
        in case multiple versions have been passed. Entries are ordered by
        dataset name.
    """
    # Maps dataset name -> (version sort key, original key) of the newest
    # version seen so far.
    latest = {}

    for key in datasets:
        name, separator, version = key.rpartition('.')
        if not separator:
            # No version suffix: rpartition put the whole key in `version`.
            name, version = key, ''

        rank = _version_sort_key(version)
        current = latest.get(name)
        if current is None or rank > current[0]:
            latest[name] = (rank, key)

    # Keep the original key rather than rebuilding it from name + version,
    # and emit entries sorted by dataset name.
    most_recent_datasets = {}
    for name in sorted(latest):
        key = latest[name][1]
        most_recent_datasets[key] = datasets[key]

    return most_recent_datasets
| 137 | + |
| 138 | + |
110 | 139 | def search(connection, preferred_hosts, ignore_hosts, facets): |
111 | 140 | """Search for files on ESGF. |
112 | 141 |
|
@@ -139,7 +168,11 @@ def search(connection, preferred_hosts, ignore_hosts, facets): |
139 | 168 | datasets[dataset_name] = {} |
140 | 169 | datasets[dataset_name][host] = dataset |
141 | 170 |
|
142 | | - print("Found", len(datasets), "unique datasets") |
| 171 | + # For some datasets, multiple versions are returned |
| 172 | + # https://github.com/ESMValGroup/ESMValTool_sample_data/issues/5 |
| 173 | + datasets = select_latest_versions(datasets) |
| 174 | + |
| 175 | + print(f"Found {len(datasets)} datasets (only the latest versions)") |
143 | 176 |
|
144 | 177 | # Select host and find files on host |
145 | 178 | files = {} |
|
0 commit comments