|
| 1 | +from pathlib import Path |
| 2 | + |
| 3 | +import cf_units |
| 4 | +import iris |
| 5 | +import yaml |
| 6 | + |
# Directory containing this file; data and config paths below are
# resolved relative to it.
base_dir = Path(__file__).parent

# Module-level progress flag; flipped to True when run as a script.
VERBOSE = False

# Load the dataset configuration that ships next to this module.
with open(base_dir / 'datasets.yml', 'r') as f:
    config = yaml.safe_load(f)

# The config lists datasets with dot-separated components; convert them
# to slash-separated relative-path strings ('a.b.c' -> 'a/b/c') so they
# can be compared against directory paths in filter_ignored_datasets.
ignore_list = [fn.replace('.', '/') for fn in config['ignore']]
| 16 | + |
def strip_attributes(cube: 'iris.Cube') -> None:
    """Remove attributes in-place that cause issues with merging and
    concatenation.

    Parameters
    ----------
    cube : iris.Cube
        Cube whose ``attributes`` mapping is modified in-place.
    """
    # These attributes typically differ between the files of one dataset,
    # which would prevent iris from concatenating the resulting cubes.
    for attr in ('creation_date', 'tracking_id', 'history'):
        # pop() with a default removes the key if present and is a no-op
        # otherwise — no separate membership test needed.
        cube.attributes.pop(attr, None)
| 23 | + |
| 24 | + |
def simplify_time(cube: 'iris.Cube') -> None:
    """Simplifies the time coordinate in-place.

    The time coordinate is converted to a common reference epoch
    (days since 1850-01-01) while keeping the coordinate's own calendar.
    """
    time_coord = cube.coord('time')
    target_units = cf_units.Unit(
        'days since 1850-1-1 00:00:00',
        calendar=time_coord.units.calendar,
    )
    time_coord.convert_units(target_units)
| 31 | + |
| 32 | + |
def load_cubes_from_input_dirs(input_dirs: list) -> 'Iterator[iris.Cube]':
    """Generator that loads all *.nc files from each input dir into a cube.

    Parameters
    ----------
    input_dirs : list of pathlib.Path
        Directories to scan; they are processed in sorted order.

    Yields
    ------
    iris.Cube
        One cube per input directory, concatenated from all *.nc files
        found in that directory.
    """
    for i, input_dir in enumerate(sorted(input_dirs)):
        if VERBOSE:
            print(f'Loading #{i:02d}:', input_dir)

        files = input_dir.glob('*.nc')
        cubes = iris.load(str(file) for file in files)
        for cube in cubes:
            # Normalise per-file metadata and time units so the cubes
            # can be concatenated along the time dimension.
            strip_attributes(cube)
            simplify_time(cube)

        # Combine into a single cube; concatenate_cube errors out if the
        # loaded cubes cannot be merged into exactly one.
        cube = cubes.concatenate_cube()

        if VERBOSE:
            print(' ', cube.shape, cube.coord('time').units.calendar)

        yield cube
| 51 | + |
| 52 | + |
def filter_ignored_datasets(dirs, root):
    """Yield every directory from `dirs` whose path relative to `root`
    is not listed in the module-level ignore list."""
    for candidate in dirs:
        rel_path = str(candidate.relative_to(root))
        if rel_path in ignore_list:
            if VERBOSE:
                print('Ignored:', rel_path)
            continue
        yield candidate
| 60 | + |
| 61 | + |
def load_timeseries_cubes(mip_table: str = 'Amon') -> list:
    """Returns a list of iris cubes with timeseries data.

    The data are: ta / Amon / historical / r1i1p1f1, any grid, 1950 - onwards.
    All dimensions were reduced to a few steps except for the time dimension.

    Parameters
    ----------
    mip_table: str
        select monthly (`Amon`) or daily (`day`) data.

    Returns
    -------
    list of iris.cube
    """
    timeseries_dir = base_dir / 'data' / 'timeseries'

    # Collect the unique parent directories of every matching .nc file.
    nc_paths = timeseries_dir.glob(f'**/{mip_table}/**/*.nc')
    candidate_dirs = list({nc_path.parent for nc_path in nc_paths})

    # Drop datasets listed in the module-level ignore list.
    kept_dirs = list(filter_ignored_datasets(candidate_dirs, timeseries_dir))

    return list(load_cubes_from_input_dirs(kept_dirs))
| 88 | + |
| 89 | + |
if __name__ == '__main__':
    # Running as a script: enable progress output in the helpers above.
    VERBOSE = True

    # Load (and regrid) the timeseries data for both supported MIP tables.
    for mip_table in (
        'Amon',
        'day',
    ):
        print()
        print(f'Loading `{mip_table}`')
        ts = load_timeseries_cubes(mip_table)

        # Regrid every cube onto the grid of the first one.
        # NOTE(review): the return value of regrid() is discarded, so
        # this loop only exercises the regridding code path rather than
        # keeping the regridded data — confirm that is intentional.
        first_cube = ts[0]
        for i, cube in enumerate(ts):
            print(i)
            cube.regrid(grid=first_cube, scheme=iris.analysis.Linear())
0 commit comments