Skip to content

Commit 136ba10

Browse files
committed
Merge branch 'main' of github.com:NCAS-CMS/cf-python
2 parents 13abf44 + e5c13f3 commit 136ba10

9 files changed

Lines changed: 367 additions & 1 deletion

File tree

Changelog.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ version 3.17.0
33

44
**2024-??-??**
55

6+
* New methods: `cf.Field.pad_missing` and `cf.Data.pad_missing`
7+
(https://github.com/NCAS-CMS/cf-python/issues/717)
68
* Fix occasional bug when calculating UGRID cell areas when
79
non-spatial coordinates span the discrete axis
810
(https://github.com/NCAS-CMS/cf-python/issues/721)

cf/aggregate.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3920,7 +3920,22 @@ def _sort_indices(m, canonical_axes):
39203920
"""
39213921
canonical_axes = [m.id_to_axis[identity] for identity in canonical_axes]
39223922
sort_indices = tuple([m.sort_indices[axis] for axis in canonical_axes])
3923-
needs_sorting = sort_indices != (slice(None),) * len(sort_indices)
3923+
3924+
# Whether or not one or more of the axes needs sorting
3925+
needs_sorting = False
3926+
for sort_index in sort_indices:
3927+
# Note: sort_index can only be a slice object or a numpy array
3928+
# (see `_create_hash_and_first_values`)
3929+
if isinstance(sort_index, slice):
3930+
if sort_index != slice(None):
3931+
# sort_index is a slice other than slice(None)
3932+
needs_sorting = True
3933+
break
3934+
elif sort_index.size > 1:
3935+
# sort_index is an array of 2 or more integers
3936+
needs_sorting = True
3937+
break
3938+
39243939
return sort_indices, needs_sorting
39253940

39263941

cf/data/data.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2180,6 +2180,109 @@ def mean_of_upper_decile(
21802180

21812181
return d
21822182

2183+
@_inplace_enabled(default=False)
2184+
def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False):
2185+
"""Pad an axis with missing data.
2186+
2187+
:Parameters:
2188+
2189+
axis: `int`
2190+
Select the axis for which the padding is to be
2191+
applied.
2192+
2193+
*Parameter example:*
2194+
Pad second axis: ``axis=1``.
2195+
2196+
*Parameter example:*
2197+
Pad the last axis: ``axis=d.ndim - 1``.
2198+
2199+
{{pad_width: sequence of `int`, optional}}
2200+
2201+
{{to_size: `int`, optional}}
2202+
2203+
{{inplace: `bool`, optional}}
2204+
2205+
:Returns:
2206+
2207+
`Data` or `None`
2208+
The padded data, or `None` if the operation was
2209+
in-place.
2210+
2211+
**Examples**
2212+
2213+
>>> d = cf.Data(np.arange(6).reshape(2, 3))
2214+
>>> print(d.array)
2215+
[[0 1 2]
2216+
[3 4 5]]
2217+
>>> e = d.pad_missing(1, (1, 2))
2218+
>>> print(e.array)
2219+
[[-- 0 1 2 -- --]
2220+
[-- 3 4 5 -- --]]
2221+
>>> f = e.pad_missing(0, (0, 1))
2222+
>>> print(f.array)
2223+
[[-- 0 1 2 -- --]
2224+
[-- 3 4 5 -- --]
2225+
[-- -- -- -- -- --]]
2226+
2227+
>>> g = d.pad_missing(1, to_size=5)
2228+
>>> print(g.array)
2229+
[[0 1 2 -- --]
2230+
[3 4 5 -- --]]
2231+
2232+
"""
2233+
if not 0 <= axis < self.ndim:
2234+
raise ValueError(
2235+
f"'axis' must be a valid dimension position. Got {axis}"
2236+
)
2237+
2238+
if to_size is not None:
2239+
# Set pad_width from to_size
2240+
if pad_width is not None:
2241+
raise ValueError("Can't set both 'pad_width' and 'to_size'")
2242+
2243+
pad_width = (0, to_size - self.shape[axis])
2244+
elif pad_width is None:
2245+
raise ValueError("Must set either 'pad_width' or 'to_size'")
2246+
2247+
pad_width = np.asarray(pad_width)
2248+
if pad_width.shape != (2,) or not pad_width.dtype.kind == "i":
2249+
raise ValueError(
2250+
"'pad_width' must be a sequence of two integers. "
2251+
f"Got: {pad_width}"
2252+
)
2253+
2254+
pad_width = tuple(pad_width)
2255+
if any(n < 0 for n in pad_width):
2256+
if to_size is not None:
2257+
raise ValueError(
2258+
f"'to_size' ({to_size}) must not be smaller than the "
2259+
f"original axis size ({self.shape[axis]})"
2260+
)
2261+
2262+
raise ValueError(
2263+
f"Can't set a negative number of pad values. Got: {pad_width}"
2264+
)
2265+
2266+
d = _inplace_enabled_define_and_cleanup(self)
2267+
2268+
dx = d.to_dask_array()
2269+
mask0 = da.ma.getmaskarray(dx)
2270+
2271+
pad = [(0, 0)] * dx.ndim
2272+
pad[axis] = pad_width
2273+
2274+
# Pad the data with zero. This will lose the original mask.
2275+
dx = da.pad(dx, pad, mode="constant", constant_values=0)
2276+
2277+
# Pad the mask with True
2278+
mask = da.pad(mask0, pad, mode="constant", constant_values=True)
2279+
2280+
# Set the mask
2281+
dx = da.ma.masked_where(mask, dx)
2282+
2283+
d._set_dask(dx)
2284+
return d
2285+
21832286
@_inplace_enabled(default=False)
21842287
def percentile(
21852288
self,

cf/docstring/docstring.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,14 @@
587587
"{{weights auto: `bool`, optional}}": """auto: `bool`, optional
588588
If True then return `False` if weights can't be found,
589589
rather than raising an exception.""",
590+
# pad_width
591+
"{{pad_width: sequence of `int`, optional}}": """pad_width: sequence of `int`, optional
592+
Number of values to pad before and after the edges of
593+
the axis.""",
594+
# to_size
595+
"{{to_size: `int`, optional}}": """to_size: `int`, optional
596+
Pad the end of the axis so that the new axis has the given
597+
size.""",
590598
# ----------------------------------------------------------------
591599
# Method description substitutions (4 levels of indentation)
592600
# ----------------------------------------------------------------

cf/field.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11953,6 +11953,136 @@ def halo(
1195311953

1195411954
return f
1195511955

11956+
@_inplace_enabled(default=False)
11957+
def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False):
11958+
"""Pad an axis with missing data.
11959+
11960+
The field's data and all metadata constructs that span the
11961+
axis are padded.
11962+
11963+
.. versionadded:: 3.17.0
11964+
11965+
:Parameters:
11966+
11967+
axis: `str` or `int`
11968+
Select the domain axis which is to be padded, defined
11969+
by that which would be selected by passing the given
11970+
axis description to a call of the field construct's
11971+
`domain_axis` method. For example, for a value of
11972+
``'X'``, the domain axis construct returned by
11973+
``f.domain_axis('X')`` is selected.
11974+
11975+
{{pad_width: sequence of `int`, optional}}
11976+
11977+
{{to_size: `int`, optional}}
11978+
11979+
{{inplace: `bool`, optional}}
11980+
11981+
:Returns:
11982+
11983+
`Field` or `None`
11984+
The padded field construct, or `None` if the operation
11985+
was in-place.
11986+
11987+
**Examples**
11988+
11989+
>>> f = cf.example_field(6)
11990+
>>> print(f)
11991+
Field: precipitation_amount (ncvar%pr)
11992+
--------------------------------------
11993+
Data : precipitation_amount(cf_role=timeseries_id(2), time(4))
11994+
Dimension coords: time(4) = [2000-01-16 12:00:00, ..., 2000-04-15 00:00:00] gregorian
11995+
Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north
11996+
: longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east
11997+
: cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2]
11998+
: altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m
11999+
Coord references: grid_mapping_name:latitude_longitude
12000+
>>> print(f.array)
12001+
[[1. 2. 3. 4.]
12002+
[5. 6. 7. 8.]]
12003+
>>> g = f.pad_missing('T', (0, 5))
12004+
>>> print(g)
12005+
Field: precipitation_amount (ncvar%pr)
12006+
--------------------------------------
12007+
Data : precipitation_amount(cf_role=timeseries_id(2), time(9))
12008+
Dimension coords: time(9) = [2000-01-16 12:00:00, ..., --] gregorian
12009+
Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north
12010+
: longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east
12011+
: cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2]
12012+
: altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m
12013+
Coord references: grid_mapping_name:latitude_longitude
12014+
>>> print(g.array)
12015+
[[1.0 2.0 3.0 4.0 -- -- -- -- --]
12016+
[5.0 6.0 7.0 8.0 -- -- -- -- --]]
12017+
>>> h = g.pad_missing('cf_role=timeseries_id', (0, 1))
12018+
>>> print(h)
12019+
Field: precipitation_amount (ncvar%pr)
12020+
--------------------------------------
12021+
Data : precipitation_amount(cf_role=timeseries_id(3), time(9))
12022+
Dimension coords: time(9) = [2000-01-16 12:00:00, ..., --] gregorian
12023+
Auxiliary coords: latitude(cf_role=timeseries_id(3)) = [25.0, 7.0, --] degrees_north
12024+
: longitude(cf_role=timeseries_id(3)) = [10.0, 40.0, --] degrees_east
12025+
: cf_role=timeseries_id(cf_role=timeseries_id(3)) = [x1, y2, --]
12026+
: altitude(cf_role=timeseries_id(3), 3, 4) = [[[1.0, ..., --]]] m
12027+
Coord references: grid_mapping_name:latitude_longitude
12028+
>>> print(h.array)
12029+
[[1.0 2.0 3.0 4.0 -- -- -- -- --]
12030+
[5.0 6.0 7.0 8.0 -- -- -- -- --]
12031+
[ -- -- -- -- -- -- -- -- --]]
12032+
12033+
>>> print(f.pad_missing('time', to_size=6))
12034+
Field: precipitation_amount (ncvar%pr)
12035+
--------------------------------------
12036+
Data : precipitation_amount(cf_role=timeseries_id(2), time(6))
12037+
Dimension coords: time(6) = [2000-01-16 12:00:00, ..., --] gregorian
12038+
Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north
12039+
: longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east
12040+
: cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2]
12041+
: altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m
12042+
Coord references: grid_mapping_name:latitude_longitude
12043+
12044+
"""
12045+
f = _inplace_enabled_define_and_cleanup(self)
12046+
12047+
try:
12048+
axis1 = f._parse_axes(axis)
12049+
except ValueError:
12050+
raise ValueError(
12051+
f"Can't pad_missing: Bad axis specification: {axis!r}"
12052+
)
12053+
12054+
if len(axis1) != 1:
12055+
raise ValueError(
12056+
f"Can't pad_missing: Bad axis specification: {axis!r}"
12057+
)
12058+
12059+
data_axes = f.get_data_axes()
12060+
axis = axis1[0]
12061+
iaxis = data_axes.index(axis)
12062+
12063+
# Pad the field
12064+
super(Field, f).pad_missing(
12065+
iaxis, pad_width=pad_width, to_size=to_size, inplace=True
12066+
)
12067+
12068+
# Set new domain axis size
12069+
domain_axis = f.domain_axis(axis)
12070+
domain_axis.set_size(f.shape[iaxis])
12071+
12072+
data_axes = f.constructs.data_axes()
12073+
for key, construct in f.constructs.filter_by_data(todict=True).items():
12074+
construct_axes = data_axes[key]
12075+
if axis not in construct_axes:
12076+
continue
12077+
12078+
# Pad the construct
12079+
iaxis = construct_axes.index(axis)
12080+
construct.pad_missing(
12081+
iaxis, pad_width=pad_width, to_size=to_size, inplace=True
12082+
)
12083+
12084+
return f
12085+
1195612086
def percentile(
1195712087
self,
1195812088
ranks,

cf/mixin/propertiesdata.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1888,6 +1888,38 @@ def minimum(self):
18881888
"ERROR: Can't get the minimum when there is no data array"
18891889
)
18901890

1891+
@_inplace_enabled(default=False)
1892+
def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False):
1893+
"""Pad an axis with missing data.
1894+
1895+
:Parameters:
1896+
1897+
axis: `int`
1898+
Select the axis for which the padding is to be
1899+
applied.
1900+
1901+
{{pad_width: sequence of `int`, optional}}
1902+
1903+
{{to_size: `int`, optional}}
1904+
1905+
{{inplace: `bool`, optional}}
1906+
1907+
:Returns:
1908+
1909+
`{{class}}` or `None`
1910+
The {{class}} with padded data, or `None` if the
1911+
operation was in-place.
1912+
1913+
"""
1914+
return self._apply_data_oper(
1915+
_inplace_enabled_define_and_cleanup(self),
1916+
"pad_missing",
1917+
axis=axis,
1918+
pad_width=pad_width,
1919+
to_size=to_size,
1920+
inplace=inplace,
1921+
)
1922+
18911923
def period(self, *value, **config):
18921924
"""Return or set the period of the data.
18931925

cf/mixin/propertiesdatabounds.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3874,6 +3874,40 @@ def inspect(self):
38743874
"""
38753875
print(cf_inspect(self)) # pragma: no cover
38763876

3877+
@_inplace_enabled(default=False)
3878+
def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False):
3879+
"""Pad an axis with missing data.
3880+
3881+
:Parameters:
3882+
3883+
axis: `int`
3884+
Select the axis for which the padding is to be
3885+
applied.
3886+
3887+
{{pad_width: sequence of `int`, optional}}
3888+
3889+
{{to_size: `int`, optional}}
3890+
3891+
{{inplace: `bool`, optional}}
3892+
3893+
:Returns:
3894+
3895+
`{{class}}` or `None`
3896+
The {{class}} with padded data, or `None` if the
3897+
operation was in-place.
3898+
3899+
"""
3900+
return self._apply_superclass_data_oper(
3901+
_inplace_enabled_define_and_cleanup(self),
3902+
"pad_missing",
3903+
bounds=True,
3904+
interior_ring=True,
3905+
axis=axis,
3906+
pad_width=pad_width,
3907+
to_size=to_size,
3908+
inplace=inplace,
3909+
)
3910+
38773911
def period(self, *value, **config):
38783912
"""Return or set the period for cyclic values.
38793913

0 commit comments

Comments
 (0)