Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/321.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Restored compatibility with pandas 3.0 by replacing two calls that fail under pandas 3.0: `numpy.issubdtype(col.dtype, numpy.number)` in `RunGroupBy.__init__`, which raised `TypeError` on `StringDtype` meta columns and is now `pandas.api.types.is_numeric_dtype`, and `Series[0]` positional indexing in `_xarray._many_to_one`, which raised `KeyError: 0` and is now `Series.iloc[0]`. The previously failing `ScmRun.groupby` and `ScmRun.to_nc` code paths now run on both pandas 2 and pandas 3.0.
5 changes: 4 additions & 1 deletion src/scmdata/_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,10 @@ def _many_to_one(df, col1, col2):
# thanks https://stackoverflow.com/a/59091549
checker = df[[col1, col2]].drop_duplicates()

max_count = checker.groupby(col2).count().max()[0]
# ``.iloc[0]`` rather than ``[0]``: pandas 3.0 removed positional
# integer indexing on label-indexed Series, so ``[0]`` would raise
# ``KeyError: 0`` on the Series returned by the chained ``.max()``.
max_count = checker.groupby(col2).count().max().iloc[0]
if max_count < 1: # pragma: no cover # emergency valve
raise AssertionError

Expand Down
8 changes: 6 additions & 2 deletions src/scmdata/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,12 @@ def __init__(
m = run.meta.reset_index(drop=True)
self.na_fill_value = float(na_fill_value)

# Work around the bad handling of NaN values in groupbys
if any([np.issubdtype(m[c].dtype, np.number) for c in m]):
# Work around the bad handling of NaN values in groupbys.
# pd.api.types.is_numeric_dtype accepts every dtype scmdata
# ever emits; np.issubdtype(..., np.number) raises on
# pandas 3.0's default StringDtype with
# ``TypeError: Cannot interpret '<StringDtype(...)>'``.
if any([pd.api.types.is_numeric_dtype(m[c]) for c in m]):
if (m == na_fill_value).any(axis=None):
raise ValueError(
"na_fill_value conflicts with data value. Choose a na_fill_value "
Expand Down