From 5cf382d0e0c4e371c221cb7562551e5bc87a9c7b Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Thu, 7 May 2026 15:39:47 -0300 Subject: [PATCH 1/2] update --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6710cea..206c29c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,7 +39,7 @@ repos: - id: add-trailing-comma - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.15.9 + rev: v0.15.12 hooks: - id: ruff args: ["--fix", "--show-fixes"] @@ -62,12 +62,12 @@ repos: - id: nb-strip-paths - repo: https://github.com/tox-dev/pyproject-fmt - rev: v2.21.0 + rev: v2.21.2 hooks: - id: pyproject-fmt - repo: https://github.com/woodruffw/zizmor-pre-commit - rev: v1.23.1 + rev: v1.24.1 hooks: - id: zizmor From 7b2b87e34eb6334035496ceb901f271f5c33b8d0 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Thu, 7 May 2026 15:39:52 -0300 Subject: [PATCH 2/2] coerce bad num values, pin chardet, fail early --- ctd/read.py | 44 ++++++++++++++++++++++---------------------- requirements.txt | 2 +- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/ctd/read.py b/ctd/read.py index a9c213e..34b48da 100644 --- a/ctd/read.py +++ b/ctd/read.py @@ -86,6 +86,8 @@ def _read_file(fname: str | Path | StringIO) -> StringIO: ) # Read as bytes but we need to return strings for the parsers. encoding = chardet.detect(contents)["encoding"] + if encoding is None: + encoding = "utf-8" text = contents.decode(encoding=encoding, errors="replace") return StringIO(text) @@ -418,17 +420,28 @@ def from_cnv(fname: str | Path) -> pd.DataFrame: metadata = _parse_seabird(f.readlines(), ftype="cnv") f.seek(0) - cast = pd.read_fwf( - f, - header=None, - index_col=None, - names=metadata["names"], - skiprows=metadata["skiprows"], - sep=r"\s+", - widths=[11] * len(metadata["names"]), - ) + lines = f.readlines()[metadata["skiprows"] :] f.close() + data = [line.strip().split() for line in lines] + cast = pd.DataFrame( + data, + columns=metadata["names"], + ) + + dtypes = {"bpos": int, "pumps": bool, "flag": bool} + for column in cast.columns: + if column in dtypes: + cast[column] = cast[column].astype(dtypes[column]) + else: + try: + cast[column] = pd.to_numeric(cast[column], errors="coerce") + except ValueError: + warnings.warn( + f"Could not convert {column} to float.", + stacklevel=2, + ) + prkeys = [ "prM", "prE", @@ -476,19 +489,6 @@ def from_cnv(fname: str | Path) -> pd.DataFrame: name = _basename(fname)[1] metadata["name"] = str(name) - dtypes = {"bpos": int, "pumps": bool, "flag": bool} - for column in cast.columns: - if column in dtypes: - cast[column] = cast[column].astype(dtypes[column]) - else: - try: - cast[column] = cast[column].astype(float) - except ValueError: - warnings.warn( - f"Could not convert {column} to float.", - stacklevel=2, - ) - cast._metadata = metadata # noqa: SLF001 return cast diff --git a/requirements.txt b/requirements.txt index a8cf98b..e2f463d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -chardet +chardet<7 gsw>=3.3.0 matplotlib numpy>=2