From 3f130e5fe0f0422bfbd3ecf108e6aad0335aedc3 Mon Sep 17 00:00:00 2001 From: mirzaees Date: Mon, 1 Jun 2026 07:48:33 -0700 Subject: [PATCH 01/11] read metadata of remote files with h5netcdf --- src/dolphin/io/_core.py | 124 +++++++++++++++++++++++++++++++++++----- 1 file changed, 110 insertions(+), 14 deletions(-) diff --git a/src/dolphin/io/_core.py b/src/dolphin/io/_core.py index 94b4ff59..864e1491 100644 --- a/src/dolphin/io/_core.py +++ b/src/dolphin/io/_core.py @@ -308,10 +308,57 @@ def _is_nisar_h5(filename: Filename) -> bool: return s.endswith(".h5") and Path(s).name.upper().startswith("NISAR_") +# def read_nisar_grid_metadata( +# filename: Filename, subdataset: str +# ) -> tuple[int, int, tuple[float, ...], int, str]: +# """Read NISAR GSLC grid metadata via h5py. + +# NISAR's GSLC files store the projection in a sibling ``projection`` +# dataset and grid coordinates in ``xCoordinates`` / ``yCoordinates`` +# (cell centers) inside the same group as the polarization dataset. +# GDAL's HDF5 driver doesn't expose any of this — it returns an +# identity geotransform and empty projection — so the only reliable +# path is to read it directly. + +# Honors the data dataset's ``grid_mapping`` attribute when present; +# falls back to the conventional ``projection`` dataset name. + +# Returns ``(nx, ny, geotransform, epsg, projection_wkt)``. +# """ +# from pathlib import PurePosixPath + +# from osgeo import osr + +# grid_path = str(PurePosixPath(subdataset).parent) +# with h5py.File(str(filename), "r") as f: +# dset = f[subdataset] +# proj_name = dset.attrs.get("grid_mapping", "projection") +# if isinstance(proj_name, bytes): +# proj_name = proj_name.decode() +# proj_raw = f[f"{grid_path}/{proj_name}"][()] +# epsg = int(proj_raw.decode()) if isinstance(proj_raw, bytes) else int(proj_raw) +# x_coords = f[f"{grid_path}/xCoordinates"][:] +# y_coords = f[f"{grid_path}/yCoordinates"][:] +# dx = float(f[f"{grid_path}/xCoordinateSpacing"][()]) +# dy = float(f[f"{grid_path}/yCoordinateSpacing"][()]) +# # Cell-center coords -> upper-left edge: back off half a pixel. +# gt = ( +# float(x_coords[0]) - dx / 2.0, +# dx, +# 0.0, +# float(y_coords[0]) - dy / 2.0, +# 0.0, +# dy, +# ) +# srs = osr.SpatialReference() +# srs.ImportFromEPSG(epsg) +# return len(x_coords), len(y_coords), gt, epsg, srs.ExportToWkt() + + def read_nisar_grid_metadata( filename: Filename, subdataset: str ) -> tuple[int, int, tuple[float, ...], int, str]: - """Read NISAR GSLC grid metadata via h5py. + """Read NISAR GSLC grid metadata via h5py (local) or earthaccess+h5netcdf (remote). NISAR's GSLC files store the projection in a sibling ``projection`` dataset and grid coordinates in ``xCoordinates`` / ``yCoordinates`` @@ -327,21 +374,70 @@ def read_nisar_grid_metadata( """ from pathlib import PurePosixPath - from osgeo import osr + import earthaccess + import xarray as xr + file_str = str(filename) + is_remote = file_str.startswith(("http://", "https://", "s3://")) grid_path = str(PurePosixPath(subdataset).parent) - with h5py.File(str(filename), "r") as f: - dset = f[subdataset] - proj_name = dset.attrs.get("grid_mapping", "projection") - if isinstance(proj_name, bytes): - proj_name = proj_name.decode() - proj_raw = f[f"{grid_path}/{proj_name}"][()] - epsg = int(proj_raw.decode()) if isinstance(proj_raw, bytes) else int(proj_raw) - x_coords = f[f"{grid_path}/xCoordinates"][:] - y_coords = f[f"{grid_path}/yCoordinates"][:] - dx = float(f[f"{grid_path}/xCoordinateSpacing"][()]) - dy = float(f[f"{grid_path}/yCoordinateSpacing"][()]) - # Cell-center coords -> upper-left edge: back off half a pixel. + from osgeo import osr + + if is_remote: + # 1. Handle Remote Streaming via Earthaccess + # Switch to HTTPS if an S3 link sneaks in outside of us-west-2 + if file_str.startswith("s3://"): + raise ValueError( + "Direct S3 links are not supported out-of-region. Please pass the HTTPS URL equivalent." + ) + + # Open file pointer wrapper using ASF provider credentials + file_objs = earthaccess.open([file_str], provider="ASF") + f_pointer = file_objs[0] + + # Use xarray with h5netcdf backend to lazily read group metadata + with xr.open_dataset( + f_pointer, group=grid_path, engine="h5netcdf", phony_dims="access" + ) as ds: + # Xarray group datasets expose dataset attributes via .attrs + # We open the subdataset path leaf name to get its specific attributes + leaf_name = PurePosixPath(subdataset).name + + # h5netcdf exposes attributes directly as strings/numbers (no byte decoding needed) + with xr.open_dataset( + f_pointer, engine="h5netcdf", phony_dims="access" + ) as root_ds: + dset_attrs = root_ds[subdataset].attrs + proj_name = dset_attrs.get("grid_mapping", "projection") + + # Fetch projection EPSG + epsg_raw = ds[proj_name].values + epsg = ( + int(epsg_raw.decode()) if isinstance(epsg_raw, bytes) else int(epsg_raw) + ) + + # Extract coordinate vectors and spacings + x_coords = ds["xCoordinates"].values + y_coords = ds["yCoordinates"].values + dx = float(ds["xCoordinateSpacing"].values) + dy = float(ds["yCoordinateSpacing"].values) + + else: + # 2. Traditional Local File Path (Original Logic) + with h5py.File(file_str, "r") as f: + dset = f[subdataset] + proj_name = dset.attrs.get("grid_mapping", "projection") + if isinstance(proj_name, bytes): + proj_name = proj_name.decode() + proj_raw = f[f"{grid_path}/{proj_name}"][()] + epsg = ( + int(proj_raw.decode()) if isinstance(proj_raw, bytes) else int(proj_raw) + ) + x_coords = f[f"{grid_path}/xCoordinates"][:] + y_coords = f[f"{grid_path}/yCoordinates"][:] + dx = float(f[f"{grid_path}/xCoordinateSpacing"][()]) + dy = float(f[f"{grid_path}/yCoordinateSpacing"][()]) + + # 3. GeoTransform Calculation (Identical for both local and remote) gt = ( float(x_coords[0]) - dx / 2.0, dx, From beee1f5d3c66bd88703f836d032694169d3517cf Mon Sep 17 00:00:00 2001 From: Sara Mirzaee Date: Mon, 1 Jun 2026 11:04:39 -0400 Subject: [PATCH 02/11] do not use absolute path for remote urls --- src/dolphin/workflows/config/_common.py | 7 ++++++- src/dolphin/workflows/config/_displacement.py | 9 ++++++--- src/dolphin/workflows/config/_ps.py | 8 ++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/dolphin/workflows/config/_common.py b/src/dolphin/workflows/config/_common.py index 6f6e39b7..85e8a7f4 100644 --- a/src/dolphin/workflows/config/_common.py +++ b/src/dolphin/workflows/config/_common.py @@ -7,6 +7,7 @@ from typing import Annotated, Any, Literal, Optional import tyro +from opera_utils import is_remote_url from pydantic import ( BaseModel, ConfigDict, @@ -540,8 +541,12 @@ def _read_file_list_or_glob(cls, value): # noqa: ARG001 filenames = [Path(f) for f in v_path.read_text().splitlines()] # If given as relative paths, make them relative to the text file + # Skip this for remote URLs (https://, http://, s3://) parent = v_path.parent - return [parent / f if not f.is_absolute() else f for f in filenames] + return [ + parent / f if (not f.is_absolute() and not is_remote_url(f)) else f + for f in filenames + ] else: msg = f"Input file list {v_path} does not exist or is not a file." raise ValueError(msg) diff --git a/src/dolphin/workflows/config/_displacement.py b/src/dolphin/workflows/config/_displacement.py index ce48ec12..22d968a6 100644 --- a/src/dolphin/workflows/config/_displacement.py +++ b/src/dolphin/workflows/config/_displacement.py @@ -5,7 +5,7 @@ from typing import Annotated, Any, Optional import tyro -from opera_utils import get_burst_id, get_dates, sort_files_by_date +from opera_utils import get_burst_id, get_dates, is_remote_url, sort_files_by_date from pydantic import ( BaseModel, ConfigDict, @@ -199,8 +199,11 @@ def model_post_init(self, context: Any, /) -> None: # Ensure outputs from workflow steps are within work directory. if not self.keep_paths_relative: - # Resolve all CSLC paths: - self.cslc_file_list = [p.resolve(strict=False) for p in self.cslc_file_list] + # Resolve all CSLC paths (skip remote URLs like https://, s3://): + self.cslc_file_list = [ + p if is_remote_url(p) else p.resolve(strict=False) + for p in self.cslc_file_list + ] work_dir = self.work_directory # For each workflow step that has an output folder, move it inside diff --git a/src/dolphin/workflows/config/_ps.py b/src/dolphin/workflows/config/_ps.py index bccc056a..c3d7178f 100644 --- a/src/dolphin/workflows/config/_ps.py +++ b/src/dolphin/workflows/config/_ps.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import Any +from opera_utils import is_remote_url from pydantic import ConfigDict, Field, field_validator from ._common import ( @@ -62,8 +63,11 @@ def model_post_init(self, context: Any, /) -> None: super().model_post_init(context) # Ensure outputs from workflow steps are within work directory. if not self.keep_paths_relative: - # Resolve all CSLC paths: - self.cslc_file_list = [p.resolve(strict=False) for p in self.cslc_file_list] + # Resolve all CSLC paths (skip remote URLs like https://, s3://): + self.cslc_file_list = [ + p if is_remote_url(p) else p.resolve(strict=False) + for p in self.cslc_file_list + ] work_dir = self.work_directory # move the folders inside the work directory From ac0340d8a2019094e0b58029e6d3a713616a1c35 Mon Sep 17 00:00:00 2001 From: mirzaees Date: Tue, 2 Jun 2026 08:22:37 -0700 Subject: [PATCH 03/11] correct remote urls slash --- src/dolphin/io/_core.py | 77 +++++++++------------------ src/dolphin/io/_readers.py | 30 ++++++++++- src/dolphin/utils.py | 5 ++ src/dolphin/workflows/displacement.py | 29 ++++++++-- 4 files changed, 82 insertions(+), 59 deletions(-) diff --git a/src/dolphin/io/_core.py b/src/dolphin/io/_core.py index 864e1491..d472ab04 100644 --- a/src/dolphin/io/_core.py +++ b/src/dolphin/io/_core.py @@ -372,70 +372,41 @@ def read_nisar_grid_metadata( Returns ``(nx, ny, geotransform, epsg, projection_wkt)``. """ + import re from pathlib import PurePosixPath - import earthaccess - import xarray as xr + from opera_utils import is_remote_url + from opera_utils._remote import open_h5 as open_remote_h5 file_str = str(filename) - is_remote = file_str.startswith(("http://", "https://", "s3://")) + # Normalize malformed URLs missing one slash (e.g., https:/ -> https://) + file_str = re.sub(r"^(https?|s3):/(?!/)", r"\1://", file_str) grid_path = str(PurePosixPath(subdataset).parent) from osgeo import osr - if is_remote: - # 1. Handle Remote Streaming via Earthaccess - # Switch to HTTPS if an S3 link sneaks in outside of us-west-2 + if is_remote_url(file_str): if file_str.startswith("s3://"): raise ValueError( - "Direct S3 links are not supported out-of-region. Please pass the HTTPS URL equivalent." + "Direct S3 links are not supported out-of-region." + " Please pass the HTTPS URL equivalent." ) - - # Open file pointer wrapper using ASF provider credentials - file_objs = earthaccess.open([file_str], provider="ASF") - f_pointer = file_objs[0] - - # Use xarray with h5netcdf backend to lazily read group metadata - with xr.open_dataset( - f_pointer, group=grid_path, engine="h5netcdf", phony_dims="access" - ) as ds: - # Xarray group datasets expose dataset attributes via .attrs - # We open the subdataset path leaf name to get its specific attributes - leaf_name = PurePosixPath(subdataset).name - - # h5netcdf exposes attributes directly as strings/numbers (no byte decoding needed) - with xr.open_dataset( - f_pointer, engine="h5netcdf", phony_dims="access" - ) as root_ds: - dset_attrs = root_ds[subdataset].attrs - proj_name = dset_attrs.get("grid_mapping", "projection") - - # Fetch projection EPSG - epsg_raw = ds[proj_name].values - epsg = ( - int(epsg_raw.decode()) if isinstance(epsg_raw, bytes) else int(epsg_raw) - ) - - # Extract coordinate vectors and spacings - x_coords = ds["xCoordinates"].values - y_coords = ds["yCoordinates"].values - dx = float(ds["xCoordinateSpacing"].values) - dy = float(ds["yCoordinateSpacing"].values) - + h5_open = open_remote_h5(file_str) else: - # 2. Traditional Local File Path (Original Logic) - with h5py.File(file_str, "r") as f: - dset = f[subdataset] - proj_name = dset.attrs.get("grid_mapping", "projection") - if isinstance(proj_name, bytes): - proj_name = proj_name.decode() - proj_raw = f[f"{grid_path}/{proj_name}"][()] - epsg = ( - int(proj_raw.decode()) if isinstance(proj_raw, bytes) else int(proj_raw) - ) - x_coords = f[f"{grid_path}/xCoordinates"][:] - y_coords = f[f"{grid_path}/yCoordinates"][:] - dx = float(f[f"{grid_path}/xCoordinateSpacing"][()]) - dy = float(f[f"{grid_path}/yCoordinateSpacing"][()]) + h5_open = h5py.File(file_str, "r") + + with h5_open as f: + dset = f[subdataset] + proj_name = dset.attrs.get("grid_mapping", "projection") + if isinstance(proj_name, bytes): + proj_name = proj_name.decode() + proj_raw = f[f"{grid_path}/{proj_name}"][()] + epsg = ( + int(proj_raw.decode()) if isinstance(proj_raw, bytes) else int(proj_raw) + ) + x_coords = f[f"{grid_path}/xCoordinates"][:] + y_coords = f[f"{grid_path}/yCoordinates"][:] + dx = float(f[f"{grid_path}/xCoordinateSpacing"][()]) + dy = float(f[f"{grid_path}/yCoordinateSpacing"][()]) # 3. GeoTransform Calculation (Identical for both local and remote) gt = ( diff --git a/src/dolphin/io/_readers.py b/src/dolphin/io/_readers.py index 74997559..db187d77 100644 --- a/src/dolphin/io/_readers.py +++ b/src/dolphin/io/_readers.py @@ -21,7 +21,7 @@ import rasterio as rio import rasterio.windows from numpy.typing import ArrayLike -from opera_utils import get_dates, sort_files_by_date +from opera_utils import get_dates, is_remote_url, sort_files_by_date from osgeo import gdal from tqdm.auto import trange @@ -781,9 +781,19 @@ def __init__( if any(str(f).startswith("s3://") for f in file_list): files = [S3Path(str(f)) for f in file_list] elif use_abs_path: - files = [utils._resolve_gdal_path(p) for p in file_list] + # Skip resolving remote URLs (https://, http://) — Path.resolve() + # would prepend the cwd to them. + files = [ + p if is_remote_url(p) else utils._resolve_gdal_path(p) + for p in file_list + ] else: files = list(file_list) + + # Sanitize remote URLs: Path() collapses `https://` to `https:/`, + # which GDAL's HDF5 driver cannot resolve. Restore the missing slash. + files = [_sanitize_remote_url(f) for f in files] + # Extract the date/datetimes from the filenames dates = [get_dates(f, fmt=file_date_fmt) for f in file_list] if sort_files: @@ -1075,6 +1085,22 @@ def _parse_vrt_file(vrt_file): return filepaths, sds +def _sanitize_remote_url(file: Filename) -> Filename: + """Restore the missing slash on URLs that Path() has collapsed. + + ``Path("https://foo")`` round-trips to ``"https:/foo"`` because pathlib + collapses consecutive slashes. Return the URL as a plain string so the + double slash is preserved (wrapping it back in Path would collapse again). + """ + import re + + s = str(file) + fixed = re.sub(r"^(https?|s3):/(?!/)", r"\1://", s) + if fixed == s and not is_remote_url(s): + return file + return fixed + + def _assert_images_same_size(files): """Ensure all files are the same size.""" with ThreadPoolExecutor(5) as executor: diff --git a/src/dolphin/utils.py b/src/dolphin/utils.py index ce980ae7..b6f5e306 100644 --- a/src/dolphin/utils.py +++ b/src/dolphin/utils.py @@ -79,6 +79,8 @@ def _get_path_from_gdal_str(name: Filename) -> Path: def _resolve_gdal_path(gdal_str: Filename) -> Filename: """Resolve the file portion of a gdal-openable string to an absolute path.""" + from opera_utils import is_remote_url + s_clean = str(gdal_str).lstrip('"').lstrip("'").rstrip('"').rstrip("'") prefixes = ["DERIVED_SUBDATASET", "NETCDF", "HDF"] is_gdal_str = any(s_clean.upper().startswith(pre) for pre in prefixes) @@ -86,6 +88,9 @@ def _resolve_gdal_path(gdal_str: Filename) -> Filename: # strip quotes to add back in after file_part = file_part.strip('"').strip("'") + # Skip remote URLs — Path.resolve() would prepend the cwd to them. + if is_remote_url(file_part): + return gdal_str file_part_resolved = Path(file_part).resolve() resolved = s_clean.replace(file_part, str(file_part_resolved)) return Path(resolved) if not is_gdal_str else resolved diff --git a/src/dolphin/workflows/displacement.py b/src/dolphin/workflows/displacement.py index 107767bc..5d0a246c 100755 --- a/src/dolphin/workflows/displacement.py +++ b/src/dolphin/workflows/displacement.py @@ -82,14 +82,35 @@ def _prepare_grouped_inputs(cfg: DisplacementWorkflow) -> _GroupedInputs: bounds mask inside ``wrapped_phase._get_mask`` clips them per block automatically when bounds are set. """ + import re + + def _normalize_url(path: Path) -> Path: + # Fix malformed URLs missing one slash (e.g., https:/foo -> https://foo). + path_str = str(path) + fixed = re.sub(r"^(https?|s3):/(?!/)", r"\1://", path_str) + return Path(fixed) if fixed != path_str else path + + cfg.cslc_file_list = [_normalize_url(p) for p in cfg.cslc_file_list] + if cfg.amplitude_dispersion_files: + cfg.amplitude_dispersion_files = [ + _normalize_url(p) for p in cfg.amplitude_dispersion_files + ] + if cfg.amplitude_mean_files: + cfg.amplitude_mean_files = [_normalize_url(p) for p in cfg.amplitude_mean_files] + if cfg.layover_shadow_mask_files: + cfg.layover_shadow_mask_files = [ + _normalize_url(p) for p in cfg.layover_shadow_mask_files + ] + is_opera_burst_mode = True try: grouped_slc_files = group_by_burst(cfg.cslc_file_list) block_bounds: dict[str, BlockBounds] = {} except ValueError as e: - if "Could not parse burst id" not in str(e): - raise - # Non-OPERA-burst inputs (e.g. NISAR full-frame GSLCs). + # Could not parse burst id or other ValueError - fall back to non-OPERA mode + logger.warning( + f"Unable to parse OPERA bursts ({e}), falling back to non-OPERA mode" + ) is_opera_burst_mode = False if cfg.input_options.azimuth_blocks > 1: # Split the single frame into N synthetic "bursts" for parallelism. @@ -209,7 +230,7 @@ def run( # ###################################### # 1. Burst-wise Wrapped phase estimation # ###################################### - logger.info(f"Found SLC files from {len(grouped_slc_files)} bursts") + logger.info(f"Found SLC files from {len(grouped_slc_files)} bursts/blocks") wrapped_phase_cfgs = [ ( burst, # Include the burst for logging purposes From a4632721036fd5b42f23dc4af74a640dcc15ec49 Mon Sep 17 00:00:00 2001 From: mirzaees Date: Wed, 3 Jun 2026 15:34:33 -0700 Subject: [PATCH 04/11] remove extra lines --- src/dolphin/io/_core.py | 55 +++-------------------------------------- 1 file changed, 4 insertions(+), 51 deletions(-) diff --git a/src/dolphin/io/_core.py b/src/dolphin/io/_core.py index d472ab04..b4d6ff26 100644 --- a/src/dolphin/io/_core.py +++ b/src/dolphin/io/_core.py @@ -308,57 +308,12 @@ def _is_nisar_h5(filename: Filename) -> bool: return s.endswith(".h5") and Path(s).name.upper().startswith("NISAR_") -# def read_nisar_grid_metadata( -# filename: Filename, subdataset: str -# ) -> tuple[int, int, tuple[float, ...], int, str]: -# """Read NISAR GSLC grid metadata via h5py. - -# NISAR's GSLC files store the projection in a sibling ``projection`` -# dataset and grid coordinates in ``xCoordinates`` / ``yCoordinates`` -# (cell centers) inside the same group as the polarization dataset. -# GDAL's HDF5 driver doesn't expose any of this — it returns an -# identity geotransform and empty projection — so the only reliable -# path is to read it directly. - -# Honors the data dataset's ``grid_mapping`` attribute when present; -# falls back to the conventional ``projection`` dataset name. - -# Returns ``(nx, ny, geotransform, epsg, projection_wkt)``. -# """ -# from pathlib import PurePosixPath - -# from osgeo import osr - -# grid_path = str(PurePosixPath(subdataset).parent) -# with h5py.File(str(filename), "r") as f: -# dset = f[subdataset] -# proj_name = dset.attrs.get("grid_mapping", "projection") -# if isinstance(proj_name, bytes): -# proj_name = proj_name.decode() -# proj_raw = f[f"{grid_path}/{proj_name}"][()] -# epsg = int(proj_raw.decode()) if isinstance(proj_raw, bytes) else int(proj_raw) -# x_coords = f[f"{grid_path}/xCoordinates"][:] -# y_coords = f[f"{grid_path}/yCoordinates"][:] -# dx = float(f[f"{grid_path}/xCoordinateSpacing"][()]) -# dy = float(f[f"{grid_path}/yCoordinateSpacing"][()]) -# # Cell-center coords -> upper-left edge: back off half a pixel. -# gt = ( -# float(x_coords[0]) - dx / 2.0, -# dx, -# 0.0, -# float(y_coords[0]) - dy / 2.0, -# 0.0, -# dy, -# ) -# srs = osr.SpatialReference() -# srs.ImportFromEPSG(epsg) -# return len(x_coords), len(y_coords), gt, epsg, srs.ExportToWkt() - - def read_nisar_grid_metadata( filename: Filename, subdataset: str ) -> tuple[int, int, tuple[float, ...], int, str]: - """Read NISAR GSLC grid metadata via h5py (local) or earthaccess+h5netcdf (remote). + """Read NISAR GSLC grid metadata via h5py (local) or. + + earthaccess+h5netcdf (remote). NISAR's GSLC files store the projection in a sibling ``projection`` dataset and grid coordinates in ``xCoordinates`` / ``yCoordinates`` @@ -400,9 +355,7 @@ def read_nisar_grid_metadata( if isinstance(proj_name, bytes): proj_name = proj_name.decode() proj_raw = f[f"{grid_path}/{proj_name}"][()] - epsg = ( - int(proj_raw.decode()) if isinstance(proj_raw, bytes) else int(proj_raw) - ) + epsg = int(proj_raw.decode()) if isinstance(proj_raw, bytes) else int(proj_raw) x_coords = f[f"{grid_path}/xCoordinates"][:] y_coords = f[f"{grid_path}/yCoordinates"][:] dx = float(f[f"{grid_path}/xCoordinateSpacing"][()]) From cab9b5eeea27494847ffaf1be771f1ef8c63c2d2 Mon Sep 17 00:00:00 2001 From: Sara Mirzaee Date: Wed, 3 Jun 2026 21:50:59 -0400 Subject: [PATCH 05/11] add missing tests --- tests/test_goldstein.py | 78 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 tests/test_goldstein.py diff --git a/tests/test_goldstein.py b/tests/test_goldstein.py new file mode 100644 index 00000000..900e266e --- /dev/null +++ b/tests/test_goldstein.py @@ -0,0 +1,78 @@ +import numpy as np +import pytest + +from dolphin.goldstein import goldstein + + +@pytest.fixture +def smooth_complex(): + """A smooth, fully-valid complex interferogram patch.""" + ramp = np.linspace(0, 6, 64)[:, None] * np.ones((64, 64)) + return np.exp(1j * ramp).astype(np.complex64) + + +def test_output_shape_and_dtype_complex(smooth_complex): + out = goldstein(smooth_complex, alpha=0.5, psize=32) + assert out.shape == smooth_complex.shape + assert np.iscomplexobj(out) + + +def test_real_phase_input_returns_complex(smooth_complex): + """Real (float) phase input is wrapped to complex before filtering.""" + phase = np.angle(smooth_complex).astype(np.float64) + out = goldstein(phase, alpha=0.5, psize=32) + assert out.shape == phase.shape + assert np.iscomplexobj(out) + + +def test_alpha_zero_is_near_identity(smooth_complex): + """alpha=0 disables spectral weighting, so the overlap-add reconstructs input.""" + out = goldstein(smooth_complex, alpha=0.0, psize=32) + np.testing.assert_allclose(out, smooth_complex, atol=1e-5) + + +def test_negative_alpha_raises(smooth_complex): + with pytest.raises(ValueError, match="alpha must be >= 0"): + goldstein(smooth_complex, alpha=-0.1, psize=32) + + +def test_all_zero_returned_unchanged(): + data = np.zeros((40, 40), dtype=np.complex64) + out = goldstein(data, alpha=0.5, psize=32) + assert np.array_equal(out, data) + + +def test_all_nan_returned_unchanged(): + data = np.full((40, 40), np.nan, dtype=np.complex64) + out = goldstein(data, alpha=0.5, psize=32) + assert out.shape == data.shape + assert np.all(np.isnan(out)) + + +def test_empty_pixels_zeroed_in_output(smooth_complex): + """Zero-magnitude (invalid) input pixels are forced back to 0 on output.""" + data = smooth_complex.copy() + data[0, 0] = 0 + data[5, 7] = 0 + out = goldstein(data, alpha=0.5, psize=32) + assert out[0, 0] == 0 + assert out[5, 7] == 0 + + +def test_increasing_alpha_changes_output(smooth_complex): + """Higher alpha applies stronger filtering, producing a different result.""" + out_low = goldstein(smooth_complex, alpha=0.1, psize=32) + out_high = goldstein(smooth_complex, alpha=0.9, psize=32) + assert not np.allclose(out_low, out_high) + + +def test_non_divisible_shape_preserved(): + """Input dims that are not a multiple of psize are padded and cropped back.""" + rng = np.random.default_rng(42) + data = (rng.standard_normal((50, 37)) + 1j * rng.standard_normal((50, 37))).astype( + np.complex64 + ) + out = goldstein(data, alpha=0.5, psize=32) + assert out.shape == (50, 37) + # Fully valid input -> no pixel forced to zero by the empty mask + assert np.all(np.isfinite(out)) From 45aafef402287442395771a71eae7e77782bd51d Mon Sep 17 00:00:00 2001 From: mirzaees Date: Thu, 4 Jun 2026 11:26:06 -0700 Subject: [PATCH 06/11] update opera_utils --- conda-env.yml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conda-env.yml b/conda-env.yml index 80959ab9..655ff39c 100644 --- a/conda-env.yml +++ b/conda-env.yml @@ -13,7 +13,7 @@ dependencies: - jax>=0.4.19 - numba>=0.56 - numpy>=1.23 - - opera-utils>=0.4.1 + - opera-utils>=0.25.7 - pydantic>=2.1 - pyproj>=3.3 - rasterio>=1.3 diff --git a/requirements.txt b/requirements.txt index f8f4ffe3..221b268a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ h5py>=3.6 jax>=0.4.19 numba>=0.56 numpy>=1.23 -opera-utils>=0.4.1 +opera-utils>=0.25.7 pydantic>=2.1 pyproj>=3.3 rasterio>=1.3 From 873dee39cecb54b5e935b1e57d022158d00c598e Mon Sep 17 00:00:00 2001 From: mirzaees Date: Thu, 4 Jun 2026 11:55:23 -0700 Subject: [PATCH 07/11] use github version for opera_utils --- conda-env.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conda-env.yml b/conda-env.yml index 655ff39c..c7f2795e 100644 --- a/conda-env.yml +++ b/conda-env.yml @@ -13,7 +13,6 @@ dependencies: - jax>=0.4.19 - numba>=0.56 - numpy>=1.23 - - opera-utils>=0.25.7 - pydantic>=2.1 - pyproj>=3.3 - rasterio>=1.3 @@ -23,3 +22,5 @@ dependencies: - tqdm>=4.60 - typing_extensions>=3.10 - tyro>=0.9.20 + - pip: + - git+https://github.com/opera-adt/opera-utils.git@main \ No newline at end of file From f83b140fb65382ba625aa1dc1c0aa9cd53de4d87 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 4 Jun 2026 18:55:53 +0000 Subject: [PATCH 08/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- conda-env.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda-env.yml b/conda-env.yml index c7f2795e..02d61af7 100644 --- a/conda-env.yml +++ b/conda-env.yml @@ -23,4 +23,4 @@ dependencies: - typing_extensions>=3.10 - tyro>=0.9.20 - pip: - - git+https://github.com/opera-adt/opera-utils.git@main \ No newline at end of file + - git+https://github.com/opera-adt/opera-utils.git@main From b3ad4d0fad9acd3ac8c94bcd9a9deeab6aea9fde Mon Sep 17 00:00:00 2001 From: mirzaees Date: Thu, 4 Jun 2026 12:07:57 -0700 Subject: [PATCH 09/11] add aiohttp to dependancies --- conda-env.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda-env.yml b/conda-env.yml index 02d61af7..94bd600e 100644 --- a/conda-env.yml +++ b/conda-env.yml @@ -2,6 +2,7 @@ name: dolphin-env channels: - conda-forge dependencies: + - aiohttp - python>=3.9 - pip>=21.3 # https://pip.pypa.io/en/stable/reference/build-system/pyproject-toml/#editable-installation - git # for pip install, due to setuptools_scm From ad5a383adc5e14b0cf8cd87c382fe0e1a5ee74f1 Mon Sep 17 00:00:00 2001 From: mirzaees Date: Thu, 4 Jun 2026 12:11:11 -0700 Subject: [PATCH 10/11] add s3fs to dependancies --- conda-env.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda-env.yml b/conda-env.yml index 94bd600e..40c989f2 100644 --- a/conda-env.yml +++ b/conda-env.yml @@ -18,6 +18,7 @@ dependencies: - pyproj>=3.3 - rasterio>=1.3 - ruamel.yaml>=0.15 + - s3fs - scipy>=1.12 # "scipy 0.16+ is required for linear algebra", numba. 1.9 is the oldest version working with jax=0.4.19 - threadpoolctl>=3.0 - tqdm>=4.60 From c02a8886c377c836b372765cf1228a5a5a177a83 Mon Sep 17 00:00:00 2001 From: mirzaees Date: Thu, 4 Jun 2026 12:13:24 -0700 Subject: [PATCH 11/11] revert adding dependencies --- conda-env.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/conda-env.yml b/conda-env.yml index 40c989f2..02d61af7 100644 --- a/conda-env.yml +++ b/conda-env.yml @@ -2,7 +2,6 @@ name: dolphin-env channels: - conda-forge dependencies: - - aiohttp - python>=3.9 - pip>=21.3 # https://pip.pypa.io/en/stable/reference/build-system/pyproject-toml/#editable-installation - git # for pip install, due to setuptools_scm @@ -18,7 +17,6 @@ dependencies: - pyproj>=3.3 - rasterio>=1.3 - ruamel.yaml>=0.15 - - s3fs - scipy>=1.12 # "scipy 0.16+ is required for linear algebra", numba. 1.9 is the oldest version working with jax=0.4.19 - threadpoolctl>=3.0 - tqdm>=4.60