Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion conda-env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ dependencies:
- jax>=0.4.19
- numba>=0.56
- numpy>=1.23
- opera-utils>=0.4.1
- pydantic>=2.1
- pyproj>=3.3
- rasterio>=1.3
Expand All @@ -23,3 +22,5 @@ dependencies:
- tqdm>=4.60
- typing_extensions>=3.10
- tyro>=0.9.20
- pip:
- git+https://github.com/opera-adt/opera-utils.git@main
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ h5py>=3.6
jax>=0.4.19
numba>=0.56
numpy>=1.23
opera-utils>=0.4.1
opera-utils>=0.25.7
pydantic>=2.1
pyproj>=3.3
rasterio>=1.3
Expand Down
28 changes: 24 additions & 4 deletions src/dolphin/io/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,9 @@ def _is_nisar_h5(filename: Filename) -> bool:
def read_nisar_grid_metadata(
filename: Filename, subdataset: str
) -> tuple[int, int, tuple[float, ...], int, str]:
"""Read NISAR GSLC grid metadata via h5py.
"""Read NISAR GSLC grid metadata via h5py (local) or.

earthaccess+h5netcdf (remote).

NISAR's GSLC files store the projection in a sibling ``projection``
dataset and grid coordinates in ``xCoordinates`` / ``yCoordinates``
Expand All @@ -325,12 +327,29 @@ def read_nisar_grid_metadata(

Returns ``(nx, ny, geotransform, epsg, projection_wkt)``.
"""
import re
from pathlib import PurePosixPath

from osgeo import osr
from opera_utils import is_remote_url
from opera_utils._remote import open_h5 as open_remote_h5

file_str = str(filename)
# Normalize malformed URLs missing one slash (e.g., https:/ -> https://)
file_str = re.sub(r"^(https?|s3):/(?!/)", r"\1://", file_str)
grid_path = str(PurePosixPath(subdataset).parent)
with h5py.File(str(filename), "r") as f:
from osgeo import osr

if is_remote_url(file_str):
if file_str.startswith("s3://"):
raise ValueError(
"Direct S3 links are not supported out-of-region."
" Please pass the HTTPS URL equivalent."
)
h5_open = open_remote_h5(file_str)
else:
h5_open = h5py.File(file_str, "r")

with h5_open as f:
dset = f[subdataset]
proj_name = dset.attrs.get("grid_mapping", "projection")
if isinstance(proj_name, bytes):
Expand All @@ -341,7 +360,8 @@ def read_nisar_grid_metadata(
y_coords = f[f"{grid_path}/yCoordinates"][:]
dx = float(f[f"{grid_path}/xCoordinateSpacing"][()])
dy = float(f[f"{grid_path}/yCoordinateSpacing"][()])
# Cell-center coords -> upper-left edge: back off half a pixel.

# 3. GeoTransform Calculation (Identical for both local and remote)
gt = (
float(x_coords[0]) - dx / 2.0,
dx,
Expand Down
30 changes: 28 additions & 2 deletions src/dolphin/io/_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import rasterio as rio
import rasterio.windows
from numpy.typing import ArrayLike
from opera_utils import get_dates, sort_files_by_date
from opera_utils import get_dates, is_remote_url, sort_files_by_date
from osgeo import gdal
from tqdm.auto import trange

Expand Down Expand Up @@ -781,9 +781,19 @@ def __init__(
if any(str(f).startswith("s3://") for f in file_list):
files = [S3Path(str(f)) for f in file_list]
elif use_abs_path:
files = [utils._resolve_gdal_path(p) for p in file_list]
# Skip resolving remote URLs (https://, http://) — Path.resolve()
# would prepend the cwd to them.
files = [
p if is_remote_url(p) else utils._resolve_gdal_path(p)
for p in file_list
]
else:
files = list(file_list)

# Sanitize remote URLs: Path() collapses `https://` to `https:/`,
# which GDAL's HDF5 driver cannot resolve. Restore the missing slash.
files = [_sanitize_remote_url(f) for f in files]

# Extract the date/datetimes from the filenames
dates = [get_dates(f, fmt=file_date_fmt) for f in file_list]
if sort_files:
Expand Down Expand Up @@ -1075,6 +1085,22 @@ def _parse_vrt_file(vrt_file):
return filepaths, sds


def _sanitize_remote_url(file: Filename) -> Filename:
"""Restore the missing slash on URLs that Path() has collapsed.

``Path("https://foo")`` round-trips to ``"https:/foo"`` because pathlib
collapses consecutive slashes. Return the URL as a plain string so the
double slash is preserved (wrapping it back in Path would collapse again).
"""
import re

s = str(file)
fixed = re.sub(r"^(https?|s3):/(?!/)", r"\1://", s)
if fixed == s and not is_remote_url(s):
return file
return fixed


def _assert_images_same_size(files):
"""Ensure all files are the same size."""
with ThreadPoolExecutor(5) as executor:
Expand Down
5 changes: 5 additions & 0 deletions src/dolphin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,18 @@ def _get_path_from_gdal_str(name: Filename) -> Path:

def _resolve_gdal_path(gdal_str: Filename) -> Filename:
"""Resolve the file portion of a gdal-openable string to an absolute path."""
from opera_utils import is_remote_url

s_clean = str(gdal_str).lstrip('"').lstrip("'").rstrip('"').rstrip("'")
prefixes = ["DERIVED_SUBDATASET", "NETCDF", "HDF"]
is_gdal_str = any(s_clean.upper().startswith(pre) for pre in prefixes)
file_part = str(_get_path_from_gdal_str(s_clean))

# strip quotes to add back in after
file_part = file_part.strip('"').strip("'")
# Skip remote URLs — Path.resolve() would prepend the cwd to them.
if is_remote_url(file_part):
return gdal_str
file_part_resolved = Path(file_part).resolve()
resolved = s_clean.replace(file_part, str(file_part_resolved))
return Path(resolved) if not is_gdal_str else resolved
Expand Down
7 changes: 6 additions & 1 deletion src/dolphin/workflows/config/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Annotated, Any, Literal, Optional

import tyro
from opera_utils import is_remote_url
from pydantic import (
BaseModel,
ConfigDict,
Expand Down Expand Up @@ -540,8 +541,12 @@ def _read_file_list_or_glob(cls, value): # noqa: ARG001
filenames = [Path(f) for f in v_path.read_text().splitlines()]

# If given as relative paths, make them relative to the text file
# Skip this for remote URLs (https://, http://, s3://)
parent = v_path.parent
return [parent / f if not f.is_absolute() else f for f in filenames]
return [
parent / f if (not f.is_absolute() and not is_remote_url(f)) else f
for f in filenames
]
else:
msg = f"Input file list {v_path} does not exist or is not a file."
raise ValueError(msg)
Expand Down
9 changes: 6 additions & 3 deletions src/dolphin/workflows/config/_displacement.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Annotated, Any, Optional

import tyro
from opera_utils import get_burst_id, get_dates, sort_files_by_date
from opera_utils import get_burst_id, get_dates, is_remote_url, sort_files_by_date
from pydantic import (
BaseModel,
ConfigDict,
Expand Down Expand Up @@ -199,8 +199,11 @@ def model_post_init(self, context: Any, /) -> None:

# Ensure outputs from workflow steps are within work directory.
if not self.keep_paths_relative:
# Resolve all CSLC paths:
self.cslc_file_list = [p.resolve(strict=False) for p in self.cslc_file_list]
# Resolve all CSLC paths (skip remote URLs like https://, s3://):
self.cslc_file_list = [
p if is_remote_url(p) else p.resolve(strict=False)
for p in self.cslc_file_list
]

work_dir = self.work_directory
# For each workflow step that has an output folder, move it inside
Expand Down
8 changes: 6 additions & 2 deletions src/dolphin/workflows/config/_ps.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path
from typing import Any

from opera_utils import is_remote_url
from pydantic import ConfigDict, Field, field_validator

from ._common import (
Expand Down Expand Up @@ -62,8 +63,11 @@ def model_post_init(self, context: Any, /) -> None:
super().model_post_init(context)
# Ensure outputs from workflow steps are within work directory.
if not self.keep_paths_relative:
# Resolve all CSLC paths:
self.cslc_file_list = [p.resolve(strict=False) for p in self.cslc_file_list]
# Resolve all CSLC paths (skip remote URLs like https://, s3://):
self.cslc_file_list = [
p if is_remote_url(p) else p.resolve(strict=False)
for p in self.cslc_file_list
]

work_dir = self.work_directory
# move the folders inside the work directory
Expand Down
29 changes: 25 additions & 4 deletions src/dolphin/workflows/displacement.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,35 @@ def _prepare_grouped_inputs(cfg: DisplacementWorkflow) -> _GroupedInputs:
bounds mask inside ``wrapped_phase._get_mask`` clips them per block
automatically when bounds are set.
"""
import re

def _normalize_url(path: Path) -> Path:
# Fix malformed URLs missing one slash (e.g., https:/foo -> https://foo).
path_str = str(path)
fixed = re.sub(r"^(https?|s3):/(?!/)", r"\1://", path_str)
return Path(fixed) if fixed != path_str else path

cfg.cslc_file_list = [_normalize_url(p) for p in cfg.cslc_file_list]
if cfg.amplitude_dispersion_files:
cfg.amplitude_dispersion_files = [
_normalize_url(p) for p in cfg.amplitude_dispersion_files
]
if cfg.amplitude_mean_files:
cfg.amplitude_mean_files = [_normalize_url(p) for p in cfg.amplitude_mean_files]
if cfg.layover_shadow_mask_files:
cfg.layover_shadow_mask_files = [
_normalize_url(p) for p in cfg.layover_shadow_mask_files
]

is_opera_burst_mode = True
try:
grouped_slc_files = group_by_burst(cfg.cslc_file_list)
block_bounds: dict[str, BlockBounds] = {}
except ValueError as e:
if "Could not parse burst id" not in str(e):
raise
# Non-OPERA-burst inputs (e.g. NISAR full-frame GSLCs).
# Could not parse burst id or other ValueError - fall back to non-OPERA mode
logger.warning(
f"Unable to parse OPERA bursts ({e}), falling back to non-OPERA mode"
)
is_opera_burst_mode = False
if cfg.input_options.azimuth_blocks > 1:
# Split the single frame into N synthetic "bursts" for parallelism.
Expand Down Expand Up @@ -209,7 +230,7 @@ def run(
# ######################################
# 1. Burst-wise Wrapped phase estimation
# ######################################
logger.info(f"Found SLC files from {len(grouped_slc_files)} bursts")
logger.info(f"Found SLC files from {len(grouped_slc_files)} bursts/blocks")
wrapped_phase_cfgs = [
(
burst, # Include the burst for logging purposes
Expand Down
78 changes: 78 additions & 0 deletions tests/test_goldstein.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import numpy as np
import pytest

from dolphin.goldstein import goldstein


@pytest.fixture
def smooth_complex():
"""A smooth, fully-valid complex interferogram patch."""
ramp = np.linspace(0, 6, 64)[:, None] * np.ones((64, 64))
return np.exp(1j * ramp).astype(np.complex64)


def test_output_shape_and_dtype_complex(smooth_complex):
out = goldstein(smooth_complex, alpha=0.5, psize=32)
assert out.shape == smooth_complex.shape
assert np.iscomplexobj(out)


def test_real_phase_input_returns_complex(smooth_complex):
"""Real (float) phase input is wrapped to complex before filtering."""
phase = np.angle(smooth_complex).astype(np.float64)
out = goldstein(phase, alpha=0.5, psize=32)
assert out.shape == phase.shape
assert np.iscomplexobj(out)


def test_alpha_zero_is_near_identity(smooth_complex):
"""alpha=0 disables spectral weighting, so the overlap-add reconstructs input."""
out = goldstein(smooth_complex, alpha=0.0, psize=32)
np.testing.assert_allclose(out, smooth_complex, atol=1e-5)


def test_negative_alpha_raises(smooth_complex):
with pytest.raises(ValueError, match="alpha must be >= 0"):
goldstein(smooth_complex, alpha=-0.1, psize=32)


def test_all_zero_returned_unchanged():
data = np.zeros((40, 40), dtype=np.complex64)
out = goldstein(data, alpha=0.5, psize=32)
assert np.array_equal(out, data)


def test_all_nan_returned_unchanged():
data = np.full((40, 40), np.nan, dtype=np.complex64)
out = goldstein(data, alpha=0.5, psize=32)
assert out.shape == data.shape
assert np.all(np.isnan(out))


def test_empty_pixels_zeroed_in_output(smooth_complex):
"""Zero-magnitude (invalid) input pixels are forced back to 0 on output."""
data = smooth_complex.copy()
data[0, 0] = 0
data[5, 7] = 0
out = goldstein(data, alpha=0.5, psize=32)
assert out[0, 0] == 0
assert out[5, 7] == 0


def test_increasing_alpha_changes_output(smooth_complex):
"""Higher alpha applies stronger filtering, producing a different result."""
out_low = goldstein(smooth_complex, alpha=0.1, psize=32)
out_high = goldstein(smooth_complex, alpha=0.9, psize=32)
assert not np.allclose(out_low, out_high)


def test_non_divisible_shape_preserved():
"""Input dims that are not a multiple of psize are padded and cropped back."""
rng = np.random.default_rng(42)
data = (rng.standard_normal((50, 37)) + 1j * rng.standard_normal((50, 37))).astype(
np.complex64
)
out = goldstein(data, alpha=0.5, psize=32)
assert out.shape == (50, 37)
# Fully valid input -> no pixel forced to zero by the empty mask
assert np.all(np.isfinite(out))
Loading