Skip to content

Commit a68da80

Browse files
committed
initial import of tools for cesm inputdata
1 parent ae42715 commit a68da80

2 files changed

Lines changed: 297 additions & 0 deletions

File tree

relink.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import os
2+
import shutil
3+
import pwd
4+
5+
def _swap_file_for_symlink(file_path, link_target):
    """Atomically replace `file_path` with a symlink to `link_target`.

    The link is first created under a process-unique temporary name next to
    `file_path` and then renamed over it with os.replace(). The original
    path therefore never disappears, and no unrelated sibling file (for
    example an existing '<name>.tmp') is overwritten along the way.

    Raises:
        OSError: if the link cannot be created or moved into place. The
            original file is left untouched in that case.
    """
    staging_link = f"{file_path}.relink.{os.getpid()}.tmp"
    # os.symlink refuses to overwrite, so a pre-existing path with this name
    # produces FileExistsError instead of data loss.
    os.symlink(link_target, staging_link)
    try:
        os.replace(staging_link, file_path)
    except OSError:
        # Best-effort removal of our own temporary link; the original error
        # is the one worth reporting.
        try:
            os.remove(staging_link)
        except OSError:
            pass
        raise


def find_and_replace_owned_files(source_dir, target_dir, username):
    """
    Replace files owned by `username` under `source_dir` with symlinks.

    Walks `source_dir`; every regular (non-symlink) file whose owner UID
    matches `username` is replaced by a symbolic link to the file at the same
    relative path under `target_dir`. Files with no counterpart in
    `target_dir` are left alone. Progress and problems are reported with
    print(); per-file OS errors never abort the walk.

    Args:
        source_dir (str): The root of the directory tree to search for files.
        target_dir (str): The root of the directory tree containing the new files.
        username (str): The name of the user whose files will be processed.

    Returns:
        None. If `username` is unknown to the system, a message is printed
        and nothing is changed.
    """
    source_dir = os.path.abspath(source_dir)
    target_dir = os.path.abspath(target_dir)

    # Get the user ID (UID) for the specified username
    try:
        user_uid = pwd.getpwnam(username).pw_uid
    except KeyError:
        print(f"Error: User '{username}' not found. Exiting.")
        return

    print(f"Searching for files owned by '{username}' (UID: {user_uid}) in '{source_dir}'...")

    for dirpath, _dirnames, filenames in os.walk(source_dir):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)

            try:
                if os.path.islink(file_path):
                    print(f"Skipping symlink: {file_path}")
                    continue
                file_uid = os.stat(file_path).st_uid
            except FileNotFoundError:
                continue  # Skip if file was deleted during traversal
            except OSError as e:
                print(f"Error reading {file_path}: {e}. Skipping.")
                continue

            if file_uid != user_uid:
                continue

            print(f"Found owned file: {file_path}")

            # Determine the relative path and the new link's destination
            relative_path = os.path.relpath(file_path, source_dir)
            link_target = os.path.join(target_dir, relative_path)

            # Check if the target file actually exists
            if not os.path.exists(link_target):
                print(f"Warning: Corresponding file not found in '{target_dir}' for '{file_path}'. Skipping.")
                continue

            # A link pointing back at the file it replaces would destroy the data.
            if os.path.realpath(link_target) == os.path.realpath(file_path):
                print(f"Warning: '{link_target}' resolves to '{file_path}' itself. Skipping.")
                continue

            try:
                _swap_file_for_symlink(file_path, link_target)
            except OSError as e:
                print(f"Error creating symlink for {file_path}: {e}. Skipping.")
                continue

            print(f"Created symbolic link: {file_path} -> {link_target}")
75+
76+
if __name__ == "__main__":
    # Hard-coded run configuration; edit these values for another tree or account.
    # Tree that is scanned for files owned by the invoking user.
    source_root = "/glade/campaign/cesm/cesmdata/cseg/inputdata/"
    # Tree holding the files the new symlinks will point at.
    target_root = "/glade/campaign/collections/gdex/data/d651077/cesmdata/inputdata/"
    # Account name taken from the USER environment variable (KeyError if unset).
    my_username = os.environ["USER"]

    find_and_replace_owned_files(
        source_dir=source_root,
        target_dir=target_root,
        username=my_username,
    )
85+
86+

rimport

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
#!/glade/u/apps/derecho/24.12/opt/view/bin/python
2+
"""
3+
A drop-in CLI replacement for the legacy `rimport` csh tool.
4+
5+
This script preserves the original command-line interface:
6+
7+
SYNOPSIS
8+
rimport [-file filename] [-list filelist] [-inputdata inputdata_dir] [-help]
9+
10+
OPTIONS
11+
-file filename
12+
provide a single filename relative to the top inputdata directory
13+
-list filelist
14+
provide a file that contains a list of filenames to import. all filenames
15+
in the list are relative to the top inputdata area.
16+
-inputdata inputdata_dir
17+
change the default local top level inputdata directory
18+
-help
19+
get help about this tool
20+
21+
Each requested file is handled by `stage_data()`, which copies it into the staging
22+
tree at the same path relative to the inputdata directory.
23+
"""
24+
from __future__ import annotations
25+
26+
import argparse
27+
import os
28+
import pwd
29+
import shutil
30+
import sys
31+
from pathlib import Path
32+
from typing import Iterable, List
33+
34+
35+
class PlainHelpFormatter(argparse.RawTextHelpFormatter):
    """Help formatter that adds nothing to argparse.RawTextHelpFormatter."""
37+
38+
39+
def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser that mirrors the legacy `rimport` options.

    Exactly one of -file / -list is required, -inputdata overrides the
    default inputdata directory, and -help shows the help text and exits.
    The automatic -h/--help option is disabled and all per-option help
    strings are suppressed, so the help output consists of the usage line
    and the hand-written description below.

    Returns:
        The configured parser.
    """
    usage_line = "rimport [-file filename] [-list filelist] [-inputdata inputdata_dir] [-help]"

    help_text = "".join(
        (
            "SYNOPSIS\n",
            f" {usage_line}\n\n",
            "OPTIONS\n",
            " -file filename\n",
            " provide a single filename relative to the top inputdata directory\n",
            " -list filelist\n",
            " provide a file that contains a list of filenames to import. all filenames\n",
            " in the list are relative to the top inputdata area.\n",
            " -inputdata inputdata_dir\n",
            " change the default local top level inputdata directory\n",
            " -help\n",
            " get help about this tool\n",
        )
    )

    default_inputdata = os.path.join("/glade", "campaign", "cesm", "cesmdata", "cseg", "inputdata")

    parser = argparse.ArgumentParser(
        prog="rimport",
        usage=usage_line,
        description=help_text,
        formatter_class=PlainHelpFormatter,
        add_help=False,  # the legacy tool only knows -help
    )

    # The file source is either a single name or a list file, never both.
    source_choice = parser.add_mutually_exclusive_group(required=True)
    source_choice.add_argument("-file", dest="file", metavar="filename", help=argparse.SUPPRESS)
    source_choice.add_argument("-list", dest="filelist", metavar="filelist", help=argparse.SUPPRESS)

    parser.add_argument(
        "-inputdata",
        dest="inputdata",
        metavar="inputdata_dir",
        default=default_inputdata,
        help=argparse.SUPPRESS,
    )

    # Legacy spelling of the help switch (there is deliberately no -h).
    parser.add_argument("-help", action="help", help=argparse.SUPPRESS)

    return parser
84+
85+
86+
def read_filelist(list_path: Path) -> List[str]:
    """Return the entries of a list file.

    Each line is stripped of surrounding whitespace; lines that are then
    empty or begin with '#' are dropped. The file is read as UTF-8.
    """
    with list_path.open("r", encoding="utf-8") as handle:
        stripped = (raw.strip() for raw in handle)
        return [entry for entry in stripped if entry and not entry.startswith("#")]
96+
97+
98+
def resolve_paths(root: Path, relnames: Iterable[str]) -> List[Path]:
    """Turn file names into resolved paths.

    Absolute names are resolved as given; every other name is joined onto
    `root` first. Output order follows `relnames`.
    """

    def _locate(name: str) -> Path:
        candidate = Path(name)
        if candidate.is_absolute():
            return candidate.resolve()
        return (root / name).resolve()

    return [_locate(entry) for entry in relnames]
104+
105+
def stage_data(src: Path, inputdata_root: Path, staging_root: Path) -> None:
    """Copy `src` into the staging tree at its inputdata-relative location.

    The destination is `staging_root / rel`, where `rel` is the resolved
    `src` expressed relative to the resolved `inputdata_root`. Missing parent
    directories of the destination are created, and the copy is made with
    shutil.copy2.

    Raises:
        RuntimeError: `src` is a symlink (live: "already published"; dangling:
            "broken symlink"), or it does not live under `inputdata_root`.
        FileNotFoundError: `src` does not exist.
    """
    if src.is_symlink():
        if src.exists():
            raise RuntimeError("File is already published.")
        raise RuntimeError(f"Source is a broken symlink: {src}")

    if not src.exists():
        raise FileNotFoundError(f"source not found: {src}")

    try:
        relative_part = src.resolve().relative_to(inputdata_root.resolve())
    except ValueError:
        # A path mentioning the d651077 collection is reported as already published.
        if "d651077" not in str(src):
            raise RuntimeError(f"source not under inputdata root: {src} not in {inputdata_root}")
        raise RuntimeError(f"Source file {src.name} is already published.")

    destination = staging_root / relative_part
    destination.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(src, destination)
    print(f"[rimport] staged {src} -> {destination}")
134+
135+
def ensure_running_as(target_user: str, argv: list[str]) -> None:
    """Make sure the process runs as `target_user`, re-executing via sudo if not.

    When the effective UID already belongs to `target_user` the function
    simply returns. Otherwise the current process is replaced by
    `sudo -u <target_user> -- <argv...>`; sudo then runs its own
    authentication flow, which needs an interactive terminal.

    Args:
        target_user: Account name the tool must run as.
        argv: Full command line (program first) to re-execute under sudo.

    Raises:
        SystemExit: with code 2 if `target_user` is unknown on this system,
            or if a switch is needed but stdin is not a TTY.
    """
    try:
        target_uid = pwd.getpwnam(target_user).pw_uid
    except KeyError:
        print(f"rimport: target user '{target_user}' not found on this system", file=sys.stderr)
        raise SystemExit(2) from None

    if os.geteuid() == target_uid:
        return

    if not sys.stdin.isatty():
        # Name the account actually requested rather than a fixed one.
        print(
            f"rimport: need interactive TTY to authenticate as '{target_user}' (2FA).\n"
            f" Try: sudo -u {target_user} rimport …",
            file=sys.stderr,
        )
        raise SystemExit(2)

    # Re-exec under target user; this invokes sudo's normal password/2FA flow.
    # execvp does not return on success.
    os.execvp("sudo", ["sudo", "-u", target_user, "--"] + argv)
150+
151+
def safe_mvandlink(src: Path, dst: Path) -> None:
    """Move `src` to `dst` and leave a symlink to the moved file at `src`.

    Parent directories of `dst` are created as needed. The link target is
    made absolute so the link stays valid regardless of the directory `src`
    lives in. If the symlink cannot be created, the file is moved back to
    `src` before the error is re-raised.

    Args:
        src: Existing file to relocate; becomes a symlink on success.
        dst: Location the file is moved to.

    Raises:
        OSError: if the move or the symlink creation fails.
    """
    dst.parent.mkdir(parents=True, exist_ok=True)
    # shutil.move copes with cross-filesystem moves and returns the final
    # location (which differs from `dst` when `dst` is an existing directory).
    moved_to = Path(shutil.move(str(src), str(dst)))
    link_target = os.path.abspath(moved_to)
    try:
        os.symlink(link_target, src)
    except OSError:
        # Do not leave the original path empty if the link cannot be made.
        shutil.move(str(moved_to), str(src))
        raise
159+
160+
def get_staging_root() -> Path:
    """Return the root of the staging tree.

    A non-empty $RIMPORT_STAGING wins and is returned with '~' expanded and
    the path resolved; otherwise the fixed default location is returned
    as-is.
    """
    override = os.getenv("RIMPORT_STAGING")
    if not override:
        return Path("/glade/campaign/collections/gdex/data/d651077/cesmdata/inputdata")
    return Path(override).expanduser().resolve()
167+
168+
169+
def main(argv: List[str] | None = None) -> int:
    """Run the rimport command line and stage every requested file.

    Args:
        argv: Arguments to parse; None lets argparse read the process
            command line.

    Returns:
        0 when every file was staged, 1 when at least one file failed, and
        2 when the inputdata directory or the list file is missing or the
        list file contains no names.
    """
    args = build_parser().parse_args(argv)

    # Switch to the cesmdata account before touching the tree.
    # Comment out the next line if you prefer to run `sudo -u cesmdata rimport …` explicitly.
    ensure_running_as("cesmdata", sys.argv)

    root = Path(args.inputdata).expanduser().resolve()
    if not root.exists():
        print(f"rimport: inputdata directory does not exist: {root}", file=sys.stderr)
        return 2

    # Collect the names to handle: from the list file, or the single -file value.
    if args.file is None:
        list_path = Path(args.filelist).expanduser().resolve()
        if not list_path.exists():
            print(f"rimport: list file not found: {list_path}", file=sys.stderr)
            return 2
        relnames = read_filelist(list_path)
        if not relnames:
            print(f"rimport: no filenames found in list: {list_path}", file=sys.stderr)
            return 2
    else:
        relnames = [args.file]

    # Absolute names are accepted as well as inputdata-relative ones.
    paths = resolve_paths(root, relnames)
    staging_root = get_staging_root()

    failures = 0
    for path in paths:
        try:
            stage_data(path, root, staging_root)
        except Exception as exc:  # one bad file must not stop a batch run
            failures += 1
            print(f"rimport: error processing {path}: {exc}", file=sys.stderr)

    return 1 if failures else 0
208+
209+
210+
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())

0 commit comments

Comments
 (0)