@@ -35,7 +35,7 @@ def always(self, message, *args, **kwargs):
3535logging .Logger .always = always
3636
3737
38- def _handle_non_dir_entry (entry , user_uid ):
38+ def _handle_non_dir_entry (entry : os . DirEntry , user_uid : int ):
3939 """
4040 Check if a non-directory entry is owned by the user and should be processed.
4141
@@ -61,6 +61,38 @@ def _handle_non_dir_entry(entry, user_uid):
6161 return None
6262
6363
def _handle_non_dir_str(path: str, user_uid: int):
    """
    Check if a non-directory string is owned by the user and should be processed. This should only
    ever be needed if the user specified a file to process on the command line. Because we don't
    expect users to process large numbers of files at once in this way, it's okay if this function
    isn't performance-optimized.

    Args:
        path (str): A filesystem path.
        user_uid (int): The UID of the user whose files to find.

    Returns:
        str or None: The path, exactly as it was passed in, if it's owned by the user and is a
            regular file (not a symlink), otherwise None.

    Raises:
        OSError: If the path cannot be stat'ed (e.g., it does not exist).
    """
    # Imported locally so this function does not depend on the module's import block.
    import stat  # pylint: disable=import-outside-toplevel

    # Take a single snapshot of the path WITHOUT following symlinks. Ownership and file type are
    # then judged from the same filesystem state, rather than from separate stat/isfile/islink
    # queries between which the path could be swapped out.
    path_stat = os.lstat(path)

    # Is this even owned by the user?
    if path_stat.st_uid != user_uid:
        return None

    # Log about skipping symlinks
    if stat.S_ISLNK(path_stat.st_mode):
        logger.debug("Skipping symlink: %s", path)
        return None

    # Return if it's a regular file; anything else (directory, FIFO, socket, ...) is ignored
    if stat.S_ISREG(path_stat.st_mode):
        return path

    return None
94+
95+
6496def handle_non_dir (var , user_uid , inputdata_root ):
6597 """
6698 Check if a non-directory is owned by the user and should be processed. Passes var to a
@@ -79,14 +111,22 @@ def handle_non_dir(var, user_uid, inputdata_root):
79111 TypeError: If var is not a DirEntry-like object.
80112 ValueError: If the file path is not under inputdata_root.
81113 """
114+ logger .debug ("starting handle_non_dir()" )
115+
116+ # Handle a variable of type str.
117+ if isinstance (var , str ):
118+ logger .debug ("isinstance(var, str)" )
119+ file_path = _handle_non_dir_str (var , user_uid )
82120
121+ # Handle a variable of type like os.DirEntry.
83122 # Fall back to duck typing: If var has the required DirEntry methods and members, treat it as a
84123 # DirEntry. This is necessary for this conditional to work with the MockDirEntry type used in
85124 # testing. ("If it looks, walks, and quacks like a duck...")
86- if isinstance (var , os .DirEntry ) or all (
125+ elif isinstance (var , os .DirEntry ) or all (
87126 hasattr (var , m ) for m in ["stat" , "is_file" , "is_symlink" , "path" ]
88127 ):
89128 file_path = _handle_non_dir_entry (var , user_uid )
129+
90130 else :
91131 raise TypeError (
92132 f"Unsure how to handle non-directory variable of type { type (var )} "
@@ -101,15 +141,15 @@ def handle_non_dir(var, user_uid, inputdata_root):
101141 return file_path
102142
103143
104- def find_owned_files_scandir (directory , user_uid , inputdata_root = DEFAULT_SOURCE_ROOT ):
144+ def find_owned_files_scandir (item , user_uid , inputdata_root = DEFAULT_SOURCE_ROOT ):
105145 """
106146 Efficiently find all files owned by a specific user using os.scandir().
107147
108148 This is more efficient than os.walk() because os.scandir() caches stat
109149 information during directory traversal, reducing system calls.
110150
111151 Args:
112- directory (str): The root directory to search.
152+ item (str): The root directory to search, or the file to check .
113153 user_uid (int): The UID of the user whose files to find.
114154 inputdata_root (str): The root of the directory tree containing CESM input data.
115155
@@ -120,7 +160,7 @@ def find_owned_files_scandir(directory, user_uid, inputdata_root=DEFAULT_SOURCE_
120160 ValueError: If any file found is not under inputdata_root.
121161 """
122162 try :
123- with os .scandir (directory ) as entries :
163+ with os .scandir (item ) as entries :
124164 for entry in entries :
125165 try :
126166 # Recursively process directories (not following symlinks)
@@ -139,26 +179,30 @@ def find_owned_files_scandir(directory, user_uid, inputdata_root=DEFAULT_SOURCE_
139179 logger .debug ("Error accessing %s: %s. Skipping." , entry .path , e )
140180 continue
141181
182+ except NotADirectoryError :
183+ if (file_path := handle_non_dir (item , user_uid , inputdata_root )) is not None :
184+ yield file_path
185+
142186 except (OSError , PermissionError ) as e :
143- logger .debug ("Error accessing %s: %s. Skipping." , directory , e )
187+ logger .warning ("Error accessing %s: %s. Skipping." , item , e )
144188
145189
146190def replace_files_with_symlinks (
147- source_dir , target_dir , username , inputdata_root = DEFAULT_SOURCE_ROOT , dry_run = False
191+ item_to_process , target_dir , username , inputdata_root = DEFAULT_SOURCE_ROOT , dry_run = False
148192):
149193 """
150194 Finds files owned by a specific user in a source directory tree,
151195 deletes them, and replaces them with symbolic links to the same
152196 relative path in a target directory tree.
153197
154198 Args:
155- source_dir (str): The root of the directory tree to search for files .
199+ item_to_process (str): The root directory to search, or the file to process .
156200 target_dir (str): The root of the directory tree containing the new files.
157201 inputdata_root (str): The root of the directory tree containing CESM input data.
158202 username (str): The name of the user whose files will be processed.
159203 dry_run (bool): If True, only show what would be done without making changes.
160204 """
161- source_dir = os .path .abspath (source_dir )
205+ item_to_process = os .path .abspath (item_to_process )
162206 target_dir = os .path .abspath (target_dir )
163207
164208 # Get the user ID (UID) for the specified username
@@ -175,38 +219,38 @@ def replace_files_with_symlinks(
175219 "Searching for files owned by '%s' (UID: %s) in '%s'..." ,
176220 username ,
177221 user_uid ,
178- source_dir ,
222+ item_to_process ,
179223 )
180224
181225 # Use efficient scandir-based search
182- for file_path in find_owned_files_scandir (source_dir , user_uid , inputdata_root ):
183- replace_one_file_with_symlink (
184- source_dir , target_dir , file_path , dry_run = dry_run
185- )
226+ for file_path in find_owned_files_scandir (item_to_process , user_uid , inputdata_root ):
227+ replace_one_file_with_symlink (inputdata_root , target_dir , file_path , dry_run = dry_run )
186228
187229
188- def replace_one_file_with_symlink (source_dir , target_dir , file_path , dry_run = False ):
230+ def replace_one_file_with_symlink (
231+ inputdata_root , target_dir , file_path , dry_run = False
232+ ):
189233 """
190234 Given a file, replaces it with a symbolic link to the same relative path in a target directory
191235 tree.
192236
193237 Args:
194- source_dir (str): The root of the directory tree to search for files .
238+ inputdata_root (str): The root of the directory tree containing CESM input data .
195239 target_dir (str): The root of the directory tree containing the new files.
196240 file_path (str): The path of the file to be replaced.
197241 dry_run (bool): If True, only show what would be done without making changes.
198242 """
199243 logger .info ("Found owned file: %s" , file_path )
200244
201245 # Determine the relative path and the new link's destination
202- relative_path = os .path .relpath (file_path , source_dir )
246+ relative_path = os .path .relpath (file_path , inputdata_root )
203247 link_target = os .path .join (target_dir , relative_path )
204248
205249 # Check if the target file actually exists
206250 if not os .path .exists (link_target ):
207251 logger .warning (
208- "Warning: Corresponding file not found in '%s' for '%s'. Skipping." ,
209- target_dir ,
252+ "Warning: Corresponding file '%s' not found for '%s'. Skipping." ,
253+ link_target ,
210254 file_path ,
211255 )
212256 return
@@ -242,9 +286,9 @@ def replace_one_file_with_symlink(source_dir, target_dir, file_path, dry_run=Fal
242286 logger .error ("Error creating symlink for %s: %s. Skipping." , link_name , e )
243287
244288
245- def validate_directory (path ):
289+ def validate_paths (path , check_is_dir = False ):
246290 """
247- Validate that one or more paths exist and are directories .
291+ Validate that one or more paths exist.
248292
249293 Args:
250294 path (str or list): The path to validate, or a list of such paths.
@@ -253,27 +297,43 @@ def validate_directory(path):
253297 str or list: The absolute path(s) if valid.
254298
255299 Raises:
256- argparse.ArgumentTypeError: If a path doesn't exist or is not a directory .
300+ argparse.ArgumentTypeError: If a path doesn't exist.
257301 """
258302 if isinstance (path , list ):
259303 result = []
260304 for item in path :
261- result .append (validate_directory (item ))
305+ result .append (validate_paths (item , check_is_dir = check_is_dir ))
262306 return result
263307
264308 if not os .path .exists (path ):
265- raise argparse .ArgumentTypeError (f"Directory '{ path } ' does not exist" )
266- if not os .path .isdir (path ):
309+ raise argparse .ArgumentTypeError (f"'{ path } ' does not exist" )
310+ if check_is_dir and not os .path .isdir (path ):
267311 raise argparse .ArgumentTypeError (f"'{ path } ' is not a directory" )
268312 return os .path .abspath (path )
269313
270314
def validate_directory(path):
    """
    Check that a path (or every path in a list) exists and is a directory.

    This is a thin wrapper that delegates to validate_paths() with the directory check enabled.

    Args:
        path (str or list): A single directory path, or a list of directory paths.

    Returns:
        str or list: The result of validate_paths(): the absolute path, or a list of them.

    Raises:
        argparse.ArgumentTypeError: Raised by validate_paths() if a path doesn't exist or is not
            a directory.
    """
    checked = validate_paths(path, check_is_dir=True)
    return checked
329+
330+
271331def parse_arguments ():
272332 """
273333 Parse command-line arguments.
274334
275335 Returns:
276- argparse.Namespace: Parsed arguments containing source_root ,
336+ argparse.Namespace: Parsed arguments containing items_to_process ,
277337 target_root, and verbosity settings.
278338 """
279339 parser = argparse .ArgumentParser (
@@ -282,11 +342,12 @@ def parse_arguments():
282342 )
283343 )
284344 parser .add_argument (
285- "source_root " ,
345+ "items_to_process " ,
286346 nargs = "*" ,
287347 default = DEFAULT_SOURCE_ROOT ,
348+ type = validate_paths ,
288349 help = (
289- f"One or more directories to search for files (default: { DEFAULT_SOURCE_ROOT } )"
350+ f"One or more ( directories to search for) files (default: { DEFAULT_SOURCE_ROOT } )"
290351 ),
291352 )
292353 parser .add_argument (
@@ -356,13 +417,13 @@ def process_args(args):
356417 else :
357418 args .log_level = logging .INFO
358419
359- # Ensure that source_root is a list
360- if hasattr (args , "source_root " ) and not isinstance (args .source_root , list ):
361- args .source_root = [args .source_root ]
420+ # Ensure that items_to_process is a list
421+ if hasattr (args , "items_to_process " ) and not isinstance (args .items_to_process , list ):
422+ args .items_to_process = [args .items_to_process ]
362423
363- # Check that every item in source_root is a child of inputdata_root
364- if hasattr (args , "source_root " ): # Sometimes doesn't if we're testing
365- for item in args .source_root :
424+ # Check that every item in items_to_process is a child of inputdata_root
425+ if hasattr (args , "items_to_process " ): # Sometimes doesn't if we're testing
426+ for item in args .items_to_process :
366427 if not Path (item ).is_relative_to (args .inputdata_root ):
367428 raise argparse .ArgumentTypeError (
368429 f"Item '{ item } ' not under inputdata root '{ args .inputdata_root } '"
@@ -378,6 +439,7 @@ def process_args(args):
378439
379440
380441def main ():
442+ # pylint: disable=missing-function-docstring
381443
382444 args = parse_arguments ()
383445
@@ -388,7 +450,7 @@ def main():
388450 start_time = time .time ()
389451
390452 # --- Execution ---
391- for item in args .source_root :
453+ for item in args .items_to_process :
392454 replace_files_with_symlinks (
393455 item ,
394456 args .target_root ,
0 commit comments