@@ -34,7 +34,49 @@ def always(self, message, *args, **kwargs):
3434logging .Logger .always = always
3535
3636
37- def find_and_replace_owned_files (source_dir , target_dir , username , dry_run = False ):
def _scandir_snapshot(directory):
    """
    Read the entries of a directory into a list and close the directory.

    Args:
        directory (str): The directory to list.

    Returns:
        list[os.DirEntry]: The entries that could be read. If listing fails,
        the error is logged at debug level and the entries read so far
        (possibly none) are returned.
    """
    entries = []
    try:
        with os.scandir(directory) as scanner:
            for entry in scanner:
                entries.append(entry)
    except OSError as e:  # PermissionError is a subclass of OSError
        logger.debug("Error accessing %s: %s. Skipping.", directory, e)
    return entries


def find_owned_files_scandir(directory, user_uid):
    """
    Find all regular files under a directory owned by a specific user.

    The tree is walked depth-first using os.scandir(). Symbolic links are
    never followed: links are logged and skipped, and only real directories
    are descended into. Entries that cannot be inspected are logged at debug
    level and skipped.

    Each directory is listed completely and closed before any of its files
    are yielded. This keeps the walk well-defined when the consumer renames,
    removes or creates files in the directory it is currently receiving paths
    from, and it avoids holding one open directory handle per tree level.
    The walk uses an explicit stack instead of recursion, so very deep trees
    cannot hit the interpreter's recursion limit.

    Args:
        directory (str): The root directory to search.
        user_uid (int): The UID of the user whose files to find.

    Yields:
        str: Path of each file owned by the user. Paths are built from
        ``directory``, so they are absolute only if ``directory`` is.
    """
    # Stack of iterators, one per directory currently being walked. Descending
    # pushes a new iterator, so files are yielded in the same depth-first
    # order a recursive walk would produce.
    pending = [iter(_scandir_snapshot(directory))]

    while pending:
        entry = next(pending[-1], None)
        if entry is None:
            # Current directory exhausted; resume its parent.
            pending.pop()
            continue

        owned = False
        try:
            if entry.is_file(follow_symlinks=False):
                # The type check usually needs no extra system call; stat()
                # may need one, after which the result is cached on the entry.
                owned = entry.stat(follow_symlinks=False).st_uid == user_uid
            elif entry.is_dir(follow_symlinks=False):
                pending.append(iter(_scandir_snapshot(entry.path)))
            elif entry.is_symlink():
                logger.info("Skipping symlink: %s", entry.path)
        except OSError as e:
            logger.debug("Error accessing %s: %s. Skipping.", entry.path, e)
            continue

        # Yield outside the try block so only filesystem errors raised by
        # this function's own calls are swallowed.
        if owned:
            yield entry.path
78+
79+ def replace_files_with_symlinks (source_dir , target_dir , username , dry_run = False ):
3880 """
3981 Finds files owned by a specific user in a source directory tree,
4082 deletes them, and replaces them with symbolic links to the same
@@ -66,70 +108,52 @@ def find_and_replace_owned_files(source_dir, target_dir, username, dry_run=False
66108 source_dir ,
67109 )
68110
69- for dirpath , _ , filenames in os .walk (source_dir ):
70- for filename in filenames :
71- file_path = os .path .join (dirpath , filename )
72-
73- # Use os.stat().st_uid to get the file's owner UID
74- try :
75- if os .path .islink (file_path ):
76- logger .info ("Skipping symlink: %s" , file_path )
77- continue
78-
79- file_uid = os .stat (file_path ).st_uid
80- except FileNotFoundError :
81- continue # Skip if file was deleted during traversal
82-
83- if file_uid == user_uid :
84- logger .info ("Found owned file: %s" , file_path )
85-
86- # Determine the relative path and the new link's destination
87- relative_path = os .path .relpath (file_path , source_dir )
88- link_target = os .path .join (target_dir , relative_path )
89-
90- # Check if the target file actually exists
91- if not os .path .exists (link_target ):
92- logger .warning (
93- "Warning: Corresponding file not found in '%s' "
94- "for '%s'. Skipping." ,
95- target_dir ,
96- file_path ,
97- )
98- continue
99-
100- # Get the link name
101- link_name = file_path
102-
103- if dry_run :
104- logger .info (
105- "[DRY RUN] Would create symbolic link: %s -> %s" ,
106- link_name ,
107- link_target ,
108- )
109- continue
110-
111- # Remove the original file
112- try :
113- os .rename (link_name , link_name + ".tmp" )
114- logger .info ("Deleted original file: %s" , link_name )
115- except OSError as e :
116- logger .error ("Error deleting file %s: %s. Skipping." , link_name , e )
117- continue
118-
119- # Create the symbolic link, handling necessary parent directories
120- try :
121- # Create parent directories for the link if they don't exist
122- os .makedirs (os .path .dirname (link_name ), exist_ok = True )
123- os .symlink (link_target , link_name )
124- os .remove (link_name + ".tmp" )
125- logger .info (
126- "Created symbolic link: %s -> %s" , link_name , link_target
127- )
128- except OSError as e :
129- os .rename (link_name + ".tmp" , link_name )
130- logger .error (
131- "Error creating symlink for %s: %s. Skipping." , link_name , e
132- )
111+ # Use efficient scandir-based search
112+ for file_path in find_owned_files_scandir (source_dir , user_uid ):
113+ logger .info ("Found owned file: %s" , file_path )
114+
115+ # Determine the relative path and the new link's destination
116+ relative_path = os .path .relpath (file_path , source_dir )
117+ link_target = os .path .join (target_dir , relative_path )
118+
119+ # Check if the target file actually exists
120+ if not os .path .exists (link_target ):
121+ logger .warning (
122+ "Warning: Corresponding file not found in '%s' for '%s'. Skipping." ,
123+ target_dir ,
124+ file_path ,
125+ )
126+ continue
127+
128+ # Get the link name
129+ link_name = file_path
130+
131+ if dry_run :
132+ logger .info (
133+ "[DRY RUN] Would create symbolic link: %s -> %s" ,
134+ link_name ,
135+ link_target ,
136+ )
137+ continue
138+
139+ # Remove the original file
140+ try :
141+ os .rename (link_name , link_name + ".tmp" )
142+ logger .info ("Deleted original file: %s" , link_name )
143+ except OSError as e :
144+ logger .error ("Error deleting file %s: %s. Skipping." , link_name , e )
145+ continue
146+
147+ # Create the symbolic link, handling necessary parent directories
148+ try :
149+ # Create parent directories for the link if they don't exist
150+ os .makedirs (os .path .dirname (link_name ), exist_ok = True )
151+ os .symlink (link_target , link_name )
152+ os .remove (link_name + ".tmp" )
153+ logger .info ("Created symbolic link: %s -> %s" , link_name , link_target )
154+ except OSError as e :
155+ os .rename (link_name + ".tmp" , link_name )
156+ logger .error ("Error creating symlink for %s: %s. Skipping." , link_name , e )
133157
134158
135159def validate_directory (path ):
@@ -242,7 +266,7 @@ def main():
242266 start_time = time .time ()
243267
244268 # --- Execution ---
245- find_and_replace_owned_files (
269+ replace_files_with_symlinks (
246270 args .source_root , args .target_root , my_username , dry_run = args .dry_run
247271 )
248272
0 commit comments