77import re
88import shutil
99import subprocess
10+ from dataclasses import dataclass
1011from datetime import datetime
12+ from typing import Any
1113
1214import dlstbx .util .symlink
1315from dlstbx import schemas
1416from dlstbx .wrapper import Wrapper
1517
1618
@dataclass
class PeakData:
    """One anomalous-difference peak parsed from a ``found_peaks.dat`` file.

    Attributes:
        density: Peak electron density in e/Å^3 (matches the
            "Electron Density = ... e/Å^3" field of the peak-file line).
        rmsd: RMSD value reported for the peak.
        xyz: (x, y, z) coordinates of the peak.
            NOTE(review): coordinate frame (fractional vs orthogonal Å) is not
            visible from this file — confirm against the peak-file producer.
    """

    density: float
    rmsd: float
    xyz: tuple[float, float, float]
24+
25+
1726class MetalIdWrapper (Wrapper ):
1827 _logger_name = "dlstbx.wrap.metal_id"
1928
20- def parse_peak_data (self , peak_data_file ) :
21- peak_data = []
29+ def parse_peak_data (self , peak_data_file : pathlib . Path ) -> list [ PeakData ] :
30+ peak_data : list [ PeakData ] = []
2231 with open (peak_data_file , "r" ) as file :
2332 for line in file :
2433 match = re .match (
25- r"Peak \d+: Electron Density = ([\d.]+) e/Å\^3, RMSD = ([\d.]+), XYZ = \(([\d.]+), ([\d.]+), ([\d.]+)\)" ,
34+ r"Peak \d+: Electron Density = ([\d.]+) e/Å\^3, RMSD = ([\d.]+), XYZ = \((-? [\d.]+), (-? [\d.]+), (-? [\d.]+)\)" ,
2635 line ,
2736 )
2837 if match :
29- electron_density = float (match .group (1 ))
38+ density = float (match .group (1 ))
3039 rmsd = float (match .group (2 ))
3140 xyz = (
3241 float (match .group (3 )),
3342 float (match .group (4 )),
3443 float (match .group (5 )),
3544 )
36- peak_data .append (
37- {"electron_density" : electron_density , "rmsd" : rmsd , "xyz" : xyz }
38- )
45+ peak_data .append (PeakData (density = density , rmsd = rmsd , xyz = xyz ))
3946 return peak_data
4047
4148 def send_results_to_ispyb (
4249 self ,
43- peak_data ,
44- metal_id_command ,
45- dimple_log_file ,
46- results_directory ,
47- start_time ,
48- ):
49- scaling_id = self .params .get ("scaling_id" , [])
50- if len (scaling_id ) != 1 :
51- self .log .info (f"Scaling ID { scaling_id } provided" )
52- self .log .error (
53- "Exactly one scaling_id must be provided - cannot insert metal_id results to ISPyB"
54- )
55- return False
56- scaling_id = scaling_id [0 ]
57-
58- if not dimple_log_file .is_file ():
59- self .log .error (
60- f"dimple log file '{ dimple_log_file } ' not found - cannot insert metal_id results to ISPyB"
61- )
62- return False
50+ peak_data : list [PeakData ],
51+ metal_id_command : str ,
52+ dimple_log_file : pathlib .Path ,
53+ results_directory : pathlib .Path ,
54+ start_time : datetime ,
55+ scaling_id : int ,
56+ ) -> dict [str , Any ]:
6357 self .log .info (
6458 f"Autoproc_prog_id: '{ self .recwrap .environment .get ('ispyb_autoprocprogram_id' )} '"
6559 )
@@ -82,12 +76,12 @@ def send_results_to_ispyb(
8276 blobs = []
8377 for n_peak , peak in enumerate (peak_data , start = 1 ):
8478 self .log .info (
85- f"Adding blob { n_peak } to ispyb results - Density: { peak [ 'electron_density' ] } , rmsd: { peak [ ' rmsd' ] } , xyz: { peak [ ' xyz' ] } "
79+ f"Adding blob { n_peak } to ispyb results - Density: { peak . density } , rmsd: { peak . rmsd } , xyz: { peak . xyz } "
8680 )
8781 blobs .append (
8882 schemas .Blob (
89- xyz = peak [ " xyz" ] ,
90- height = peak [ "electron_density" ] ,
83+ xyz = peak . xyz ,
84+ height = peak . density ,
9185 # nearest_atom=nearest_atom,
9286 # nearest_atom_distance=distance,
9387 map_type = "difference" , # TODO change this to anomalous_difference once enum exists.
@@ -106,20 +100,18 @@ def send_results_to_ispyb(
106100 )
107101
108102 attachments = []
103+
104+ primary_result_files = self .params .get ("primary_result_files" , {})
109105 self .log .info ("Adding attachments for upload to ispyb" )
110106 for f in results_directory .iterdir ():
111- if f .suffix not in [".map" , ".log" , ".py" , ".pha" , ".pdb" , ".dat" ]:
112- self .log .info (f"Skipping file { f .name } " )
113- continue
114- elif f .suffix in [".map" , ".pdb" , ".dat" ]:
115- file_type = "result"
116- importance_rank = 1
117- elif f .suffix in [".pha" , ".mtz" ]:
107+ if f .name in primary_result_files :
108+ file_type = primary_result_files [f .name ]["type" ]
109+ importance_rank = primary_result_files [f .name ]["rank" ]
110+ elif f .suffix in [".map" , ".pdb" , ".dat" , ".pha" , ".mtz" ]:
118111 file_type = "result"
119112 importance_rank = 2
120113 else :
121- file_type = "log"
122- importance_rank = 3
114+ continue
123115
124116 attachments .append (
125117 schemas .Attachment (
@@ -132,6 +124,16 @@ def send_results_to_ispyb(
132124 )
133125 self .log .info (f"Added { f .name } as an attachment" )
134126
127+ if getattr (self , "final_directory" , None ):
128+ for att in attachments :
129+ if att .file_name in primary_result_files :
130+ shutil .copy (att .file_path / att .file_name , self .final_directory )
131+ att .file_path = self .final_directory
132+ for blob in blobs :
133+ if blob .filepath and blob .view1 :
134+ shutil .copy (blob .filepath / blob .view1 , self .final_directory )
135+ blob .filepath = self .final_directory
136+
135137 ispyb_results = {
136138 "mxmrrun" : json .loads (mxmrrun .model_dump_json ()),
137139 "blobs" : [json .loads (blob .model_dump_json ()) for blob in blobs ],
@@ -152,6 +154,15 @@ def run(self):
152154 # Get parameters from the recipe file
153155 self .params = self .recwrap .recipe_step ["job_parameters" ]
154156
157+ scaling_id = self .params .get ("scaling_id" , [])
158+ if len (scaling_id ) != 1 :
159+ self .log .info (f"Scaling ID { scaling_id } provided" )
160+ self .log .error (
161+ "Exactly one scaling_id must be provided - cannot run metal_id"
162+ )
163+ return False
164+ scaling_id = scaling_id [0 ]
165+
155166 src_mtz_files = self .params .get ("data" , [])
156167 if not src_mtz_files :
157168 self .log .error ("Could not identify on what data to run" )
@@ -224,33 +235,55 @@ def run(self):
224235 )
225236
226237 self .log .debug ("Reading in peak data" )
227- peak_data = self .parse_peak_data (output_directory / "found_peaks.dat" )
238+ peak_file = output_directory / "found_peaks.dat"
239+ if not peak_file .is_file ():
240+ self .log .info ("Metal_ID: No peaks found" )
241+ peak_data = []
242+ else :
243+ peak_data = self .parse_peak_data (peak_file )
228244
229245 for f in output_directory .iterdir ():
230- self .log .info (f"Searching for files to copy. Current file is : { f } " )
246+ self .log .debug (f"Searching for files to copy. Current file is : { f } " )
231247 if f .is_dir ():
232248 continue
233249 if f .name .startswith ("." ):
234250 continue
235- if any (f .suffix == skipext for skipext in [".r3d" ]):
236- continue
237- self .log .info ("Copying file" )
251+ self .log .debug ("Copying file" )
238252 shutil .copy (f , results_directory )
239253
240- if self .params .get ("create_symlink" ):
254+ symlink = self .params .get ("create_symlink" )
255+ if isinstance (symlink , list ):
256+ symlink = symlink [0 ]
257+ if symlink :
241258 dlstbx .util .symlink .create_parent_symlink (
242- os .fspath (output_directory ), self . params [ "create_symlink" ]
259+ os .fspath (output_directory ), symlink
243260 )
244261 dlstbx .util .symlink .create_parent_symlink (
245- os .fspath (results_directory ), self . params [ "create_symlink" ]
262+ os .fspath (results_directory ), symlink
246263 )
247264
248265 self .log .info ("Sending results to ISPyB" )
249266
250267 dimple_log = working_directory / "metal_id" / "dimple_below" / "dimple.log"
268+ if not dimple_log .is_file ():
269+ self .log .error (
270+ f"dimple log file '{ dimple_log } ' not found - cannot insert metal_id results to ISPyB"
271+ )
272+ return False
273+
274+ if pipeine_final_params := self .params .get ("pipeline-final" , []):
275+ self .final_directory = pathlib .Path (pipeine_final_params ["path" ])
276+ self .final_directory .mkdir (parents = True , exist_ok = True )
277+ if self .params .get ("create_symlink" ):
278+ dlstbx .util .symlink .create_parent_symlink (self .final_directory , symlink )
251279
252280 ispyb_results = self .send_results_to_ispyb (
253- peak_data , metal_id_command , dimple_log , results_directory , start_time
281+ peak_data ,
282+ metal_id_command ,
283+ dimple_log ,
284+ results_directory ,
285+ start_time ,
286+ scaling_id ,
254287 )
255288
256289 self .log .info (f"Sending { str (ispyb_results )} to ispyb service" )
0 commit comments