@@ -57,7 +57,7 @@ class PanDDAParameters(pydantic.BaseModel):
5757 automatic : Optional [bool ] = False
5858 comment : Optional [str ] = None
5959 scaling_id : list [int ]
60- timeout : float = pydantic .Field (default = 60 , alias = "timeout-minutes" )
60+ timeout : float = pydantic .Field (default = 120 , alias = "timeout-minutes" )
6161 backoff_delay : float = pydantic .Field (default = 45 , alias = "backoff-delay" )
6262 backoff_max_try : int = pydantic .Field (default = 30 , alias = "backoff-max-try" )
6363 backoff_multiplier : float = pydantic .Field (default = 1.1 , alias = "backoff-multiplier" )
@@ -69,7 +69,7 @@ class PanDDA_PostParameters(pydantic.BaseModel):
6969 automatic : Optional [bool ] = False
7070 comment : Optional [str ] = None
7171 scaling_id : list [int ]
72- processed_directory : str
72+ processing_directory : str
7373 timeout : float = pydantic .Field (default = 60 , alias = "timeout-minutes" )
7474
7575
@@ -242,7 +242,7 @@ def trigger_pandda_xchem(
242242 proposal_number = proposal .proposalNumber
243243 proposal_string = proposal_code + proposal_number
244244
245- # TEMPORARY FILTER
245+ # TEMPORARY PROPOSAL FILTER
246246 allowed_proposals = ["lb42888" , "sw44107" , "lb36049" ]
247247
248248 # 0. Check that this is an XChem expt & locate .SQLite database
@@ -265,7 +265,7 @@ def trigger_pandda_xchem(
265265 location = int (query .one ()[1 ])
266266 container_code = query .one ()[2 ]
267267
268- # Get user defined spacegroup
268+ # Get the user defined spacegroup
269269 query = (
270270 session .query (Crystal .spaceGroup )
271271 .join (BLSample , BLSample .crystalId == Crystal .crystalId )
@@ -296,7 +296,7 @@ def trigger_pandda_xchem(
296296 # match_yaml = expt_yaml
297297 self .log .info (f"Found user yaml for dtag { dtag } at { yaml_file } " )
298298
299- # account for potentially multiple labxchem visits for a single target
299+ # account for potentially multiple labxchem visits for a single target in proposal
300300 if len (match_dirs ) == 1 :
301301 match_dir = match_dirs [0 ]
302302 elif len (match_dirs ) > 1 :
@@ -365,6 +365,7 @@ def trigger_pandda_xchem(
365365 return {"success" : True }
366366 else :
367367 xchem_visit_dir = match_dir
368+ processing_dir = xchem_visit_dir / "processing"
368369 # user_settings = match_yaml["autoprocessing"]
369370
370371 if xchem_visit_dir :
@@ -377,12 +378,7 @@ def trigger_pandda_xchem(
377378 )
378379 return {"success" : True }
379380
380- processing_dir = xchem_visit_dir / "processing"
381- db = processing_dir / "database" / "soakDBDataFile.sqlite"
382- processed_dir = xchem_visit_dir / "processed"
383-
384381 # 1. Trigger when all upstream pipelines & related dimple jobs have finished
385-
386382 program_list = [
387383 "xia2 dials" ,
388384 "xia2 3dii" ,
@@ -560,6 +556,11 @@ def trigger_pandda_xchem(
560556
561557 if not df_filteredbysg .empty :
562558 df = df_filteredbysg
559+ n_success_upstream = len (df )
560+ self .log .info (
561+ f"There are { n_success_upstream } successful upstream jobs (excl fast-dp) in the user defined spacegroup { user_sg } \
562+ selecting the best one based on I/sigI*completeness * #unique reflections, from the most recent processing batch"
563+ )
563564
564565 # rank datasets by I/sigI*completeness*# unique reflections
565566 df ["heuristic" ] = (
@@ -571,7 +572,7 @@ def trigger_pandda_xchem(
571572 df = df [["autoProcScalingId" , "heuristic" ]].copy ()
572573 scaling_ids = df ["autoProcScalingId" ].tolist ()
573574
574- # find associated dimple jobs from scaling_ids
575+ # find associated dimple jobs from scaling_ids, take most recent batch if reprocessing
575576 query = (
576577 (
577578 session .query (
@@ -610,17 +611,21 @@ def trigger_pandda_xchem(
610611 )
611612 return {"success" : True }
612613
613- n_success_upstream = len (df )
614- n_success_dimple = len (df2 )
614+ # mark a new batch whenever the gap is >= 12 hours
615+ df2 = df2 .sort_values ("processingStartTime" ).reset_index (drop = True )
616+ df2 ["time_diff" ] = df2 ["processingStartTime" ].diff ()
617+ df2 ["batch" ] = (df2 ["time_diff" ] >= pd .Timedelta (hours = 12 )).cumsum () + 1
618+ recent_batch = df2 [df2 ["batch" ] == df2 ["batch" ].max ()].copy ()
615619
616- self .log .info (
617- f"There are { n_success_upstream } successful upstream jobs (excl fast-dp) & { n_success_dimple } successful dimple jobs, \
618- selecting the best one based on heuristic: I/sigI*completeness * #unique reflections"
619- )
620-
621- df2 ["parameterValue" ] = pd .to_numeric (df2 ["parameterValue" ]).astype ("Int64" )
620+ recent_batch ["parameterValue" ] = pd .to_numeric (
621+ recent_batch ["parameterValue" ]
622+ ).astype ("Int64" )
622623 df3 = pd .merge (
623- df2 , df , left_on = "parameterValue" , right_on = "autoProcScalingId" , how = "inner"
624+ recent_batch ,
625+ df ,
626+ left_on = "parameterValue" ,
627+ right_on = "autoProcScalingId" ,
628+ how = "inner" ,
624629 ).sort_values ("heuristic" , ascending = False )
625630
626631 if df3 .empty :
@@ -653,16 +658,18 @@ def trigger_pandda_xchem(
653658
654659 # 2. Read XChem SQLite database for ligand info
655660
661+ db_master = processing_dir / "database" / "soakDBDataFile.sqlite"
662+
656663 try :
657- conn = sqlite3 .connect (f"file:{ db } ?mode=ro" , uri = True , timeout = 10 )
664+ conn = sqlite3 .connect (f"file:{ db_master } ?mode=ro" , uri = True , timeout = 10 )
658665 df = pd .read_sql_query (
659666 f"SELECT * from mainTable WHERE Puck = '{ container_code } ' AND PuckPosition = { location } AND CrystalName = '{ dtag } '" ,
660667 conn ,
661668 )
662669
663670 except Exception as e :
664671 self .log .info (
665- f"Exiting PanDDA2/Pipedream trigger: Exception whilst reading ligand information from { db } for dtag { dtag } , dcid { dcid } : { e } "
672+ f"Exiting PanDDA2/Pipedream trigger: Exception whilst reading ligand information from { db_master } for dtag { dtag } , dcid { dcid } : { e } "
666673 )
667674 return {"success" : True }
668675
@@ -672,7 +679,7 @@ def trigger_pandda_xchem(
672679
673680 if len (df ) != 1 :
674681 self .log .info (
675- f"Exiting PanDDA2/Pipedream trigger: Unique row in .sqlite for dtag { dtag } , puck { container_code } , puck position { location } cannot be found in { db } , skipping dcid { dcid } "
682+ f"Exiting PanDDA2/Pipedream trigger: Unique row in .sqlite for dtag { dtag } , puck { container_code } , puck position { location } cannot be found in { db_master } , skipping dcid { dcid } "
676683 )
677684 return {"success" : True }
678685
@@ -697,7 +704,8 @@ def trigger_pandda_xchem(
697704 return {"success" : True }
698705
699706 # 3. Create dataset directory structure
700- analysis_dir = processed_dir / "analysis"
707+ auto_dir = processing_dir / "auto"
708+ analysis_dir = auto_dir / "analysis"
701709 pandda_dir = analysis_dir / "pandda2"
702710 model_dir = pandda_dir / "model_building"
703711 dataset_dir = model_dir / dtag
@@ -736,16 +744,16 @@ def trigger_pandda_xchem(
736744 smi_file .write (CompoundSMILES )
737745
738746 # 4. Job launch logic
739-
740747 recipe_parameters = {
741748 "dcid" : dcid ,
742- "processed_directory" : str (processed_dir ),
749+ "xchem_visit_dir" : str (xchem_visit_dir ),
750+ "processing_directory" : str (processing_dir ),
743751 "model_directory" : str (model_dir ),
744752 "dtag" : dtag ,
745753 "n_datasets" : 1 ,
746754 "scaling_id" : scaling_id ,
747755 "comparator_threshold" : comparator_threshold ,
748- "database_path" : str (db ),
756+ "database_path" : str (db_master ),
749757 "upstream_mtz" : pathlib .Path (upstream_mtz ).parts [- 1 ],
750758 "smiles" : str (CompoundSMILES ),
751759 }
@@ -769,6 +777,7 @@ def trigger_pandda_xchem(
769777
770778 with open (model_dir / ".batch.json" , "w" ) as f :
771779 json .dump (dataset_list , f )
780+ # cannot pass as ispyb_parameter
772781
773782 self .log .info (
774783 f"{ dataset_count } = comparator dataset threshold of { comparator_threshold } , launching PanDDA2 array job"
@@ -821,7 +830,7 @@ def trigger_pandda_xchem_post(
821830
822831 dcid = parameters .dcid
823832 scaling_id = parameters .scaling_id [0 ]
824- processed_directory = pathlib .Path (parameters .processed_directory )
833+ processing_directory = pathlib .Path (parameters .processing_directory )
825834
826835 _ , ispyb_info = dlstbx .ispybtbx .ispyb_filter ({}, {"ispyb_dcid" : dcid }, session )
827836 visit = ispyb_info .get ("ispyb_visit" , "" )
@@ -868,7 +877,7 @@ def trigger_pandda_xchem_post(
868877
869878 recipe_parameters = {
870879 "dcid" : dcid , #
871- "processed_directory " : str (processed_directory ),
880+ "processing_directory" : str (processing_directory ),
872881 "scaling_id" : scaling_id ,
873882 }
874883
0 commit comments