1414import threading
1515from importlib .metadata import entry_points
1616from pathlib import Path
17- from typing import OrderedDict , Type
17+ from typing import Type
1818
1919from murfey .client .context import Context
2020from murfey .client .destinations import find_longest_data_directory
@@ -78,10 +78,10 @@ def __init__(
7878
7979 # SPA & Tomo-specific attributes
8080 self ._extension : str = ""
81- self ._unseen_xml : list = []
82- self . _batch_store : dict = {}
83- self . _force_mdoc_metadata = force_mdoc_metadata
84- self ._mdoc_for_reading : Path | None = None
81+ self ._processing_params_found : bool = (
82+ False # Have the processing parameters been collected from the metadata?
83+ )
84+ # self._force_mdoc_metadata = force_mdoc_metadata # Seems deprecated
8585 self ._serialem = serialem
8686 self .parameters_model : (
8787 Type [ProcessingParametersSPA ] | Type [ProcessingParametersTomo ] | None
@@ -94,10 +94,6 @@ def _find_extension(self, file_path: Path) -> bool:
9494 """
9595 Identifies the file extension and stores that information in the class.
9696 """
97- if "atlas" in file_path .parts :
98- self ._extension = file_path .suffix
99- return True
100-
10197 if (
10298 required_substrings := self ._murfey_config .get (
10399 "data_required_substrings" , {}
@@ -125,14 +121,6 @@ def _find_extension(self, file_path: Path) -> bool:
125121 if subframe_path := mdoc_data_block .get ("SubFramePath" ):
126122 self ._extension = Path (subframe_path ).suffix
127123 return True
128- # Check for LIF files and TXRM files separately
129- elif (
130- file_path .suffix == ".lif"
131- or file_path .suffix == ".txrm"
132- or file_path .suffix == ".xrm"
133- ):
134- self ._extension = file_path .suffix
135- return True
136124 return False
137125
138126 def _find_context (self , file_path : Path ) -> bool :
@@ -337,6 +325,11 @@ def post_transfer(self, transferred_file: Path):
337325 )
338326
339327 def _analyse_in_thread (self ):
328+ """
329+ Class function that will be executed by the '_thread' attribute. It will
330+ execute a while-loop in which it takes files off the queue and feeds them
331+ into the '_analyse' class function until '_halt_thread' is set to True.
332+ """
340333 logger .info ("Analyser thread started" )
341334 while not self ._halt_thread :
342335 transferred_file = self .queue .get ()
@@ -354,13 +347,18 @@ def _analyse_in_thread(self):
354347 self .notify (final = True )
355348
356349 def _analyse (self , transferred_file : Path ):
350+ """
351+ Class function that is called by '_analyse_in_thread'. It will identify
352+ the Context class to use based on the files inspected, then run different
353+ processing logic based on the context that was established.
354+ """
357355 if self ._limited :
358356 if (
359357 "Metadata" in transferred_file .parts
360358 or transferred_file .name == "EpuSession.dm"
361359 ) and not self ._context :
362360 if not (context := _get_context ("SPAMetadataContext" )):
363- return
361+ return None
364362 self ._context = context .load ()(
365363 "epu" ,
366364 self ._basepath ,
@@ -373,7 +371,7 @@ def _analyse(self, transferred_file: Path):
373371 or transferred_file .name == "Session.dm"
374372 ) and not self ._context :
375373 if not (context := _get_context ("TomographyMetadataContext" )):
376- return
374+ return None
377375 self ._context = context .load ()(
378376 "tomo" ,
379377 self ._basepath ,
@@ -382,114 +380,110 @@ def _analyse(self, transferred_file: Path):
382380 )
383381 self .post_transfer (transferred_file )
384382 else :
385- # Logic that doesn't require context determination
386- if not self ._serialem and (
387- self ._force_mdoc_metadata and transferred_file .suffix == ".mdoc"
388- ):
389- self ._mdoc_for_reading = transferred_file
390-
391383 # Try and determine context, and notify once when context is found
392384 if self ._context is None :
393385 # Exit early if the file can't be used to determine the context
394386 if not self ._find_context (transferred_file ):
395387 logger .debug (f"Couldn't find context for { str (transferred_file )!r} " )
396- return
388+ return None
397389 else :
398390 logger .info (f"Context found successfully using { transferred_file } " )
399391
400- # Trigger processing or metadata parsing according to the context
401- # Go through the straightforward ones first
402- if "CLEMContext" in str (self ._context ):
403- logger .debug (f"File { transferred_file .name !r} is part of CLEM workflow" )
404- self .post_transfer (transferred_file )
405- elif "FIBContext" in str (self ._context ):
406- logger .debug (
407- f"File { transferred_file .name !r} is part of the FIB workflow"
408- )
409- self .post_transfer (transferred_file )
410- elif "SXTContext" in str (self ._context ):
411- logger .debug (f"File { transferred_file .name !r} is an SXT file" )
412- self .post_transfer (transferred_file )
413- elif "AtlasContext" in str (self ._context ):
414- logger .debug (f"File { transferred_file .name !r} is part of the atlas" )
415- self .post_transfer (transferred_file )
416-
417- # Handle files with tomography and SPA context differently
418- elif (
419- any (
420- context in str (self ._context )
421- for context in (
422- "SPAContext" ,
423- "SPAMetadataContext" ,
424- "TomographyContext" ,
425- "TomographyMetadataContext" ,
426- )
427- )
428- and self ._context is not None
429- ):
430- context = str (self ._context ).split (" " )[0 ].split ("." )[- 1 ]
431-
432- dc_metadata : OrderedDict | None = None
433- if not self ._serialem and (
434- self ._force_mdoc_metadata
435- and transferred_file .suffix == ".mdoc"
436- or self ._mdoc_for_reading
392+ # Extra if-block for MyPy to verify that the context is set by this point
393+ if self ._context is None :
394+ logger .error ("Failed to set context even after finding context" )
395+ return None
396+
397+ # Trigger processing and metadata parsing according to the context
398+ match self ._context .name :
399+ case (
400+ "CLEMContext"
401+ | "FIBContext"
402+ | "SPAMetadataContext"
403+ | "SXTContext"
404+ | "TomographyMetadataContext"
437405 ):
438- try :
439- dc_metadata = self ._context .gather_metadata (
440- self ._mdoc_for_reading or transferred_file ,
441- environment = self ._environment ,
442- )
443- except KeyError as e :
444- logger .error (
445- f"Metadata gathering failed with a key error for key: "
446- f"{ e .args [0 ]} "
447- )
448- raise e
449- # Set the mdoc field to None if no metadata was found
450- if not dc_metadata :
451- self ._mdoc_for_reading = None
452-
453- if not self ._extension or self ._unseen_xml :
454- # Early return if no extension was found
455- if not self ._find_extension (transferred_file ):
456- logger .warning (f"No extension found for { transferred_file } " )
457- return
458- else :
406+ logger .debug (
407+ f"File { transferred_file .name !r} transferred with context { self ._context .name } "
408+ )
409+ self .post_transfer (transferred_file )
410+ case "SPAContext" :
411+ logger .debug (f"File { transferred_file .name !r} is part of the atlas" )
412+ self .post_transfer (transferred_file )
413+
414+ # Find extension
415+ if not self ._extension :
416+ if not self ._find_extension (transferred_file ):
417+ logger .warning (f"No extension found for { transferred_file } " )
418+ return None
459419 logger .info (
460420 f"Extension found successfully for { transferred_file } "
461421 )
462-
463- logger .debug (
464- f"Transferring file { str (transferred_file )} with context { context !r} "
465- )
466- self .post_transfer (transferred_file )
467-
468- if not dc_metadata and transferred_file .suffix != ".mdoc" :
469- try :
470- dc_metadata = self ._context .gather_metadata (
471- self ._mdoc_for_reading or self ._xml_file (transferred_file ),
472- environment = self ._environment ,
473- )
474- except KeyError as e :
475- logger .error (
476- f"Metadata gathering failed with a key error for key: { e .args [0 ]} "
422+ if not self ._processing_params_found :
423+ # Try and gather the metadata from each file passing through
424+ # Once gathered, set the attribute to True and don't repeat again
425+ try :
426+ dc_metadata = self ._context .gather_metadata (
427+ self ._xml_file (transferred_file ),
428+ environment = self ._environment ,
429+ )
430+ except (KeyError , ValueError ) as e :
431+ logger .error (
432+ f"Metadata gathering failed with the following error: { e } "
433+ )
434+ dc_metadata = None
435+ if dc_metadata :
436+ self ._processing_params_found = True
437+ if dc_metadata .get ("file_extension" ):
438+ self ._extension = dc_metadata ["file_extension" ]
439+ else :
440+ dc_metadata ["file_extension" ] = self ._extension
441+ dc_metadata ["acquisition_software" ] = (
442+ self ._context ._acquisition_software
443+ )
444+ self .notify (dc_metadata )
445+
446+ case "TomographyContext" :
447+ logger .debug (f"File { transferred_file .name !r} is part of the atlas" )
448+ self .post_transfer (transferred_file )
449+
450+ # Find extension
451+ if not self ._extension :
452+ if not self ._find_extension (transferred_file ):
453+ logger .warning (f"No extension found for { transferred_file } " )
454+ return None
455+ logger .info (
456+ f"Extension found successfully for { transferred_file } "
477457 )
478- raise e
479- if not dc_metadata or not self ._force_mdoc_metadata :
480- self ._mdoc_for_reading = None
481- self ._unseen_xml .append (transferred_file )
482- if dc_metadata :
483- self ._unseen_xml = []
484- if dc_metadata .get ("file_extension" ):
485- self ._extension = dc_metadata ["file_extension" ]
486- else :
487- dc_metadata ["file_extension" ] = self ._extension
488- dc_metadata ["acquisition_software" ] = (
489- self ._context ._acquisition_software
490- )
491- self .notify (dc_metadata )
492- return
458+ if (
459+ not self ._processing_params_found
460+ and transferred_file .suffix == ".mdoc"
461+ ):
462+ # Try and gather the metadata from a passing .mdoc file
463+ # When gathered, set the attribute to True and don't repeat again
464+ try :
465+ dc_metadata = self ._context .gather_metadata (
466+ transferred_file ,
467+ environment = self ._environment ,
468+ )
469+ except (KeyError , ValueError ) as e :
470+ logger .error (
471+ f"Metadata gathering failed with the following error: { e } "
472+ )
473+ dc_metadata = None
474+ if dc_metadata :
475+ self ._processing_params_found = True
476+ if dc_metadata .get ("file_extension" ):
477+ self ._extension = dc_metadata ["file_extension" ]
478+ else :
479+ dc_metadata ["file_extension" ] = self ._extension
480+ dc_metadata ["acquisition_software" ] = (
481+ self ._context ._acquisition_software
482+ )
483+ self .notify (dc_metadata )
484+ case _:
485+ logger .warning (f"Unknown context provided: { str (self ._context )} " )
486+ return None
493487
494488 def _xml_file (self , data_file : Path ) -> Path :
495489 if not self ._environment :
0 commit comments