Skip to content

Commit a51a5b4

Browse files
committed
Major rewrite of the '_analyse' function:
* Disentangled the processing logic for the 4 SPA- and Tomography-related contexts
* Replaced matching Contexts using 'str(self._context)' with 'self._context.name'
* Replaced giant if-else blocks with match-case logic
* Use explicit returns
1 parent 24fee4a commit a51a5b4

1 file changed

Lines changed: 108 additions & 114 deletions

File tree

src/murfey/client/analyser.py

Lines changed: 108 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import threading
1515
from importlib.metadata import entry_points
1616
from pathlib import Path
17-
from typing import OrderedDict, Type
17+
from typing import Type
1818

1919
from murfey.client.context import Context
2020
from murfey.client.destinations import find_longest_data_directory
@@ -78,10 +78,10 @@ def __init__(
7878

7979
# SPA & Tomo-specific attributes
8080
self._extension: str = ""
81-
self._unseen_xml: list = []
82-
self._batch_store: dict = {}
83-
self._force_mdoc_metadata = force_mdoc_metadata
84-
self._mdoc_for_reading: Path | None = None
81+
self._processing_params_found: bool = (
82+
False # Have the processing parameters been collected from the metadata?
83+
)
84+
# self._force_mdoc_metadata = force_mdoc_metadata # Seems deprecated
8585
self._serialem = serialem
8686
self.parameters_model: (
8787
Type[ProcessingParametersSPA] | Type[ProcessingParametersTomo] | None
@@ -94,10 +94,6 @@ def _find_extension(self, file_path: Path) -> bool:
9494
"""
9595
Identifies the file extension and stores that information in the class.
9696
"""
97-
if "atlas" in file_path.parts:
98-
self._extension = file_path.suffix
99-
return True
100-
10197
if (
10298
required_substrings := self._murfey_config.get(
10399
"data_required_substrings", {}
@@ -125,14 +121,6 @@ def _find_extension(self, file_path: Path) -> bool:
125121
if subframe_path := mdoc_data_block.get("SubFramePath"):
126122
self._extension = Path(subframe_path).suffix
127123
return True
128-
# Check for LIF files and TXRM files separately
129-
elif (
130-
file_path.suffix == ".lif"
131-
or file_path.suffix == ".txrm"
132-
or file_path.suffix == ".xrm"
133-
):
134-
self._extension = file_path.suffix
135-
return True
136124
return False
137125

138126
def _find_context(self, file_path: Path) -> bool:
@@ -337,6 +325,11 @@ def post_transfer(self, transferred_file: Path):
337325
)
338326

339327
def _analyse_in_thread(self):
328+
"""
329+
Class function that will be executed by the '_thread' attribute. It will
330+
execute a while-loop in which it takes files off the queue and feeds them
331+
into the '_analyse' class function until '_halt_thread' is set to True.
332+
"""
340333
logger.info("Analyser thread started")
341334
while not self._halt_thread:
342335
transferred_file = self.queue.get()
@@ -354,13 +347,18 @@ def _analyse_in_thread(self):
354347
self.notify(final=True)
355348

356349
def _analyse(self, transferred_file: Path):
350+
"""
351+
Class function that is called by '_analyse_in_thread'. It will identify
352+
the Context class to use based on the files inspected, then run different
353+
processing logic based on the context that was established.
354+
"""
357355
if self._limited:
358356
if (
359357
"Metadata" in transferred_file.parts
360358
or transferred_file.name == "EpuSession.dm"
361359
) and not self._context:
362360
if not (context := _get_context("SPAMetadataContext")):
363-
return
361+
return None
364362
self._context = context.load()(
365363
"epu",
366364
self._basepath,
@@ -373,7 +371,7 @@ def _analyse(self, transferred_file: Path):
373371
or transferred_file.name == "Session.dm"
374372
) and not self._context:
375373
if not (context := _get_context("TomographyMetadataContext")):
376-
return
374+
return None
377375
self._context = context.load()(
378376
"tomo",
379377
self._basepath,
@@ -382,114 +380,110 @@ def _analyse(self, transferred_file: Path):
382380
)
383381
self.post_transfer(transferred_file)
384382
else:
385-
# Logic that doesn't require context determination
386-
if not self._serialem and (
387-
self._force_mdoc_metadata and transferred_file.suffix == ".mdoc"
388-
):
389-
self._mdoc_for_reading = transferred_file
390-
391383
# Try and determine context, and notify once when context is found
392384
if self._context is None:
393385
# Exit early if the file can't be used to determine the context
394386
if not self._find_context(transferred_file):
395387
logger.debug(f"Couldn't find context for {str(transferred_file)!r}")
396-
return
388+
return None
397389
else:
398390
logger.info(f"Context found successfully using {transferred_file}")
399391

400-
# Trigger processing or metadata parsing according to the context
401-
# Go through the straightforward ones first
402-
if "CLEMContext" in str(self._context):
403-
logger.debug(f"File {transferred_file.name!r} is part of CLEM workflow")
404-
self.post_transfer(transferred_file)
405-
elif "FIBContext" in str(self._context):
406-
logger.debug(
407-
f"File {transferred_file.name!r} is part of the FIB workflow"
408-
)
409-
self.post_transfer(transferred_file)
410-
elif "SXTContext" in str(self._context):
411-
logger.debug(f"File {transferred_file.name!r} is an SXT file")
412-
self.post_transfer(transferred_file)
413-
elif "AtlasContext" in str(self._context):
414-
logger.debug(f"File {transferred_file.name!r} is part of the atlas")
415-
self.post_transfer(transferred_file)
416-
417-
# Handle files with tomography and SPA context differently
418-
elif (
419-
any(
420-
context in str(self._context)
421-
for context in (
422-
"SPAContext",
423-
"SPAMetadataContext",
424-
"TomographyContext",
425-
"TomographyMetadataContext",
426-
)
427-
)
428-
and self._context is not None
429-
):
430-
context = str(self._context).split(" ")[0].split(".")[-1]
431-
432-
dc_metadata: OrderedDict | None = None
433-
if not self._serialem and (
434-
self._force_mdoc_metadata
435-
and transferred_file.suffix == ".mdoc"
436-
or self._mdoc_for_reading
392+
# Extra if-block for MyPy to verify that the context is set by this point
393+
if self._context is None:
394+
logger.error("Failed to set context even after finding context")
395+
return None
396+
397+
# Trigger processing and metadata parsing according to the context
398+
match self._context.name:
399+
case (
400+
"CLEMContext"
401+
| "FIBContext"
402+
| "SPAMetadataContext"
403+
| "SXTContext"
404+
| "TomographyMetadataContext"
437405
):
438-
try:
439-
dc_metadata = self._context.gather_metadata(
440-
self._mdoc_for_reading or transferred_file,
441-
environment=self._environment,
442-
)
443-
except KeyError as e:
444-
logger.error(
445-
f"Metadata gathering failed with a key error for key: "
446-
f"{e.args[0]}"
447-
)
448-
raise e
449-
# Set the mdoc field to None if no metadata was found
450-
if not dc_metadata:
451-
self._mdoc_for_reading = None
452-
453-
if not self._extension or self._unseen_xml:
454-
# Early return if no extension was found
455-
if not self._find_extension(transferred_file):
456-
logger.warning(f"No extension found for {transferred_file}")
457-
return
458-
else:
406+
logger.debug(
407+
f"File {transferred_file.name!r} transferred with context {self._context.name}"
408+
)
409+
self.post_transfer(transferred_file)
410+
case "SPAContext":
411+
logger.debug(f"File {transferred_file.name!r} is part of the atlas")
412+
self.post_transfer(transferred_file)
413+
414+
# Find extension
415+
if not self._extension:
416+
if not self._find_extension(transferred_file):
417+
logger.warning(f"No extension found for {transferred_file}")
418+
return None
459419
logger.info(
460420
f"Extension found successfully for {transferred_file}"
461421
)
462-
463-
logger.debug(
464-
f"Transferring file {str(transferred_file)} with context {context!r}"
465-
)
466-
self.post_transfer(transferred_file)
467-
468-
if not dc_metadata and transferred_file.suffix != ".mdoc":
469-
try:
470-
dc_metadata = self._context.gather_metadata(
471-
self._mdoc_for_reading or self._xml_file(transferred_file),
472-
environment=self._environment,
473-
)
474-
except KeyError as e:
475-
logger.error(
476-
f"Metadata gathering failed with a key error for key: {e.args[0]}"
422+
if not self._processing_params_found:
423+
# Try and gather the metadata from each file passing through
424+
# Once gathered, set the attribute to True and don't repeat again
425+
try:
426+
dc_metadata = self._context.gather_metadata(
427+
self._xml_file(transferred_file),
428+
environment=self._environment,
429+
)
430+
except (KeyError, ValueError) as e:
431+
logger.error(
432+
f"Metadata gathering failed with the following error: {e}"
433+
)
434+
dc_metadata = None
435+
if dc_metadata:
436+
self._processing_params_found = True
437+
if dc_metadata.get("file_extension"):
438+
self._extension = dc_metadata["file_extension"]
439+
else:
440+
dc_metadata["file_extension"] = self._extension
441+
dc_metadata["acquisition_software"] = (
442+
self._context._acquisition_software
443+
)
444+
self.notify(dc_metadata)
445+
446+
case "TomographyContext":
447+
logger.debug(f"File {transferred_file.name!r} is part of the atlas")
448+
self.post_transfer(transferred_file)
449+
450+
# Find extension
451+
if not self._extension:
452+
if not self._find_extension(transferred_file):
453+
logger.warning(f"No extension found for {transferred_file}")
454+
return None
455+
logger.info(
456+
f"Extension found successfully for {transferred_file}"
477457
)
478-
raise e
479-
if not dc_metadata or not self._force_mdoc_metadata:
480-
self._mdoc_for_reading = None
481-
self._unseen_xml.append(transferred_file)
482-
if dc_metadata:
483-
self._unseen_xml = []
484-
if dc_metadata.get("file_extension"):
485-
self._extension = dc_metadata["file_extension"]
486-
else:
487-
dc_metadata["file_extension"] = self._extension
488-
dc_metadata["acquisition_software"] = (
489-
self._context._acquisition_software
490-
)
491-
self.notify(dc_metadata)
492-
return
458+
if (
459+
not self._processing_params_found
460+
and transferred_file.suffix == ".mdoc"
461+
):
462+
# Try and gather the metadata from a passing .mdoc file
463+
# When gathered, set the attribute to True and don't repeat again
464+
try:
465+
dc_metadata = self._context.gather_metadata(
466+
transferred_file,
467+
environment=self._environment,
468+
)
469+
except (KeyError, ValueError) as e:
470+
logger.error(
471+
f"Metadata gathering failed with the following error: {e}"
472+
)
473+
dc_metadata = None
474+
if dc_metadata:
475+
self._processing_params_found = True
476+
if dc_metadata.get("file_extension"):
477+
self._extension = dc_metadata["file_extension"]
478+
else:
479+
dc_metadata["file_extension"] = self._extension
480+
dc_metadata["acquisition_software"] = (
481+
self._context._acquisition_software
482+
)
483+
self.notify(dc_metadata)
484+
case _:
485+
logger.warning(f"Unknown context provided: {str(self._context)}")
486+
return None
493487

494488
def _xml_file(self, data_file: Path) -> Path:
495489
if not self._environment:

0 commit comments

Comments
 (0)