Skip to content

Commit 50b7aa7

Browse files
committed
Updating job number handlers
1 parent f19137e commit 50b7aa7

2 files changed

Lines changed: 132 additions & 74 deletions

File tree

src/murfey/server/feedback.py

Lines changed: 59 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,46 @@
4545
)
4646
from murfey.util.processing_params import default_spa_parameters, motion_corrected_mrc
4747
from murfey.util.tomo import midpoint
48+
from gemmi import cif
49+
from pipeliner.star_keys import GENERAL_BLOCK, JOB_COUNTER
4850

4951
logger = logging.getLogger("murfey.server.feedback")
5052

5153

54+
def _current_pipeline_job_counter(visit_name: str) -> int:
55+
"""Return the next jobNNN Pipeliner will allocate for visit_name.
56+
57+
Reads the JOB_COUNTER value from default_pipeline.star so that
58+
SPA feedback decisions are anchored to Pipeliner's actual state instead
59+
of an independent integer counter that drifts.
60+
61+
Falls back to 7 (previous default) if the file is
62+
missing — this preserves the previous behaviour for non Doppio runs
63+
"""
64+
pipeline_file = Path(visit_name) / "default_pipeline.star"
65+
if not pipeline_file.is_file():
66+
return 7
67+
try:
68+
dp = cif.read_file(str(pipeline_file))
69+
block = dp.find_block(GENERAL_BLOCK)
70+
if block is None:
71+
return 7
72+
return int(block.find_value(JOB_COUNTER))
73+
except Exception:
74+
logger.warning(
75+
"Failed to read JOB_COUNTER from %s — falling back to legacy job number",
76+
pipeline_file,
77+
exc_info=True,
78+
)
79+
return 7
80+
81+
82+
def _visit_name_for_session(session_id: int, _db) -> str:
    """Look up the visit (project directory) name for a Murfey session id."""
    lookup = select(db.Session).where(db.Session.id == session_id)
    matching_session = _db.exec(lookup).one()
    return matching_session.visit
86+
87+
5288
try:
5389
_url = url(get_security_config())
5490
engine = create_engine(_url)
@@ -373,9 +409,8 @@ def _release_2d_hold(message: dict, _db):
373409
"recipes": ["em-spa-class2d"],
374410
}
375411
if first_class2d.complete:
376-
feedback_params.next_job += (
377-
4 if default_spa_parameters.do_icebreaker_jobs else 3
378-
)
412+
visit_name = _visit_name_for_session(message["session_id"], _db)
413+
feedback_params.next_job = _current_pipeline_job_counter(visit_name)
379414
feedback_params.rerun_class2d = False
380415
_db.add(feedback_params)
381416
if first_class2d.complete:
@@ -585,7 +620,9 @@ def _register_incomplete_2d_batch(message: dict, _db):
585620
_db.commit()
586621
_db.close()
587622
return
588-
feedback_params.next_job = 10 if default_spa_parameters.do_icebreaker_jobs else 7
623+
# Get next_job from the actual Pipeliner counter
624+
visit_name = _visit_name_for_session(message["session_id"], _db)
625+
feedback_params.next_job = _current_pipeline_job_counter(visit_name)
589626
feedback_params.hold_class2d = True
590627
relion_options = dict(relion_params)
591628
other_options = dict(feedback_params)
@@ -735,15 +772,8 @@ def _register_complete_2d_batch(message: dict, _db):
735772
murfey_ids, class2d_message["particles_file"], _app_id(pj_id, _db), _db
736773
)
737774
elif not feedback_params.class_selection_score:
738-
# For the first batch, start a container and set the database to wait
739-
job_number_after_first_batch = (
740-
10 if default_spa_parameters.do_icebreaker_jobs else 7
741-
)
742-
if (
743-
feedback_params.next_job is not None
744-
and feedback_params.next_job < job_number_after_first_batch
745-
):
746-
feedback_params.next_job = job_number_after_first_batch
775+
visit_name = _visit_name_for_session(message["session_id"], _db)
776+
feedback_params.next_job = _current_pipeline_job_counter(visit_name)
747777
if not feedback_params.star_combination_job:
748778
feedback_params.star_combination_job = feedback_params.next_job + (
749779
3 if default_spa_parameters.do_icebreaker_jobs else 2
@@ -815,14 +845,14 @@ def _register_complete_2d_batch(message: dict, _db):
815845
"processing_recipe", zocalo_message, new_connection=True
816846
)
817847
feedback_params.hold_class2d = True
818-
feedback_params.next_job += (
819-
4 if default_spa_parameters.do_icebreaker_jobs else 3
820-
)
848+
# next_job is re-anchored from Pipeliner on the next entry to this
849+
# function — no manual increment needed.
821850
_db.add(feedback_params)
822851
_db.commit()
823852
_db.close()
824853
else:
825-
# Send all other messages on to a container
854+
visit_name = _visit_name_for_session(message["session_id"], _db)
855+
feedback_params.next_job = _current_pipeline_job_counter(visit_name)
826856
if _db.exec(
827857
select(func.count(db.Class2DParameters.particles_file))
828858
.where(db.Class2DParameters.pj_id == pj_id)
@@ -889,9 +919,6 @@ def _register_complete_2d_batch(message: dict, _db):
889919
murfey.server._transport_object.send(
890920
"processing_recipe", zocalo_message, new_connection=True
891921
)
892-
feedback_params.next_job += (
893-
3 if default_spa_parameters.do_icebreaker_jobs else 2
894-
)
895922
_db.add(feedback_params)
896923
_db.commit()
897924
_db.close()
@@ -936,10 +963,9 @@ def _flush_class2d(
936963
.where(db.Class2DParameters.pj_id == pj_id)
937964
.where(db.Class2DParameters.complete)
938965
).all()
939-
if not feedback_params.next_job:
940-
feedback_params.next_job = (
941-
10 if default_spa_parameters.do_icebreaker_jobs else 7
942-
)
966+
# Check pipeliner counter
967+
visit_name = _visit_name_for_session(session_id, _db)
968+
feedback_params.next_job = _current_pipeline_job_counter(visit_name)
943969
if not feedback_params.star_combination_job:
944970
feedback_params.star_combination_job = feedback_params.next_job + (
945971
3 if default_spa_parameters.do_icebreaker_jobs else 2
@@ -1196,6 +1222,10 @@ def _register_3d_batch(message: dict, _db):
11961222
.visit
11971223
)
11981224

1225+
# Check Pipeliner's job counter
1226+
feedback_params.next_job = _current_pipeline_job_counter(visit_name)
1227+
other_options["next_job"] = feedback_params.next_job
1228+
11991229
provided_initial_model = _find_initial_model(visit_name, machine_config)
12001230
if provided_initial_model and not feedback_params.initial_model:
12011231
rescaled_initial_model_path = (
@@ -1218,7 +1248,6 @@ def _register_3d_batch(message: dict, _db):
12181248
class3d_dir = (
12191249
f"{class3d_message['class3d_dir']}{(feedback_params.next_job + 1):03}"
12201250
)
1221-
feedback_params.next_job += 1
12221251
_db.add(feedback_params)
12231252
_db.commit()
12241253

@@ -1253,7 +1282,6 @@ def _register_3d_batch(message: dict, _db):
12531282
_db.close()
12541283
elif not feedback_params.initial_model:
12551284
# For the first batch, start a container and set the database to wait
1256-
next_job = feedback_params.next_job
12571285
class3d_dir = (
12581286
f"{class3d_message['class3d_dir']}{(feedback_params.next_job + 1):03}"
12591287
)
@@ -1273,8 +1301,6 @@ def _register_3d_batch(message: dict, _db):
12731301
)
12741302

12751303
feedback_params.hold_class3d = True
1276-
next_job += 2
1277-
feedback_params.next_job = next_job
12781304
zocalo_message: dict = {
12791305
"parameters": {
12801306
"particles_file": class3d_message["particles_file"],
@@ -1534,6 +1560,11 @@ def _register_refinement(message: dict, _db):
15341560
db.ClassificationFeedbackParameters.pj_id == pj_id_params
15351561
)
15361562
).one()
1563+
1564+
# Re-anchor next_job to Pipeliner's actual counter so the predicted
1565+
# Refine3D / MaskCreate / PostProcess slots line up with reality.
1566+
visit_name = _visit_name_for_session(message["session_id"], _db)
1567+
feedback_params.next_job = _current_pipeline_job_counter(visit_name)
15371568
other_options = dict(feedback_params)
15381569

15391570
if feedback_params.hold_refine:
@@ -1564,7 +1595,6 @@ def _register_refinement(message: dict, _db):
15641595
.where(db.RefineParameters.tag == "symmetry")
15651596
).one()
15661597
except SQLAlchemyError:
1567-
next_job = feedback_params.next_job
15681598
refine_dir = f"{message['refine_dir']}{(feedback_params.next_job + 2):03}"
15691599
refined_grp_uuid = _murfey_id(message["program_id"], _db)[0]
15701600
refined_class_uuid = _murfey_id(message["program_id"], _db)[0]
@@ -1605,14 +1635,6 @@ def _register_refinement(message: dict, _db):
16051635
_db=_db,
16061636
)
16071637

1608-
if relion_options["symmetry"] == "C1":
1609-
# Extra Refine, Mask, PostProcess beyond for determined symmetry
1610-
next_job += 8
1611-
else:
1612-
# Select and Extract particles, then Refine, Mask, PostProcess
1613-
next_job += 5
1614-
feedback_params.next_job = next_job
1615-
16161638
zocalo_message: dict = {
16171639
"parameters": {
16181640
"refine_job_dir": refine_params.refine_dir,

src/murfey/util/processing_params.py

Lines changed: 73 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import logging
2+
import os
13
from datetime import datetime
24
from functools import lru_cache
35
from pathlib import Path
@@ -6,55 +8,89 @@
68
from pydantic import BaseModel
79
from werkzeug.utils import secure_filename
810

9-
from gemmi import cif
10-
from pipeliner.star_keys import GENERAL_BLOCK, JOB_COUNTER
11+
from pipeliner.project_graph import ProjectGraph
1112

1213
from murfey.util.config import MachineConfig, get_machine_config
1314

14-
import os
15+
logger = logging.getLogger("murfey.util.processing_params")
1516

16-
def get_current_job_number(visit_name: str, machine_config: MachineConfig) -> int:
17-
if os.path.exists(visit_name):
18-
default_pipeline_path = os.path.join(visit_name, "default_pipeline.star")
19-
"""elif machine_config.processed_directory_name:
20-
core = Path(visit_name).parts[0]
21-
extra_path = machine_config.processed_extra_directory
22-
sub
23-
default_pipeline_path = (core
24-
/ machine_config.processed_directory_name
25-
/ sub_dataset
26-
/ extra_path)"""
27-
if os.path.exists(default_pipeline_path):
28-
dp = cif.read_file(default_pipeline_path)
29-
dp_job_counter = dp.find_block(GENERAL_BLOCK).find_value(JOB_COUNTER)
30-
current_counter = int(dp_job_counter)
31-
return current_counter
32-
33-
return 2 # Default to job002 if the file doesn't exist or the value is not found
3417

35-
def motion_corrected_mrc(
36-
input_movie: Path, visit_name: str, machine_config: MachineConfig
37-
):
38-
movie = os.path.basename(input_movie)
18+
# Positional default MotionCorr job directory, returned when the live
# Pipeliner alias lookup fails (missing default_pipeline.star or unknown
# alias) so callers keep the pre-lookup behaviour.
_DEFAULT_MOTIONCORR_FALLBACK = "job002"
19+
20+
21+
@lru_cache(maxsize=16)
22+
def _job_dir_for_alias_cached(
23+
visit_name: str, alias: str, mtime_ns: int
24+
) -> Optional[str]:
25+
"""Read default_pipeline.star and return the jobNNN for the given alias.
3926
40-
""" if not os.path.exists(visit_name):
41-
parts = [secure_filename(p) for p in input_movie.parts]
42-
visit_idx = parts.index(visit_name)
43-
core = Path("/") / Path(*parts[: visit_idx + 1])
44-
ppath = Path("/") / Path(*parts)
45-
if machine_config.process_multiple_datasets:
46-
sub_dataset = ppath.relative_to(core).parts[0]
47-
else:
48-
sub_dataset = ""
49-
extra_path = machine_config.processed_extra_directory
27+
Returns None on any failure (missing file, graph read error, alias
28+
not found). The mtime_ns argument is a cache key — when Pipeliner rewrites
29+
default_pipeline.star its mtime changes and the next call falls through
30+
to a fresh read.
5031
"""
32+
project_dir = Path(visit_name)
33+
pipeline_file = project_dir / "default_pipeline.star"
34+
if not pipeline_file.is_file():
35+
return None
36+
try:
37+
with ProjectGraph(pipeline_dir=project_dir, read_only=True) as graph:
38+
for proc in graph.process_list:
39+
proc_alias = getattr(proc, "alias", None)
40+
if proc_alias and proc_alias.rstrip("/").endswith(alias):
41+
# proc.name is e.g. "MotionCorr/job003/"
42+
return Path(proc.name).name
43+
except Exception:
44+
logger.error(
45+
"ProjectGraph read failed while looking up alias %r in %s",
46+
alias,
47+
pipeline_file,
48+
exc_info=True,
49+
)
50+
return None
51+
return None
52+
5153

52-
#job_number = get_current_job_number(visit_name, machine_config)
54+
def _job_dir_for_alias(visit_name: str, alias: str) -> str:
    """Resolve the Pipeliner jobNNN directory name for *alias*.

    *visit_name* is a path to the project directory. When the pipeline file
    is absent or the alias cannot be found, falls back to the positional
    default job002 and emits a warning so drift from the live pipeline is
    visible in the logs instead of silent.
    """
    project_dir = Path(visit_name).resolve()
    star_path = project_dir / "default_pipeline.star"
    try:
        stamp = star_path.stat().st_mtime_ns
    except FileNotFoundError:
        logger.warning(
            "default_pipeline.star missing at %s — falling back to %s for alias %r",
            star_path,
            _DEFAULT_MOTIONCORR_FALLBACK,
            alias,
        )
        return _DEFAULT_MOTIONCORR_FALLBACK
    resolved = _job_dir_for_alias_cached(str(project_dir), alias, stamp)
    if resolved is not None:
        return resolved
    logger.warning(
        "Alias %r not found in %s — falling back to %s",
        alias,
        star_path,
        _DEFAULT_MOTIONCORR_FALLBACK,
    )
    return _DEFAULT_MOTIONCORR_FALLBACK
83+
84+
85+
def motion_corrected_mrc(
86+
input_movie: Path, visit_name: str, machine_config: MachineConfig
87+
):
88+
movie = os.path.basename(input_movie)
89+
job_dir = _job_dir_for_alias(visit_name, "Live_motioncorr")
5490
mrc_out = (
5591
Path(visit_name)
5692
/ "MotionCorr"
57-
/ f"job002"
93+
/ job_dir
5894
/ "Movies"
5995
/ str(movie + "_motion_corrected.mrc")
6096
)

0 commit comments

Comments
 (0)