Skip to content

Commit 665d160

Browse files
committed
Propagate returncodes of external processes
If external subprocesses such as Mafft were killed (e.g. by SLURM) because of OOM, the main FastOMA Python process simply raised an exception and terminated with exit code 1. This prevented Nextflow from restarting it with more memory/time. We now properly report the exit code of Mafft and FastTree and exit the Python process with the same exit code (stored in WrapperError).
1 parent 3a3b950 commit 665d160

5 files changed

Lines changed: 58 additions & 10 deletions

File tree

FastOMA/_infer_subhog.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from ._utils_subhog import MSAFilter, MSAFilterElbow, MSAFilterTrimAL
3131

3232
from .zoo.utils import unique
33+
from .zoo.wrappers import WrapperError
3334

3435
low_so_detection = True # detection of proteins with low species overlap score in gene tree
3536
fragment_detection = True # this also need to be consistent in _hog_class.py
@@ -76,10 +77,15 @@ def read_infer_xml_rhog(rhogid, inferhog_concurrent_on, pickles_rhog_folder, pi
7677
species_names_rhog = list(set(species_names_rhog))
7778
logger.info("Number of unique species in rHOG " + rhogid + " is " + str(len(species_names_rhog)) + ".")
7879

79-
if inferhog_concurrent_on: # for big HOG we use parallelization at the level taxonomic level using concurrent
80-
infer_hogs_concurrent(species_tree, rhogid, pickles_subhog_folder_all, rhogs_fa_folder, conf_infer_subhhogs)
81-
else:
82-
infer_hogs_for_rhog_levels_recursively(species_tree, rhogid, pickles_subhog_folder_all, rhogs_fa_folder, conf_infer_subhhogs)
80+
try:
81+
if inferhog_concurrent_on: # for big HOG we use parallelization at the level taxonomic level using concurrent
82+
infer_hogs_concurrent(species_tree, rhogid, pickles_subhog_folder_all, rhogs_fa_folder, conf_infer_subhhogs)
83+
else:
84+
infer_hogs_for_rhog_levels_recursively(species_tree, rhogid, pickles_subhog_folder_all, rhogs_fa_folder, conf_infer_subhhogs)
85+
except WrapperError as e:
86+
logger.exception("Error of external tool during subhog inference: %s", str(e))
87+
sys.exit(getattr(e, "exit_code", 1))
88+
8389

8490
##### Now read the final pickle file for this rootHOG
8591
root_node_name = species_tree.name

FastOMA/zoo/utils.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,4 +109,18 @@ def unique(seq):
109109
return [x for x in seq if x not in seen and not seen.add(x)]
110110

111111

112-
112+
def summarize_long_message(message: str, head_chars: int = 1000, tail_lines: int = 50) -> str:
    """
    Summarize a potentially long message string.

    Keeps the first ``head_chars`` characters and the last ``tail_lines``
    lines of ``message``, separated by an ellipsis marker. The tail is taken
    only from the text that follows the head, so the two parts never repeat
    each other. If head and tail together would already cover the whole
    message, the message is returned unchanged.

    :param message: text to summarize; ``None`` or an empty string yields ``""``.
    :param head_chars: number of leading characters to keep.
    :param tail_lines: number of trailing lines to keep.
    :return: the unchanged message, or the head (with a trailing ellipsis
        character), a line containing ``...``, and the tail lines.
    """
    output = message or ""
    head_chars = max(head_chars, 0)
    if len(output) <= head_chars:
        # The head alone already shows everything.
        return output

    # Split only what follows the head so head and tail cannot overlap.
    remainder_lines = output[head_chars:].splitlines()
    if len(remainder_lines) <= tail_lines:
        # Head plus tail would reproduce the full message: nothing to omit.
        return output

    # A slice starting at -0 would select every line, so handle
    # tail_lines <= 0 explicitly.
    tail = "\n".join(remainder_lines[-tail_lines:]) if tail_lines > 0 else ""
    head = output[:head_chars] + "…"
    return f"{head}\n...\n{tail}"

FastOMA/zoo/wrappers/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
class WrapperError(Exception):
2-
pass
2+
def __init__(self, message, exit_code=1):
3+
super().__init__(message)
4+
self.exit_code = exit_code
35

46

FastOMA/zoo/wrappers/aligners/mafft.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from .base_aligner import Aligner, AlignmentInput, DataType
88
from ...seq_utils.utils import iter_seqrecs_from_any
99
from ...wrappers import WrapperError
10+
from ...utils import summarize_long_message
1011
from ..options import StringOption, FlagOption, IntegerOption, FloatOption, MultiOption, OptionSet
1112
import tempfile
1213
import logging
@@ -120,7 +121,8 @@ def __call__(self, *args, **kwargs):
120121
logger.debug('Output of Mafft: stdout={}; stderr={}'.format(output, error))
121122
if len(output) == 0 and len(error) > 0:
122123
logger.warning('is MAFFT_BINARIES set correctly: {}'.format(os.getenv('MAFFT_BINARIES', '')))
123-
raise WrapperError('Mafft did not compute any alignments. StdErr: {}'.format(error))
124+
logger.warning("Mafft did not compute any alignments. StdErr:\n%s", summarize_long_message(error))
125+
raise WrapperError('Mafft did not compute any alignments')
124126
self.result = self._read_result(output) # store result
125127
self.stdout = output
126128
self.stderr = error
@@ -140,6 +142,18 @@ def _call(self, filename, *args, **kwargs):
140142
"""
141143
self.cli('{} {}'.format(self.command(), filename),
142144
wait=True)
145+
146+
ret = self.cli.process.returncode
147+
if ret != 0:
148+
logger.error('Mafft returned non-zero exit status: {}'.format(ret))
149+
logger.error('Output of Mafft:\n\n%s\nstdout=\n%s\n{}\n\n%s\nstderr=\n%s\n{}\n\n',
150+
"=" * 30, "=" * 30, summarize_long_message(self.cli.get_stdout()),
151+
"=" * 30, "=" * 30, summarize_long_message(self.cli.get_stderr()))
152+
if ret < 0:
153+
sig = -ret
154+
raise WrapperError(f'Mafft was terminated by signal {sig}', exit_code=128 + sig)
155+
else:
156+
raise WrapperError(f'Mafft exited with code {ret}', exit_code=ret)
143157
return self.cli.get_stdout(), self.cli.get_stderr()
144158

145159
def command(self):

FastOMA/zoo/wrappers/treebuilders/fasttree.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from ..abstract_cli import AbstractCLI
1616
from ..options import OptionSet, StringOption, IntegerOption
17+
from ...utils import summarize_long_message
1718
from ...file_utils import TempFile, TempDir
1819

1920
logger = logging.getLogger(__name__)
@@ -89,13 +90,24 @@ def _call(self, filename, *args, **kwargs):
8990
self.returncode = self.cli.process.returncode
9091

9192
if self.returncode != 0:
93+
self.stdout = self.cli.get_stdout()
9294
self.stderr = self.cli.get_stderr()
9395
last_error_line = self.stderr.split('\n')[-1].strip()
94-
msg = f"Fasttree failed on {filename}: {last_error_line}"
96+
logger.error('FastTree returned non-zero exit status: {}'.format(self.returncode))
97+
logger.error('Output of FastTree:\n\n%s\nstdout=\n%s\n{}\n\n%s\nstderr=\n%s\n{}\n\n',
98+
"=" * 30, "=" * 30, summarize_long_message(self.stdout),
99+
"=" * 30, "=" * 30, summarize_long_message(self.stderr))
100+
if self.returncode < 0:
101+
termination = f"was terminated by signal {-self.returncode}"
102+
exit_code = 128 - self.returncode
103+
else:
104+
termination = f"exited with code {self.returncode}"
105+
exit_code = self.returncode
106+
msg = f"Fasttree {termination} on {filename}: {last_error_line}"
95107
logger.error(msg)
96-
raise WrapperError(msg, self.stderr)
108+
raise WrapperError(msg, exit_code=exit_code)
97109

98-
return (self.cli.get_stdout(), self.cli.get_stderr())
110+
return self.cli.get_stdout(), self.cli.get_stderr()
99111

100112
def command(self):
101113
return str(self.options)

0 commit comments

Comments
 (0)