diff --git a/src/cloudai/systems/standalone/standalone_system.py b/src/cloudai/systems/standalone/standalone_system.py
index 51c146a42..57d99487d 100644
--- a/src/cloudai/systems/standalone/standalone_system.py
+++ b/src/cloudai/systems/standalone/standalone_system.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -78,6 +78,22 @@ def kill(self, job: BaseJob) -> None:
         Args:
             job (BaseJob): The job to be terminated.
         """
-        cmd = f"kill -9 {job.id}"
-        logging.debug(f"Executing termination command for job {job.id}: {cmd}")
+        try:
+            pid = int(str(job.id).strip())
+        except ValueError:
+            logging.warning(
+                "Skipping termination for standalone job %s because it is not a valid process ID.",
+                job.id,
+            )
+            return
+
+        if pid <= 0:
+            logging.warning(
+                "Skipping termination for standalone job %s because it does not reference a launched process.",
+                job.id,
+            )
+            return
+
+        cmd = f"kill -9 {pid}"
+        logging.debug(f"Executing termination command for job {pid}: {cmd}")
         self.cmd_shell.execute(cmd)
diff --git a/tests/systems/standalone/test_system.py b/tests/systems/standalone/test_system.py
index 9dd252026..3d66a8eb6 100644
--- a/tests/systems/standalone/test_system.py
+++ b/tests/systems/standalone/test_system.py
@@ -108,3 +108,39 @@ def test_kill_job(mock_execute, standalone_system, standalone_job):
 
     kill_command = f"kill -9 {standalone_job.id}"
     mock_execute.assert_called_once_with(kill_command)
+
+
+@pytest.mark.parametrize("job_id", [0, "0", "00", "+0", "-0", -1, "-1", "not-a-pid"])
+@patch("cloudai.util.CommandShell.execute")
+def test_kill_job_skips_invalid_pid(mock_execute, standalone_system, mock_test, job_id):
+    """
+    Test that standalone dry-run sentinel IDs and invalid PIDs are not killed.
+
+    Args:
+        mock_execute (MagicMock): Mocked CommandShell execute method.
+        standalone_system (StandaloneSystem): Instance of the system under test.
+        mock_test (Test): The mock test instance associated with the job.
+        job_id (int | str): Job ID that must not be killed.
+    """
+    job = StandaloneJob(mock_test, id=job_id)
+
+    standalone_system.kill(job)
+
+    mock_execute.assert_not_called()
+
+
+@patch("cloudai.util.CommandShell.execute")
+def test_kill_job_normalizes_numeric_pid(mock_execute, standalone_system, mock_test):
+    """
+    Test that standalone termination uses a validated numeric process ID.
+
+    Args:
+        mock_execute (MagicMock): Mocked CommandShell execute method.
+        standalone_system (StandaloneSystem): Instance of the system under test.
+        mock_test (Test): The mock test instance associated with the job.
+    """
+    job = StandaloneJob(mock_test, id="0012345")
+
+    standalone_system.kill(job)
+
+    mock_execute.assert_called_once_with("kill -9 12345")