diff --git a/process_runner.py b/process_runner.py index 1affd13..6d51098 100644 --- a/process_runner.py +++ b/process_runner.py @@ -56,9 +56,26 @@ async def _recv(self, timeout: float = 30.0) -> dict[str, Any]: assert self._proc and self._proc.stdout line = await asyncio.wait_for(self._proc.stdout.readline(), timeout=timeout) if not line: - raise EOFError("MCP process closed stdout") + # The subprocess closed stdout — usually means it crashed. Drain + # stderr (best-effort, non-blocking) so the caller sees the actual + # cause rather than a bare "closed stdout". + stderr_tail = await self._drain_stderr_tail() + suffix = f"\nsubprocess stderr (tail): {stderr_tail}" if stderr_tail else "" + raise EOFError(f"MCP process closed stdout{suffix}") return json.loads(line) + async def _drain_stderr_tail(self, max_bytes: int = 4096) -> str: + """Return up to ``max_bytes`` of buffered stderr from the subprocess.""" + if not self._proc or not self._proc.stderr: + return "" + try: + data = await asyncio.wait_for( + self._proc.stderr.read(max_bytes), timeout=2.0 + ) + except (asyncio.TimeoutError, Exception): + return "" + return data.decode(errors="replace").strip() + async def _start(self) -> None: env = self._build_env() self._proc = await asyncio.create_subprocess_exec( diff --git a/server.py b/server.py index 665d535..27b73a4 100755 --- a/server.py +++ b/server.py @@ -81,6 +81,11 @@ ADVERTISED_NAME_SEP = "__" +# Maximum wall-clock time (seconds) a single build command may run before +# materialize_repository gives up. Protects against users pasting a +# long-running server start (e.g. `npm run start:dev`) into build_commands. +BUILD_COMMAND_TIMEOUT = int(os.environ.get("MCPPROXY_BUILD_TIMEOUT", "600")) + # --------------------------------------------------------------------------- # Pure helpers @@ -362,7 +367,20 @@ def materialize_repository(spec: dict[str, Any]) -> None: if not cmd: continue print(f"Running build command in {workdir}: {cmd}") - subprocess.run(shlex.split(cmd), cwd=workdir, check=True) + try: + subprocess.run( + shlex.split(cmd), + cwd=workdir, + check=True, + timeout=BUILD_COMMAND_TIMEOUT, + ) + except subprocess.TimeoutExpired: + raise RuntimeError( + f"Build command timed out after {BUILD_COMMAND_TIMEOUT}s: {cmd!r}. " + "Build commands must terminate — if this is a long-running " + "server command (e.g. `npm run start:dev`), move it to the " + "Spawn command field instead." + ) except Exception as exc: print(f"materialize_repository error in {source_path}: {exc}") traceback.print_exc() @@ -577,9 +595,27 @@ def register_builtin_tools() -> None: register_builtin_tools() +# One bad provider must not crash startup — log and continue. Providers whose +# setup fails (e.g. a build command exits non-zero) are still registered if +# register_provider succeeded; broken tool invocations will surface the error +# at call time rather than preventing the whole server from coming up. for provider_spec in load_provider_specs(CONFIG_DIR): - register_provider(provider_spec) - run_provider_setup(provider_spec) + source_path = provider_spec.get("_config_path", "") + try: + register_provider(provider_spec) + except Exception as exc: + print(f"Skipping provider {source_path} — register_provider failed: {exc}") + traceback.print_exc() + continue + try: + run_provider_setup(provider_spec) + except Exception as exc: + print( + f"Provider {source_path}: setup failed ({exc}). " + "Tools are registered but their subprocess may not work until the " + "build / requirements / setup_commands are fixed (see the editor)." + ) + traceback.print_exc() # --------------------------------------------------------------------------- diff --git a/tests/test_process_runner.py b/tests/test_process_runner.py index 9c252fe..efb0859 100644 --- a/tests/test_process_runner.py +++ b/tests/test_process_runner.py @@ -1,4 +1,6 @@ """Tests for process_runner.py — cwd threading and session keying.""" +import pytest + import process_runner @@ -75,3 +77,30 @@ def test_no_env_keys_skips_file_read(self, tmp_path, monkeypatch): s = process_runner.ProcessSession("echo hi") env = s._build_env() # must not raise assert isinstance(env, dict) + + +class TestIntrospectStderrCapture: + """When the subprocess crashes during the handshake, the error message + should include stderr so the user can see the cause.""" + + @pytest.mark.asyncio + async def test_eof_error_includes_stderr_tail(self): + class FakeStdout: + async def readline(self): + return b"" + class FakeStderr: + def __init__(self, data): + self._data = data + async def read(self, n): + d, self._data = self._data[:n], self._data[n:] + return d + + session = process_runner.ProcessSession("does-not-matter") + class _Proc: + stdout = FakeStdout() + stderr = FakeStderr(b"Environment validation failed: KEY: Required\n") + session._proc = _Proc() + + with pytest.raises(EOFError) as exc_info: + await session._recv(timeout=1.0) + assert "Environment validation failed" in str(exc_info.value) diff --git a/tests/test_server.py b/tests/test_server.py index 6478700..3b7356c 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -990,3 +990,38 @@ async def call_tool(self, *a, **kw): return {"ok": True} import asyncio asyncio.run(entry["handler"](None)) assert captured["env_keys"] == ["A", "B"] + + +class TestBuildCommandTimeout: + """A build command that hangs must not deadlock server startup.""" + + def test_timeout_raises_helpful_runtime_error(self, tmp_path: Path): + import subprocess as sp + wd = str(tmp_path / "wd") + spec = { + "_config_path": str(tmp_path / "r.yaml"), + "repository": { + "url": "https://example.com/r.git", + "workdir": wd, + "build_commands": ["sleep 99999"], + }, + } + + def fake_run(args, **kwargs): + if args[0:2] == ["git", "clone"]: + Path(args[3]).mkdir(parents=True, exist_ok=True) + class _R: returncode = 0 + return _R() + # Build command — simulate a hang by raising TimeoutExpired. + raise sp.TimeoutExpired(args, kwargs.get("timeout", 1)) + + with patch("server.subprocess.run", side_effect=fake_run): + with pytest.raises(RuntimeError, match="Build command timed out"): + materialize_repository(spec) + + def test_timeout_constant_overridable_via_env(self, monkeypatch): + # The constant is read at module import — we can't easily re-import + # in the same process, but the symbol should exist and be an int. + import server as srv + assert isinstance(srv.BUILD_COMMAND_TIMEOUT, int) + assert srv.BUILD_COMMAND_TIMEOUT > 0