diff --git a/.dockerignore b/.dockerignore index 73321bf..fdec83a 100755 --- a/.dockerignore +++ b/.dockerignore @@ -17,5 +17,7 @@ __pycache__/ .github/ *.md -# Playwright MCP browser data +# Volume bind-mount targets — never copy into the build context +files/ +repos/ .playwright-mcp/ diff --git a/.gitignore b/.gitignore index 7072f33..a9256c5 100755 --- a/.gitignore +++ b/.gitignore @@ -31,5 +31,9 @@ dist/ # Local tool configs (not committed — mount into Docker at runtime) tools/ -# Playwright MCP browser data +# Local bind-mount targets for docker-compose.override.yml +files/ +repos/ + +# Legacy Playwright MCP output directory (replaced by ./files in docker-compose) .playwright-mcp/ diff --git a/Dockerfile b/Dockerfile index 3e2ba43..ace5117 100755 --- a/Dockerfile +++ b/Dockerfile @@ -27,6 +27,8 @@ COPY handlers/ ./handlers/ ENV MCP_TOOL_CONFIG_DIR=/app/tools ENV MCP_ENV_FILE=/app/.env +ENV MCPPROXY_FILES_DIR=/app/files +ENV MCPPROXY_REPOS_DIR=/app/repos EXPOSE 8888 8889 diff --git a/README.md b/README.md index f180c43..0f9774c 100755 --- a/README.md +++ b/README.md @@ -22,8 +22,8 @@ maintain separately, no changes to `server.py` needed when adding new tools. Two **built-in tools** (`mcpproxy__listfiles` and `mcpproxy__getfile`) are always registered without any YAML config. They give LLMs read-only access to a configurable directory -(default: `.playwright-mcp`) — useful for retrieving screenshots and snapshots produced -by package providers such as Playwright MCP. +(default: `/app/files`, mountable as a Docker volume) — useful for retrieving screenshots +and snapshots produced by package providers such as Playwright MCP. 
## Tool names advertised to the LLM @@ -208,6 +208,27 @@ docker run -d --rm \ ghcr.io/billjr99/mcpproxy:latest ``` +**Run with persistent caches and artefacts** — add named volumes so cloned repos, +package caches, and provider output files survive container restarts: + +```bash +docker run -d --rm \ + -p 8888:8888 -p 8889:8889 \ + --env-file .env \ + -v "$(pwd)/tools":/app/tools \ + -v mcpproxy-files:/app/files \ + -v mcpproxy-repos:/app/repos \ + -v mcpproxy-cache:/root/.cache \ + -v mcpproxy-npm:/root/.npm \ + -v mcpproxy-uv-tools:/root/.local/share/uv \ + --name mcpproxy \ + ghcr.io/billjr99/mcpproxy:latest +``` + +Every named `mcpproxy-*` volume above is optional (the `tools` bind mount is still required) — omit any subset and that path falls back to the +container's ephemeral writable layer. See **[Volumes & caching](#volumes--caching)** +below for what each one covers and the cold-start speedup it provides. + MCP endpoint: **`http://localhost:8888/mcp`** Web UI & OpenAI-compatible tools endpoint: **`http://localhost:8889`** @@ -312,6 +333,29 @@ Docker Compose reads `.env` via `env_file:`. The file is never copied into the i MCP_HOST_PORT=9000 UI_HOST_PORT=9001 docker compose up ``` +### Volumes & caching + +`docker-compose.yml` declares six named volumes. Only the first is required — +the rest persist caches and artefacts that would otherwise be re-downloaded +or re-built on every fresh container. + +| Container path | Volume | Holds | Without it (cold start) | +|---|---|---|---| +| `/app/tools` | `mcpproxy-tools` | Provider YAML configs | **Required** — the proxy has nothing to serve. | +| `/app/files` | `mcpproxy-files` | Provider output artefacts (Playwright screenshots, snapshots, …) surfaced via `mcpproxy__listfiles` / `mcpproxy__getfile` | Files vanish on container removal. | +| `/app/repos` | `mcpproxy-repos` | Cloned git workdirs + their build artefacts (`node_modules`, `dist`, …) for repository-mode providers | Re-clones and re-runs every `build_commands` on each start (seconds to several minutes per repo). 
| +| `/root/.cache` | `mcpproxy-cache` | XDG caches: pip wheels, uv wheels, Playwright browser binaries (`ms-playwright`) | pip/uvx re-download wheels; `npx playwright install chrome` re-fetches ~150 MB. | +| `/root/.npm` | `mcpproxy-npm` | npm/npx package cache | npx re-downloads packages from the npm registry on first call. | +| `/root/.local/share/uv` | `mcpproxy-uv-tools` | uvx per-tool venvs | uvx re-creates per-tool venvs from cached wheels. | + +In dev (`docker-compose.override.yml`), `mcpproxy-tools`, `mcpproxy-files`, and +`mcpproxy-repos` are replaced with bind mounts (`./tools`, `./files`, `./repos`) so +you can inspect or edit them from the host. The three cache volumes remain named +volumes even in dev — they're opaque package-manager state, not files you read. + +For ephemeral / CI runs, drop any subset of the cache/artefact volumes (everything except `mcpproxy-tools`) — the proxy still works, +just slower on the first tool call after each cold start. + --- ## Connecting AI clients to this MCP server @@ -673,10 +717,13 @@ installed binary: ```yaml # ── npx (Node.js, no install needed) ───────────────────────────────────────── package: - command: npx @playwright/mcp@latest --headless --isolated + command: npx @playwright/mcp@latest --headless --isolated --output-dir /app/files/playwright setup_commands: - npx playwright install chrome # installs browser on every startup + # (cached in /root/.cache/ms-playwright via the + # mcpproxy-cache volume — only re-downloads on + # a fresh, unmounted container) tools: # Populated automatically when the wizard introspects the command — or fill manually @@ -849,16 +896,13 @@ commands, and the auto-discovered env keys list. |---|---|---| | `MCPPROXY_REPOS_DIR` | `/app/repos` | Base directory for cloned repos. 
| -For persistent build caches across container restarts, mount this directory as a -volume: +The default `docker-compose.yml` mounts the `mcpproxy-repos` named volume here +(or `./repos` in dev via the override file) so cloned trees and their build +artefacts (`node_modules`, `dist`, …) survive container restarts. See +[Volumes & caching](#volumes--caching) for the full list. -```yaml -volumes: - - mcpproxy-repos:/app/repos -``` - -Without a mount, every container start re-clones and re-builds — exactly what's wanted -for an ephemeral / disposable container. +Drop the volume entry for ephemeral / disposable containers — every container +start will re-clone and re-build into the container's writable layer. #### Lifecycle on container restart @@ -955,9 +999,22 @@ config file required: | `mcpproxy__listfiles` | List files and subdirectories inside the files base directory | | `mcpproxy__getfile` | Read a file from the files base directory (UTF-8 text or base64) | -**Default base directory:** `.playwright-mcp` relative to the server's working directory -(i.e. `/app/.playwright-mcp` inside Docker). Override with the `MCPPROXY_FILES_DIR` -environment variable. +**Default base directory:** `/app/files` inside Docker (mounted as the +`mcpproxy-files` named volume, or `./files` in dev — see +[Volumes & caching](#volumes--caching)). Override with the `MCPPROXY_FILES_DIR` +environment variable. `run_local.sh` automatically sets it to `./files` under the +repo root when running outside Docker. + +Each package provider should write its artefacts under its own subdirectory of +the base — e.g. Playwright is launched with +`npx @playwright/mcp@latest … --output-dir /app/files/playwright` so screenshots +land at `/app/files/playwright/screenshot.png`. + +> **Note (migrating from earlier versions):** the default was previously +> `.playwright-mcp` (relative to the cwd, i.e. `/app/.playwright-mcp` inside +> Docker). 
If you have a custom `tools/playwright.yaml`, either add the +> `--output-dir /app/files/playwright` flag to its spawn command, or set +> `MCPPROXY_FILES_DIR=/app/.playwright-mcp` to keep the old layout. Only files **inside** the base directory are accessible — path-traversal attempts (`../`) are rejected. @@ -965,10 +1022,11 @@ Only files **inside** the base directory are accessible — path-traversal attem #### Example workflow with Playwright 1. Ask the LLM to navigate to a page and take a screenshot via the Playwright MCP provider. -2. Playwright writes `screenshot.png` to `.playwright-mcp/`. -3. Ask the LLM to call `mcpproxy__listfiles` — it returns the file list. -4. Ask the LLM to call `mcpproxy__getfile` with `path="screenshot.png"` — it returns the - PNG as a base64 string that the LLM can describe or pass to a vision model. +2. Playwright writes `screenshot.png` to `/app/files/playwright/` (because its spawn + command includes `--output-dir /app/files/playwright`). +3. Ask the LLM to call `mcpproxy__listfiles` with `path="playwright"` — it returns the file list. +4. Ask the LLM to call `mcpproxy__getfile` with `path="playwright/screenshot.png"` — it returns + the PNG as a base64 string that the LLM can describe or pass to a vision model. #### `mcpproxy__listfiles` parameters @@ -994,13 +1052,16 @@ Returns an object with `ok`, `path`, `size`, `content`, and `encoding`. MCPPROXY_FILES_DIR=/app/data ``` -Or mount a volume at the target path so files persist across container restarts: +Or mount a different volume / host directory at the target path: ```yaml volumes: - - ./playwright-output:/app/.playwright-mcp + - ./playwright-output:/app/files # bind-mount host dir at the default location ``` +By default `docker-compose.yml` mounts the named volume `mcpproxy-files` at +`/app/files`, and `docker-compose.override.yml` swaps that for `./files` in dev. 
+ --- ### YAML provider reference @@ -1018,7 +1079,7 @@ code: | # Python source — executed once at startup # Supports any command: npx, uvx, python -m, or an installed binary. package: - command: string # e.g. "npx @playwright/mcp@latest --isolated" + command: string # e.g. "npx @playwright/mcp@latest --isolated --output-dir /app/files/playwright" # "uvx mcp-server-fetch" # "python -m mcp_server_github" # "mcp-server-github" diff --git a/builtin_tools.py b/builtin_tools.py index 14d76c9..b671dae 100644 --- a/builtin_tools.py +++ b/builtin_tools.py @@ -5,9 +5,9 @@ mcpproxy__listfiles List files/directories inside the mcpproxy files dir. mcpproxy__getfile Read a file from the mcpproxy files dir (text or base64). -The *base directory* defaults to ``.playwright-mcp`` (relative to the server's -working directory) and can be overridden at runtime with the -``MCPPROXY_FILES_DIR`` environment variable. Only files **inside** the base +The *base directory* defaults to ``/app/files`` (mounted as a Docker volume so +artefacts persist across container restarts) and can be overridden at runtime with +the ``MCPPROXY_FILES_DIR`` environment variable. Only files **inside** the base directory are accessible — path-traversal attempts are rejected. """ @@ -23,7 +23,7 @@ def _base_dir() -> Path: Evaluated on each call so that tests can override MCPPROXY_FILES_DIR with monkeypatch without restarting the process. """ - raw = os.environ.get("MCPPROXY_FILES_DIR", ".playwright-mcp") + raw = os.environ.get("MCPPROXY_FILES_DIR", "/app/files") return Path(raw).resolve() diff --git a/config.py b/config.py index 7695dec..e27a737 100644 --- a/config.py +++ b/config.py @@ -7,10 +7,12 @@ SERVER_NAME = os.environ.get("MCP_SERVER_NAME", "local-config-driven-mcp") # Base directory exposed by the built-in mcpproxy__listfiles / mcpproxy__getfile tools. 
-# Defaults to .playwright-mcp (relative to the server's working directory) so that -# screenshots and snapshots produced by the Playwright MCP package provider are -# immediately accessible. Override with MCPPROXY_FILES_DIR. -FILES_DIR = Path(os.environ.get("MCPPROXY_FILES_DIR", ".playwright-mcp")) +# Defaults to /app/files inside Docker so the directory can be mounted as a volume to +# persist screenshots, snapshots, and other artefacts produced by package providers +# (e.g. Playwright MCP writing under /app/files/playwright when launched with +# `--output-dir /app/files/playwright`). Override with MCPPROXY_FILES_DIR (run_local.sh +# sets it to ./files for local non-Docker runs). +FILES_DIR = Path(os.environ.get("MCPPROXY_FILES_DIR", "/app/files")) # Base directory where repository providers clone their git repos. Each # provider gets a subdirectory named after the provider (e.g. /app/repos/linkedin). diff --git a/docker-compose.override.yml b/docker-compose.override.yml index 2cc4fc1..672e9d5 100755 --- a/docker-compose.override.yml +++ b/docker-compose.override.yml @@ -1,11 +1,23 @@ -# Development override — replaces named volumes with local bind mounts. +# Development override — replaces user-facing named volumes with local bind mounts. # Docker Compose merges this file automatically when you run `docker compose up`. # Do NOT commit a .env file; copy .env.example → .env and fill in your secrets. # +# Bind-mounted in dev (so you can inspect / edit on the host): +# ./tools → /app/tools — provider YAML configs you're editing +# ./files → /app/files — screenshots, snapshots, anything providers emit +# ./repos → /app/repos — cloned repo workdirs (build artefacts visible locally) +# +# Left as named volumes in dev (cleanliness — these are opaque caches): +# mcpproxy-cache → /root/.cache +# mcpproxy-npm → /root/.npm +# mcpproxy-uv-tools → /root/.local/share/uv +# # handlers/ is baked into the image and does not need a mount. 
services: mcp-host: volumes: - ./tools:/app/tools + - ./files:/app/files + - ./repos:/app/repos - ./.env:/app/.env diff --git a/docker-compose.yml b/docker-compose.yml index 033bf25..eeece99 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ -# Base compose file — uses named Docker volumes for tools. +# Base compose file — uses named Docker volumes for tools, artefacts, and caches. # Suitable for production / CI / any environment without the repo checked out. # # handlers/ is baked into the image — no volume needed. @@ -6,6 +6,24 @@ # Populate tools volume before first run: # docker run --rm -v mcpproxy-tools:/dst -v "$(pwd)/tools":/src alpine cp -r /src/. /dst/ # +# Volumes +# ─────── +# mcpproxy-tools /app/tools — provider YAML configs (required) +# mcpproxy-files /app/files — provider output artefacts surfaced via +# mcpproxy__listfiles / mcpproxy__getfile +# (e.g. Playwright screenshots under +# /app/files/playwright) +# mcpproxy-repos /app/repos — git workdirs + build artefacts for +# repository-mode providers +# mcpproxy-cache /root/.cache — XDG cache (pip wheels, uv wheels, +# Playwright browser binaries, …) +# mcpproxy-npm /root/.npm — npm/npx package cache +# mcpproxy-uv-tools /root/.local/share/uv — uvx per-tool venvs +# +# Every cache/artefact volume is optional in spirit — remove the entry and the +# container falls back to ephemeral storage on its writable layer (re-clones, +# re-downloads, re-builds on each fresh container). +# # For local development, docker-compose.override.yml is merged automatically. 
services: @@ -19,9 +37,21 @@ services: environment: MCP_TOOL_CONFIG_DIR: "/app/tools" MCP_ENV_FILE: "/app/.env" + MCPPROXY_FILES_DIR: "/app/files" + MCPPROXY_REPOS_DIR: "/app/repos" volumes: - mcpproxy-tools:/app/tools + - mcpproxy-files:/app/files + - mcpproxy-repos:/app/repos + - mcpproxy-cache:/root/.cache + - mcpproxy-npm:/root/.npm + - mcpproxy-uv-tools:/root/.local/share/uv - ./.env:/app/.env volumes: mcpproxy-tools: + mcpproxy-files: + mcpproxy-repos: + mcpproxy-cache: + mcpproxy-npm: + mcpproxy-uv-tools: diff --git a/run_local.sh b/run_local.sh index d128401..2295cc7 100755 --- a/run_local.sh +++ b/run_local.sh @@ -327,6 +327,10 @@ set +a export MCP_TOOL_CONFIG_DIR="$ROOT_DIR/tools" # local path always wins export MCP_SERVER_NAME="${MCP_SERVER_NAME:-mcpproxy}" export MCP_ENV_FILE="$ENV_FILE" +# Local-friendly defaults for the files / repos directories. In Docker these +# default to /app/files and /app/repos (see Dockerfile + docker-compose.yml). +export MCPPROXY_FILES_DIR="${MCPPROXY_FILES_DIR:-$ROOT_DIR/files}" +export MCPPROXY_REPOS_DIR="${MCPPROXY_REPOS_DIR:-$ROOT_DIR/repos}" unset MCP_REPOS_DIR # no longer used # ───────────────────────────────────────────────────────────────────────────── diff --git a/server.py b/server.py index 27b73a4..8721c45 100755 --- a/server.py +++ b/server.py @@ -512,7 +512,7 @@ def register_builtin_tools() -> None: """Register the mcpproxy__listfiles and mcpproxy__getfile utility tools. These tools expose read-only access to the files directory (default: - ``.playwright-mcp``, override with ``MCPPROXY_FILES_DIR``). They are + ``/app/files``, override with ``MCPPROXY_FILES_DIR``). They are always registered regardless of what YAML providers are loaded, giving LLMs a way to retrieve screenshots, JSON snapshots, and other files produced by package providers such as the Playwright MCP server. 
@@ -525,7 +525,7 @@ def register_builtin_tools() -> None: "name": "mcpproxy__listfiles", "description": ( "List files and directories inside the mcpproxy files directory " - "(default: .playwright-mcp, override with MCPPROXY_FILES_DIR). " + "(default: /app/files, override with MCPPROXY_FILES_DIR). " "Use this to discover screenshots, JSON snapshots, and other files " "produced by package providers such as the Playwright MCP server. " "Pass a subdirectory path to drill down." @@ -553,7 +553,7 @@ def register_builtin_tools() -> None: "name": "mcpproxy__getfile", "description": ( "Read the contents of a file from the mcpproxy files directory " - "(default: .playwright-mcp). " + "(default: /app/files). " "Returns UTF-8 text for text files (JSON, HTML, Markdown, …) or " "base64-encoded bytes for binary files (PNG screenshots, …). " "Use mcpproxy__listfiles first to discover available file paths." diff --git a/tests/test_builtin_tools.py b/tests/test_builtin_tools.py index 1a38351..48dba96 100644 --- a/tests/test_builtin_tools.py +++ b/tests/test_builtin_tools.py @@ -1,7 +1,7 @@ """Tests for builtin_tools.py — mcpproxy__listfiles and mcpproxy__getfile. These tests monkeypatch MCPPROXY_FILES_DIR to a fresh temp directory so -they never touch the real .playwright-mcp directory. +they never touch the real files directory (default /app/files in Docker). 
""" import base64 import os @@ -325,3 +325,24 @@ def test_register_builtin_tools_succeeds(self, monkeypatch): def test_builtin_tools_exported(self): from builtin_tools import get_file, list_files, _base_dir, _safe_resolve assert all(callable(f) for f in (get_file, list_files, _base_dir, _safe_resolve)) + + +# --------------------------------------------------------------------------- +# Default base directory (/app/files) +# --------------------------------------------------------------------------- + +class TestDefaultBaseDir: + """Verify the default files directory is /app/files (mountable as a Docker volume).""" + + def test_default_is_app_files(self, monkeypatch): + monkeypatch.delenv("MCPPROXY_FILES_DIR", raising=False) + from builtin_tools import _base_dir + assert _base_dir() == Path("/app/files").resolve() + + def test_config_default_matches(self, monkeypatch): + """config.FILES_DIR is re-imported under the same default.""" + monkeypatch.delenv("MCPPROXY_FILES_DIR", raising=False) + import importlib + import config + importlib.reload(config) + assert config.FILES_DIR == Path("/app/files") diff --git a/tests/test_mcp_client.sh b/tests/test_mcp_client.sh index bf868bd..a639a37 100755 --- a/tests/test_mcp_client.sh +++ b/tests/test_mcp_client.sh @@ -899,7 +899,7 @@ def _extract(rpc): # 1. Call mcpproxy__listfiles (root directory) listing = _extract(_call_tool('mcpproxy__listfiles', {})) entries = listing.get('entries', []) -base_dir = listing.get('base_dir', '.playwright-mcp') +base_dir = listing.get('base_dir', '/app/files') # 2. 
Fetch every file entry files_fetched = [] @@ -936,7 +936,7 @@ python3 -c " import json, sys try: d = json.load(open('${FILES_DATA}', encoding='utf-8')) - base_dir = d.get('base_dir', '.playwright-mcp') + base_dir = d.get('base_dir', '/app/files') entries = d.get('entries', []) files = d.get('files', []) @@ -993,7 +993,7 @@ files_data = json.loads(Path(sys.argv[3]).read_text(encoding='utf-8')) out = Path(sys.argv[4]) model = sys.argv[5] -base_dir = files_data.get('base_dir', '.playwright-mcp') +base_dir = files_data.get('base_dir', '/app/files') files = files_data.get('files', []) sections = []