From 16e09f63394359b83eff2c64cb4394214210b6be Mon Sep 17 00:00:00 2001 From: mogery Date: Tue, 21 Apr 2026 18:15:39 +0100 Subject: [PATCH 01/10] feat: parse command --- src/commands/parse.ts | 239 ++++++++++++++++++++++++++++++++++++++++++ src/index.ts | 94 +++++++++++++++++ src/types/parse.ts | 42 ++++++++ 3 files changed, 375 insertions(+) create mode 100644 src/commands/parse.ts create mode 100644 src/types/parse.ts diff --git a/src/commands/parse.ts b/src/commands/parse.ts new file mode 100644 index 000000000..1fd481fc9 --- /dev/null +++ b/src/commands/parse.ts @@ -0,0 +1,239 @@ +/** + * Parse command implementation + * + * Uploads a local file to the Firecrawl /v2/parse endpoint and returns the + * parsed document in the requested format(s). Supported file types: + * .html, .htm, .pdf, .docx, .doc, .odt, .rtf, .xlsx, .xls + */ + +import * as fs from 'fs'; +import * as path from 'path'; +import type { FormatOption } from '@mendable/firecrawl-js'; +import type { ParseOptions, ParseResult } from '../types/parse'; +import type { ScrapeFormat } from '../types/scrape'; +import { getClient } from '../utils/client'; +import { getConfig, validateConfig } from '../utils/config'; +import { handleScrapeOutput } from '../utils/output'; + +const DEFAULT_API_URL = 'https://api.firecrawl.dev'; + +/** File extensions accepted by /v2/parse (mirrors the API controller). */ +const SUPPORTED_EXTENSIONS = new Set([ + '.html', + '.htm', + '.pdf', + '.docx', + '.doc', + '.odt', + '.rtf', + '.xlsx', + '.xls', +]); + +/** + * Best-effort content-type lookup so the API's kind detector has a hint + * even if the extension is ambiguous. 
+ */ +const CONTENT_TYPE_BY_EXT: Record = { + '.html': 'text/html', + '.htm': 'text/html', + '.pdf': 'application/pdf', + '.docx': + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + '.doc': 'application/msword', + '.odt': 'application/vnd.oasis.opendocument.text', + '.rtf': 'application/rtf', + '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + '.xls': 'application/vnd.ms-excel', +}; + +function outputTiming( + options: ParseOptions, + requestStartTime: number, + requestEndTime: number, + error?: Error | unknown +): void { + if (!options.timing) return; + + const duration = requestEndTime - requestStartTime; + const info: Record = { + file: options.file, + requestTime: new Date(requestStartTime).toISOString(), + duration: `${duration}ms`, + status: error ? 'error' : 'success', + }; + if (error) { + info.error = error instanceof Error ? error.message : 'Unknown error'; + } + console.error('Timing:', JSON.stringify(info, null, 2)); +} + +/** + * Build the `formats` array sent to the API (mirrors scrape's behavior). + */ +function buildFormats(options: ParseOptions): FormatOption[] { + const formats: FormatOption[] = []; + + if (options.formats && options.formats.length > 0) { + formats.push(...options.formats); + } + + if (options.query) { + formats.push({ type: 'query', prompt: options.query } as any); + } + + if (formats.length === 0) { + formats.push('markdown'); + } + + return formats; +} + +/** + * Build the JSON `options` payload uploaded alongside the file. 
+ */ +function buildOptionsPayload(options: ParseOptions): Record { + const payload: Record = { + formats: buildFormats(options), + integration: 'cli', + }; + + if (options.onlyMainContent !== undefined) { + payload.onlyMainContent = options.onlyMainContent; + } + if (options.includeTags && options.includeTags.length > 0) { + payload.includeTags = options.includeTags; + } + if (options.excludeTags && options.excludeTags.length > 0) { + payload.excludeTags = options.excludeTags; + } + if (options.timeout !== undefined) { + payload.timeout = options.timeout; + } + if (options.location) { + payload.location = options.location; + } + + return payload; +} + +/** + * Execute the parse command by POSTing a multipart upload to /v2/parse. + */ +export async function executeParse( + options: ParseOptions +): Promise { + const filePath = path.resolve(options.file); + + if (!fs.existsSync(filePath)) { + return { + success: false, + error: `File not found: ${options.file}`, + }; + } + + const stat = fs.statSync(filePath); + if (!stat.isFile()) { + return { + success: false, + error: `Not a file: ${options.file}`, + }; + } + + const ext = path.extname(filePath).toLowerCase(); + if (!SUPPORTED_EXTENSIONS.has(ext)) { + return { + success: false, + error: + `Unsupported file type "${ext || '(none)'}". ` + + `Supported extensions: ${[...SUPPORTED_EXTENSIONS].join(', ')}`, + }; + } + + // Ensure auth/url is resolved through the same config pipeline scrape uses. + if (options.apiKey || options.apiUrl) { + getClient({ apiKey: options.apiKey, apiUrl: options.apiUrl }); + } + + const config = getConfig(); + const apiKey = options.apiKey || config.apiKey; + validateConfig(apiKey); + + const apiUrl = (options.apiUrl || config.apiUrl || DEFAULT_API_URL).replace( + /\/$/, + '' + ); + + const buffer = fs.readFileSync(filePath); + const filename = path.basename(filePath); + const contentType = CONTENT_TYPE_BY_EXT[ext] ?? 
'application/octet-stream'; + + const form = new FormData(); + form.append( + 'file', + new Blob([new Uint8Array(buffer)], { type: contentType }), + filename + ); + form.append('options', JSON.stringify(buildOptionsPayload(options))); + + const requestStartTime = Date.now(); + + try { + const response = await fetch(`${apiUrl}/v2/parse`, { + method: 'POST', + headers: apiKey ? { Authorization: `Bearer ${apiKey}` } : undefined, + body: form, + }); + + const requestEndTime = Date.now(); + outputTiming(options, requestStartTime, requestEndTime); + + const payload = (await response.json().catch(() => ({}))) as any; + + if (!response.ok || payload?.success === false) { + const message = + payload?.error || + `HTTP ${response.status}: ${response.statusText || 'Request failed'}`; + return { success: false, error: message }; + } + + return { + success: true, + data: payload?.data ?? payload, + }; + } catch (error) { + const requestEndTime = Date.now(); + outputTiming(options, requestStartTime, requestEndTime, error); + return { + success: false, + error: error instanceof Error ? error.message : 'Unknown error occurred', + }; + } +} + +/** + * Handle parse command output. Reuses the scrape output formatter since the + * /v2/parse response shape matches /v2/scrape. + */ +export async function handleParseCommand(options: ParseOptions): Promise { + const result = await executeParse(options); + + if (options.query && result.success && result.data?.answer) { + const { writeOutput } = await import('../utils/output'); + writeOutput(result.data.answer, options.output, !!options.output); + return; + } + + const effectiveFormats: ScrapeFormat[] = + options.formats && options.formats.length > 0 + ? 
[...options.formats] + : ['markdown']; + + handleScrapeOutput( + result, + effectiveFormats, + options.output, + options.pretty, + options.json + ); +} diff --git a/src/index.ts b/src/index.ts index 1faec0138..87076889b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -16,6 +16,7 @@ import { configure, viewConfig } from './commands/config'; import { handleCreditUsageCommand } from './commands/credit-usage'; import { handleCrawlCommand } from './commands/crawl'; import { handleMapCommand } from './commands/map'; +import { handleParseCommand } from './commands/parse'; import { handleSearchCommand } from './commands/search'; import { handleAgentCommand } from './commands/agent'; import { @@ -61,6 +62,7 @@ const AUTH_REQUIRED_COMMANDS = [ 'download', 'crawl', 'map', + 'parse', 'search', 'agent', 'browser', @@ -498,6 +500,97 @@ function createMapCommand(): Command { return mapCmd; } +/** + * Create and configure the parse command + */ +function createParseCommand(): Command { + const parseCmd = new Command('parse') + .description( + 'Parse a local file (HTML, PDF, DOCX, DOC, ODT, RTF, XLSX, XLS) into markdown, HTML, links, JSON, and more. Uses /v2/parse.' + ) + .argument('', 'Path to the local file to parse') + .option('-H, --html', 'Output raw HTML (shortcut for --format html)') + .option( + '-f, --format ', + 'Output format(s). Multiple formats can be specified with commas (e.g., "markdown,links"). Available: markdown, html, rawHtml, links, images, summary, json, attributes. Single format outputs raw content; multiple formats output JSON.' 
+ ) + .option('--only-main-content', 'Include only main content', false) + .option('-S, --summary', 'Output summary (shortcut for --format summary)') + .option('--include-tags ', 'Comma-separated list of tags to include') + .option('--exclude-tags ', 'Comma-separated list of tags to exclude') + .option( + '--timeout ', + 'Timeout in milliseconds for the parse job', + parseInt + ) + .option( + '-Q, --query ', + 'Ask a question about the parsed content (query format)' + ) + .option( + '-k, --api-key ', + 'Firecrawl API key (overrides global --api-key)' + ) + .option('--api-url ', 'API URL (overrides global --api-url)') + .option('-o, --output ', 'Output file path (default: stdout)') + .option('--json', 'Output as JSON format', false) + .option('--pretty', 'Pretty print JSON output', false) + .option( + '--timing', + 'Show request timing and other useful information', + false + ) + .addHelpText( + 'after', + ` +Examples: + $ firecrawl parse ./report.pdf + $ firecrawl parse ./report.pdf -f markdown,links + $ firecrawl parse ./page.html -H + $ firecrawl parse ./contract.docx --only-main-content + $ firecrawl parse ./report.pdf -Q "What is the total revenue?" + $ firecrawl parse ./report.pdf --json --pretty -o report.json + +Supported file types: .html, .htm, .pdf, .docx, .doc, .odt, .rtf, .xlsx, .xls +Max upload size: 50 MB +` + ) + .action(async (file: string, options) => { + let format: string | undefined; + if (options.html) { + format = 'html'; + } else if (options.summary) { + format = 'summary'; + } else if (options.format) { + format = options.format; + } + + const scrapeOptions = parseScrapeOptions({ + ...options, + url: 'file://' + file, + format: format ?? 
'markdown', + }); + + await handleParseCommand({ + file, + formats: scrapeOptions.formats, + onlyMainContent: scrapeOptions.onlyMainContent, + includeTags: scrapeOptions.includeTags, + excludeTags: scrapeOptions.excludeTags, + timeout: options.timeout, + apiKey: options.apiKey, + apiUrl: options.apiUrl, + output: options.output, + pretty: options.pretty, + json: options.json, + timing: options.timing, + query: options.query, + }); + }); + + return parseCmd; +} + /** * Create and configure the search command */ @@ -1185,6 +1278,7 @@ Examples: // Add core commands to main program program.addCommand(createCrawlCommand()); program.addCommand(createMapCommand()); +program.addCommand(createParseCommand()); program.addCommand(createSearchCommand()); program.addCommand(createAgentCommand()); program.addCommand(createInteractCommand()); diff --git a/src/types/parse.ts b/src/types/parse.ts new file mode 100644 index 000000000..7499ef541 --- /dev/null +++ b/src/types/parse.ts @@ -0,0 +1,42 @@ +/** + * Types and interfaces for the parse command + */ + +import type { ScrapeFormat, ScrapeLocation } from './scrape'; + +export interface ParseOptions { + /** Local file path to parse */ + file: string; + /** Output format(s) */ + formats?: ScrapeFormat[]; + /** Include only main content */ + onlyMainContent?: boolean; + /** Include tags */ + includeTags?: string[]; + /** Exclude tags */ + excludeTags?: string[]; + /** Timeout in milliseconds for the parse job */ + timeout?: number; + /** API key for Firecrawl */ + apiKey?: string; + /** API URL for Firecrawl */ + apiUrl?: string; + /** Output file path */ + output?: string; + /** Pretty print JSON output */ + pretty?: boolean; + /** Force JSON output */ + json?: boolean; + /** Show request timing */ + timing?: boolean; + /** Location for geo-targeted parsing (typically unused for local files) */ + location?: ScrapeLocation; + /** Ask a question about the parsed content (query format) */ + query?: string; +} + +export interface 
ParseResult { + success: boolean; + data?: any; + error?: string; +} From 198b539648d7f0fd40ee7bb03400974468127aad Mon Sep 17 00:00:00 2001 From: Developers Digest <124798203+developersdigest@users.noreply.github.com> Date: Wed, 22 Apr 2026 09:21:01 -0400 Subject: [PATCH 02/10] add firecrawl-parse skill covers local file parsing (pdf/docx/xlsx/etc) with summary and query shortcuts. registers in firecrawl-cli hub. --- skills/firecrawl-cli/SKILL.md | 2 + skills/firecrawl-parse/SKILL.md | 69 +++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 skills/firecrawl-parse/SKILL.md diff --git a/skills/firecrawl-cli/SKILL.md b/skills/firecrawl-cli/SKILL.md index 4664c6207..8d7e6a7c7 100644 --- a/skills/firecrawl-cli/SKILL.md +++ b/skills/firecrawl-cli/SKILL.md @@ -62,6 +62,7 @@ Follow this escalation pattern: | AI-powered data extraction | `agent` | Need structured data from complex sites | | Interact with a page | `scrape` + `interact` | Content requires clicks, form fills, pagination, or login | | Download a site to files | `download` | Save an entire site as local files | +| Parse a local file | `parse` | File on disk (PDF, DOCX, XLSX, etc.) — not a URL | For detailed command reference, run `firecrawl --help`. @@ -85,6 +86,7 @@ For detailed command reference, run `firecrawl --help`. 
- **AI-powered structured extraction from complex sites** -> [firecrawl-agent](../firecrawl-agent/SKILL.md) - **Clicks, forms, login, pagination, or post-scrape browser actions** -> [firecrawl-interact](../firecrawl-interact/SKILL.md) - **Downloading a site to local files** -> [firecrawl-download](../firecrawl-download/SKILL.md) +- **Parsing a local file (PDF, DOCX, XLSX, HTML, etc.)** -> [firecrawl-parse](../firecrawl-parse/SKILL.md) - **Install, auth, or setup problems** -> [rules/install.md](rules/install.md) - **Output handling and safe file-reading patterns** -> [rules/security.md](rules/security.md) - **Integrating Firecrawl into an app, adding `FIRECRAWL_API_KEY` to `.env`, or choosing endpoint usage in product code** -> use the `firecrawl-build` skills (already installed alongside this CLI skill) diff --git a/skills/firecrawl-parse/SKILL.md b/skills/firecrawl-parse/SKILL.md new file mode 100644 index 000000000..3a38b5711 --- /dev/null +++ b/skills/firecrawl-parse/SKILL.md @@ -0,0 +1,69 @@ +--- +name: firecrawl-parse +description: | + Convert a local file (PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML) into clean markdown, HTML, or structured JSON. Use this skill when the user points at a file on disk and wants its content extracted — says "parse this PDF", "convert this Word doc", "read this file", "extract text from", "PDF to markdown", "DOCX to markdown", or provides a local path (not a URL). Also supports AI summary and query ("what does this PDF say about X?"). Use this instead of `scrape` for anything on the local filesystem. +allowed-tools: + - Bash(firecrawl *) + - Bash(npx firecrawl *) +--- + +# firecrawl parse + +Parse a local file into clean, LLM-optimized markdown. Supported formats: **HTML, PDF, DOCX, DOC, ODT, RTF, XLSX, XLS**. 
+ +## When to use + +- You have a file on disk (not a URL) and want its text +- User drops a PDF/DOCX and asks what it says, or to summarize it +- You need markdown from a Word doc, spreadsheet, or PDF to feed into other tools +- Use `scrape` instead when the source is a URL + +## Quick start + +```bash +# Basic — PDF/DOCX/etc. to markdown +firecrawl parse ./paper.pdf -o .firecrawl/paper.md + +# Summary shortcut (AI-generated overview) +firecrawl parse ./paper.pdf -S -o .firecrawl/summary.md + +# Ask a question about the doc +firecrawl parse ./paper.pdf -Q "What are the main conclusions?" + +# Multiple formats → JSON bundle (markdown + links + summary + metadata) +firecrawl parse ./paper.pdf -f markdown,links,summary --pretty -o .firecrawl/paper.json + +# Raw HTML output +firecrawl parse ./paper.pdf -H -o .firecrawl/paper.html +``` + +## Options + +| Option | Description | +| ------------------------ | --------------------------------------------------------------------------------------------------- | +| `-f, --format ` | Output formats (comma-separated): markdown, html, rawHtml, links, images, summary, json, attributes | +| `-S, --summary` | Shortcut for `--format summary` (AI summary) | +| `-H, --html` | Shortcut for `--format html` (raw HTML) | +| `-Q, --query ` | Ask a question about the parsed content | +| `--only-main-content` | Strip boilerplate, main content only | +| `--include-tags ` | Only include these HTML tags | +| `--exclude-tags ` | Exclude these HTML tags | +| `--timeout ` | Timeout for the parse job | +| `-o, --output ` | Output file path (default: stdout) | +| `--json` | Force JSON output | +| `--pretty` | Pretty-print JSON | +| `--timing` | Show request duration | + +## Tips + +- **Scrape vs parse**: `scrape` takes a URL, `parse` takes a local file path. A remote PDF URL can still go through `scrape`. +- **Single vs multi format**: one `--format` value returns raw content; multiple return JSON with keys for each format. 
+- **Quote paths with spaces**: `firecrawl parse "./My Doc.pdf"`. +- **PDFs may return empty `links`/`images`** — PDF structure doesn't always carry link/image metadata like HTML does. That's expected, not a failure. +- **Large docs**: parse time scales with file size. A ~50-page PDF takes ~10s. Use `--timing` to check. +- **Query vs save-and-grep**: `-Q` is convenient for single questions. For deeper analysis, save to file first, then `grep`/read the markdown. + +## See also + +- [firecrawl-scrape](../firecrawl-scrape/SKILL.md) — same idea but for URLs +- [firecrawl-download](../firecrawl-download/SKILL.md) — bulk save a site as local files (which you can then parse) From 031020fa243ab24784e27fb2f87560c1099926db Mon Sep 17 00:00:00 2001 From: Developers Digest <124798203+developersdigest@users.noreply.github.com> Date: Wed, 22 Apr 2026 09:28:08 -0400 Subject: [PATCH 03/10] lead parse skill with markdown, demote format matrix markdown is the 90% use case. moved formats table to secondary 'other formats' section. kept format availability caveat (links/images empty for pdfs). --- skills/firecrawl-parse/SKILL.md | 66 +++++++++++++++++---------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/skills/firecrawl-parse/SKILL.md b/skills/firecrawl-parse/SKILL.md index 3a38b5711..c3f78568d 100644 --- a/skills/firecrawl-parse/SKILL.md +++ b/skills/firecrawl-parse/SKILL.md @@ -1,7 +1,7 @@ --- name: firecrawl-parse description: | - Convert a local file (PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML) into clean markdown, HTML, or structured JSON. Use this skill when the user points at a file on disk and wants its content extracted — says "parse this PDF", "convert this Word doc", "read this file", "extract text from", "PDF to markdown", "DOCX to markdown", or provides a local path (not a URL). Also supports AI summary and query ("what does this PDF say about X?"). Use this instead of `scrape` for anything on the local filesystem. 
+ Convert a local file (PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML) into clean markdown. Use this skill when the user points at a file on disk and wants its content — says "parse this PDF", "convert this Word doc", "read this file", "extract text from", "PDF to markdown", "DOCX to markdown", or provides a local path (not a URL). Also supports AI summary and Q&A. Use this instead of `scrape` for anything on the local filesystem. allowed-tools: - Bash(firecrawl *) - Bash(npx firecrawl *) @@ -9,61 +9,65 @@ allowed-tools: # firecrawl parse -Parse a local file into clean, LLM-optimized markdown. Supported formats: **HTML, PDF, DOCX, DOC, ODT, RTF, XLSX, XLS**. +Turn any local document into clean markdown. Supported file types: **PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML**. ## When to use -- You have a file on disk (not a URL) and want its text +- You have a file on disk (not a URL) and want its text as markdown - User drops a PDF/DOCX and asks what it says, or to summarize it -- You need markdown from a Word doc, spreadsheet, or PDF to feed into other tools +- You need a Word doc, spreadsheet, or PDF as markdown to feed into other tools - Use `scrape` instead when the source is a URL ## Quick start ```bash -# Basic — PDF/DOCX/etc. to markdown +# Any file → clean markdown firecrawl parse ./paper.pdf -o .firecrawl/paper.md -# Summary shortcut (AI-generated overview) +# AI summary firecrawl parse ./paper.pdf -S -o .firecrawl/summary.md # Ask a question about the doc firecrawl parse ./paper.pdf -Q "What are the main conclusions?" - -# Multiple formats → JSON bundle (markdown + links + summary + metadata) -firecrawl parse ./paper.pdf -f markdown,links,summary --pretty -o .firecrawl/paper.json - -# Raw HTML output -firecrawl parse ./paper.pdf -H -o .firecrawl/paper.html ``` +That covers almost every case. The rest below is for when you need more. 
+ ## Options -| Option | Description | -| ------------------------ | --------------------------------------------------------------------------------------------------- | -| `-f, --format ` | Output formats (comma-separated): markdown, html, rawHtml, links, images, summary, json, attributes | -| `-S, --summary` | Shortcut for `--format summary` (AI summary) | -| `-H, --html` | Shortcut for `--format html` (raw HTML) | -| `-Q, --query ` | Ask a question about the parsed content | -| `--only-main-content` | Strip boilerplate, main content only | -| `--include-tags ` | Only include these HTML tags | -| `--exclude-tags ` | Exclude these HTML tags | -| `--timeout ` | Timeout for the parse job | -| `-o, --output ` | Output file path (default: stdout) | -| `--json` | Force JSON output | -| `--pretty` | Pretty-print JSON | -| `--timing` | Show request duration | +| Option | Description | +| ---------------------- | ------------------------------------------------ | +| `-S, --summary` | AI-generated summary (shortcut for `-f summary`) | +| `-Q, --query ` | Ask a question about the parsed content | +| `-o, --output ` | Output file path (default: stdout) | +| `--only-main-content` | Strip boilerplate | +| `--timing` | Show request duration | + +## Other formats + +Default output is markdown. Pass `-f` to request alternates or bundles: + +```bash +firecrawl parse ./paper.pdf -f html -o paper.html # cleaned HTML +firecrawl parse ./page.html -f markdown,links,images \ # JSON bundle + --pretty -o page.json +``` + +- `markdown` (default), `html`, `rawHtml`, `summary` — work on every file type +- `links`, `images` — work on HTML input; **return empty arrays for PDF/DOCX** (those formats don't carry link/image structure) +- Multiple formats → JSON output keyed by format name +- For structured/schema-based extraction, use `firecrawl agent` instead ## Tips -- **Scrape vs parse**: `scrape` takes a URL, `parse` takes a local file path. A remote PDF URL can still go through `scrape`. 
-- **Single vs multi format**: one `--format` value returns raw content; multiple return JSON with keys for each format. +- **Scrape vs parse**: `scrape` takes a URL, `parse` takes a local file path. - **Quote paths with spaces**: `firecrawl parse "./My Doc.pdf"`. -- **PDFs may return empty `links`/`images`** — PDF structure doesn't always carry link/image metadata like HTML does. That's expected, not a failure. -- **Large docs**: parse time scales with file size. A ~50-page PDF takes ~10s. Use `--timing` to check. -- **Query vs save-and-grep**: `-Q` is convenient for single questions. For deeper analysis, save to file first, then `grep`/read the markdown. +- **Credits scale with PDF pages**: ~1 credit per page. HTML is typically 1 credit flat. +- **Parse time**: ~10s for a 50-page PDF. Use `--timing` to measure. +- **Query vs save-and-grep**: `-Q` is great for a single question. For deeper analysis, save to markdown first, then `grep` or read the file. ## See also - [firecrawl-scrape](../firecrawl-scrape/SKILL.md) — same idea but for URLs +- [firecrawl-agent](../firecrawl-agent/SKILL.md) — structured data extraction with a schema - [firecrawl-download](../firecrawl-download/SKILL.md) — bulk save a site as local files (which you can then parse) From 8985ddce01c44b9b30949004b2159260c829c76a Mon Sep 17 00:00:00 2001 From: Developers Digest <124798203+developersdigest@users.noreply.github.com> Date: Wed, 22 Apr 2026 09:34:08 -0400 Subject: [PATCH 04/10] bias parse skill toward save-to-file, drop weak formats default pattern: parse to .firecrawl/ then grep/read. removed links/images/rawhtml from skill - kept markdown/html/summary (the formats that work cleanly on all supported file types). 
--- skills/firecrawl-parse/SKILL.md | 59 +++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/skills/firecrawl-parse/SKILL.md b/skills/firecrawl-parse/SKILL.md index c3f78568d..c876e86b6 100644 --- a/skills/firecrawl-parse/SKILL.md +++ b/skills/firecrawl-parse/SKILL.md @@ -1,7 +1,7 @@ --- name: firecrawl-parse description: | - Convert a local file (PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML) into clean markdown. Use this skill when the user points at a file on disk and wants its content — says "parse this PDF", "convert this Word doc", "read this file", "extract text from", "PDF to markdown", "DOCX to markdown", or provides a local path (not a URL). Also supports AI summary and Q&A. Use this instead of `scrape` for anything on the local filesystem. + Convert a local file (PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML) into clean markdown saved to disk. Use this skill when the user points at a file on disk and wants its content — says "parse this PDF", "convert this Word doc", "read this file", "extract text from", "PDF to markdown", "DOCX to markdown", or provides a local path (not a URL). Also supports AI summary and Q&A. Use this instead of `scrape` for anything on the local filesystem. allowed-tools: - Bash(firecrawl *) - Bash(npx firecrawl *) @@ -9,7 +9,7 @@ allowed-tools: # firecrawl parse -Turn any local document into clean markdown. Supported file types: **PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML**. +Turn any local document into clean markdown on disk. Supported file types: **PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML**. ## When to use @@ -18,17 +18,39 @@ Turn any local document into clean markdown. Supported file types: **PDF, DOCX, - You need a Word doc, spreadsheet, or PDF as markdown to feed into other tools - Use `scrape` instead when the source is a URL +## Always save to a file + +**Default pattern**: parse to a file in `.firecrawl/`, then read or `grep` it. 
Don't stream full parsed content into the conversation — parsed docs can be hundreds of KB and blow up context windows. + +```bash +mkdir -p .firecrawl + +# Always use -o. Name the output after the source file. +firecrawl parse ./paper.pdf -o .firecrawl/paper.md +``` + +Add `.firecrawl/` to `.gitignore`. + +After parsing, work with the file incrementally: + +```bash +wc -l .firecrawl/paper.md # size check first +head -50 .firecrawl/paper.md # preview +grep -n "conclusion" .firecrawl/paper.md # targeted lookup +``` + ## Quick start ```bash -# Any file → clean markdown +# File → markdown on disk firecrawl parse ./paper.pdf -o .firecrawl/paper.md -# AI summary -firecrawl parse ./paper.pdf -S -o .firecrawl/summary.md +# AI summary to its own file +firecrawl parse ./paper.pdf -S -o .firecrawl/paper-summary.md -# Ask a question about the doc -firecrawl parse ./paper.pdf -Q "What are the main conclusions?" +# Q&A — small answers are okay in stdout, but save if you might reuse +firecrawl parse ./paper.pdf -Q "What are the main conclusions?" \ + -o .firecrawl/paper-conclusions.md ``` That covers almost every case. The rest below is for when you need more. @@ -39,7 +61,7 @@ That covers almost every case. The rest below is for when you need more. | ---------------------- | ------------------------------------------------ | | `-S, --summary` | AI-generated summary (shortcut for `-f summary`) | | `-Q, --query ` | Ask a question about the parsed content | -| `-o, --output ` | Output file path (default: stdout) | +| `-o, --output ` | Output file path — **always use this** | | `--only-main-content` | Strip boilerplate | | `--timing` | Show request duration | @@ -48,23 +70,26 @@ That covers almost every case. The rest below is for when you need more. Default output is markdown. 
Pass `-f` to request alternates or bundles: ```bash -firecrawl parse ./paper.pdf -f html -o paper.html # cleaned HTML -firecrawl parse ./page.html -f markdown,links,images \ # JSON bundle - --pretty -o page.json +# Cleaned HTML instead of markdown +firecrawl parse ./paper.pdf -f html -o .firecrawl/paper.html + +# Markdown + summary together (JSON bundle) +firecrawl parse ./paper.pdf -f markdown,summary --pretty \ + -o .firecrawl/paper-bundle.json ``` -- `markdown` (default), `html`, `rawHtml`, `summary` — work on every file type -- `links`, `images` — work on HTML input; **return empty arrays for PDF/DOCX** (those formats don't carry link/image structure) -- Multiple formats → JSON output keyed by format name -- For structured/schema-based extraction, use `firecrawl agent` instead +Formats: `markdown` (default), `html`, `summary`. Multiple formats → JSON output keyed by format name. + +For structured/schema-based extraction, use `firecrawl agent` instead. ## Tips - **Scrape vs parse**: `scrape` takes a URL, `parse` takes a local file path. -- **Quote paths with spaces**: `firecrawl parse "./My Doc.pdf"`. +- **Quote paths with spaces**: `firecrawl parse "./My Doc.pdf" -o .firecrawl/mydoc.md`. - **Credits scale with PDF pages**: ~1 credit per page. HTML is typically 1 credit flat. - **Parse time**: ~10s for a 50-page PDF. Use `--timing` to measure. -- **Query vs save-and-grep**: `-Q` is great for a single question. For deeper analysis, save to markdown first, then `grep` or read the file. +- **Naming convention**: `.firecrawl/{source-basename}.md` — keeps outputs easy to find and re-use. +- **Avoid redundant parses**: check `.firecrawl/` before re-parsing the same file. ## See also From 0101fc2e686c72678241d53badfb874bafe598b8 Mon Sep 17 00:00:00 2001 From: Developers Digest <124798203+developersdigest@users.noreply.github.com> Date: Wed, 22 Apr 2026 09:34:38 -0400 Subject: [PATCH 05/10] trim parse skill: less is more cut to essentials. 
match density of firecrawl-map. dropped core-capability section, other-formats section, verbose tips. --- skills/firecrawl-parse/SKILL.md | 82 +++++++++------------------------ 1 file changed, 22 insertions(+), 60 deletions(-) diff --git a/skills/firecrawl-parse/SKILL.md b/skills/firecrawl-parse/SKILL.md index c876e86b6..fa5ec6fc3 100644 --- a/skills/firecrawl-parse/SKILL.md +++ b/skills/firecrawl-parse/SKILL.md @@ -1,7 +1,7 @@ --- name: firecrawl-parse description: | - Convert a local file (PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML) into clean markdown saved to disk. Use this skill when the user points at a file on disk and wants its content — says "parse this PDF", "convert this Word doc", "read this file", "extract text from", "PDF to markdown", "DOCX to markdown", or provides a local path (not a URL). Also supports AI summary and Q&A. Use this instead of `scrape` for anything on the local filesystem. + Convert a local file (PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML) into clean markdown saved to disk. Use this skill when the user points at a file on disk and wants its content — says "parse this PDF", "convert this Word doc", "read this file", "extract text from", "PDF to markdown", or provides a local path (not a URL). Also supports AI summary and Q&A. Use this instead of `scrape` for local files. allowed-tools: - Bash(firecrawl *) - Bash(npx firecrawl *) @@ -9,90 +9,52 @@ allowed-tools: # firecrawl parse -Turn any local document into clean markdown on disk. Supported file types: **PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML**. +Turn a local document into clean markdown on disk. Supports **PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML**. 
## When to use - You have a file on disk (not a URL) and want its text as markdown - User drops a PDF/DOCX and asks what it says, or to summarize it -- You need a Word doc, spreadsheet, or PDF as markdown to feed into other tools - Use `scrape` instead when the source is a URL -## Always save to a file +## Quick start -**Default pattern**: parse to a file in `.firecrawl/`, then read or `grep` it. Don't stream full parsed content into the conversation — parsed docs can be hundreds of KB and blow up context windows. +Always save to `.firecrawl/` with `-o` — parsed docs can be hundreds of KB and blow up context if streamed to stdout. Add `.firecrawl/` to `.gitignore`. ```bash mkdir -p .firecrawl -# Always use -o. Name the output after the source file. -firecrawl parse ./paper.pdf -o .firecrawl/paper.md -``` - -Add `.firecrawl/` to `.gitignore`. - -After parsing, work with the file incrementally: - -```bash -wc -l .firecrawl/paper.md # size check first -head -50 .firecrawl/paper.md # preview -grep -n "conclusion" .firecrawl/paper.md # targeted lookup -``` - -## Quick start - -```bash -# File → markdown on disk +# File → markdown firecrawl parse ./paper.pdf -o .firecrawl/paper.md -# AI summary to its own file +# AI summary firecrawl parse ./paper.pdf -S -o .firecrawl/paper-summary.md -# Q&A — small answers are okay in stdout, but save if you might reuse +# Ask a question about the doc firecrawl parse ./paper.pdf -Q "What are the main conclusions?" \ - -o .firecrawl/paper-conclusions.md + -o .firecrawl/paper-qa.md ``` -That covers almost every case. The rest below is for when you need more. +Then `head`, `grep`, or incrementally read the file — don't load the whole thing at once. 
## Options

-| Option | Description |
-| ---------------------- | ------------------------------------------------ |
-| `-S, --summary` | AI-generated summary (shortcut for `-f summary`) |
-| `-Q, --query <query>` | Ask a question about the parsed content |
-| `-o, --output <path>` | Output file path — **always use this** |
-| `--only-main-content` | Strip boilerplate |
-| `--timing` | Show request duration |
-
-## Other formats
-
-Default output is markdown. Pass `-f` to request alternates or bundles:
-
-```bash
-# Cleaned HTML instead of markdown
-firecrawl parse ./paper.pdf -f html -o .firecrawl/paper.html
-
-# Markdown + summary together (JSON bundle)
-firecrawl parse ./paper.pdf -f markdown,summary --pretty \
-  -o .firecrawl/paper-bundle.json
-```
-
-Formats: `markdown` (default), `html`, `summary`. Multiple formats → JSON output keyed by format name.
-
-For structured/schema-based extraction, use `firecrawl agent` instead.
+| Option | Description |
+| ---------------------- | --------------------------------------- |
+| `-S, --summary` | AI-generated summary |
+| `-Q, --query <query>` | Ask a question about the parsed content |
+| `-o, --output <path>` | Output file path — **always use this** |
+| `-f, --format <formats>` | `markdown` (default), `html`, `summary` |
+| `--only-main-content` | Strip boilerplate |
+| `--timing` | Show request duration |

## Tips

-- **Scrape vs parse**: `scrape` takes a URL, `parse` takes a local file path.
-- **Quote paths with spaces**: `firecrawl parse "./My Doc.pdf" -o .firecrawl/mydoc.md`.
-- **Credits scale with PDF pages**: ~1 credit per page. HTML is typically 1 credit flat.
-- **Parse time**: ~10s for a 50-page PDF. Use `--timing` to measure.
-- **Naming convention**: `.firecrawl/{source-basename}.md` — keeps outputs easy to find and re-use.
-- **Avoid redundant parses**: check `.firecrawl/` before re-parsing the same file.
+- Quote paths with spaces: `firecrawl parse "./My Doc.pdf" -o .firecrawl/mydoc.md`.
+- Credits: ~1 per PDF page; HTML is 1 flat.
+- Check `.firecrawl/` before re-parsing the same file. ## See also -- [firecrawl-scrape](../firecrawl-scrape/SKILL.md) — same idea but for URLs -- [firecrawl-agent](../firecrawl-agent/SKILL.md) — structured data extraction with a schema -- [firecrawl-download](../firecrawl-download/SKILL.md) — bulk save a site as local files (which you can then parse) +- [firecrawl-scrape](../firecrawl-scrape/SKILL.md) — same idea for URLs +- [firecrawl-agent](../firecrawl-agent/SKILL.md) — structured extraction with a schema From 33d6b3ba6493bfbfa2325b5c966452561cb1b876 Mon Sep 17 00:00:00 2001 From: Developers Digest <124798203+developersdigest@users.noreply.github.com> Date: Wed, 22 Apr 2026 09:37:44 -0400 Subject: [PATCH 06/10] align parse skill with /v2/parse api ground truth add .htm/.xhtml to supported list, add --timeout option. mirrors ParseFile/ParseOptions from firecrawl core (commit c7993d7). --- skills/firecrawl-parse/SKILL.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skills/firecrawl-parse/SKILL.md b/skills/firecrawl-parse/SKILL.md index fa5ec6fc3..6437782ff 100644 --- a/skills/firecrawl-parse/SKILL.md +++ b/skills/firecrawl-parse/SKILL.md @@ -9,7 +9,7 @@ allowed-tools: # firecrawl parse -Turn a local document into clean markdown on disk. Supports **PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML**. +Turn a local document into clean markdown on disk. Supports **PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML/HTM/XHTML**. 
## When to use

@@ -46,6 +46,7 @@
 | `-o, --output <path>` | Output file path — **always use this** |
 | `-f, --format <formats>` | `markdown` (default), `html`, `summary` |
 | `--only-main-content` | Strip boilerplate |
+| `--timeout <ms>` | Timeout for the parse job |
 | `--timing` | Show request duration |

## Tips

From 58df65673541d5a8f7c3b15334468d1a2403b402 Mon Sep 17 00:00:00 2001
From: Developers Digest <124798203+developersdigest@users.noreply.github.com>
Date: Wed, 22 Apr 2026 09:38:02 -0400
Subject: [PATCH 07/10] drop --only-main-content from parse skill

option exists on api but isn't meaningful for pdf/docx/xlsx - no
nav/footer/sidebar chrome to strip on local documents.
---
 skills/firecrawl-parse/SKILL.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/skills/firecrawl-parse/SKILL.md b/skills/firecrawl-parse/SKILL.md
index 6437782ff..049cda24f 100644
--- a/skills/firecrawl-parse/SKILL.md
+++ b/skills/firecrawl-parse/SKILL.md
@@ -45,7 +45,6 @@ Then `head`, `grep`, or incrementally read the file — don't load the whole thi
 | `-Q, --query <query>` | Ask a question about the parsed content |
 | `-o, --output <path>` | Output file path — **always use this** |
 | `-f, --format <formats>` | `markdown` (default), `html`, `summary` |
-| `--only-main-content` | Strip boilerplate |
 | `--timeout <ms>` | Timeout for the parse job |
 | `--timing` | Show request duration |

## Tips

From 1cbf76344d67b3d6ed2f6d52926758d44aeee51e Mon Sep 17 00:00:00 2001
From: Developers Digest <124798203+developersdigest@users.noreply.github.com>
Date: Wed, 22 Apr 2026 09:45:44 -0400
Subject: [PATCH 08/10] refine firecrawl-parse skill

---
 skills/firecrawl-parse/SKILL.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/skills/firecrawl-parse/SKILL.md b/skills/firecrawl-parse/SKILL.md
index 049cda24f..d6322b04f 100644
--- a/skills/firecrawl-parse/SKILL.md
+++ b/skills/firecrawl-parse/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: firecrawl-parse
 description: |
- Convert a local file (PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML) into clean markdown saved to disk. Use this skill when the user points at a file on disk and wants its content — says "parse this PDF", "convert this Word doc", "read this file", "extract text from", "PDF to markdown", or provides a local path (not a URL). Also supports AI summary and Q&A. Use this instead of `scrape` for local files.
+ Efficiently extract and convert the contents of any local file—such as PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, or HTML—into clean, well-formatted markdown saved to disk. Use this skill whenever the user requests to parse, read, or extract information from a file on their computer, including phrases like "parse this PDF", "convert this document", "read this file", "extract text from", or when a local file path (not a URL) is provided. This skill offers advanced options like generating AI-powered summaries and answering questions based on the file's content. Prefer this tool over `scrape` when handling local files to deliver precise, structured outputs for downstream tasks.
 allowed-tools:
 - Bash(firecrawl *)
 - Bash(npx firecrawl *)
@@ -35,7 +35,7 @@ firecrawl parse ./paper.pdf -Q "What are the main conclusions?" \
 -o .firecrawl/paper-qa.md
 ```

-Then `head`, `grep`, or incrementally read the file — don't load the whole thing at once.
+Then `head`, `grep`, `rg` etc., or incrementally read the file — don't load the whole thing at once.

## Options

@@ -53,8 +53,8 @@ Then `head`, `grep`, `rg` etc., or incrementally read the file — don't load the

## Tips

- Quote paths with spaces: `firecrawl parse "./My Doc.pdf" -o .firecrawl/mydoc.md`.
- Credits: ~1 per PDF page; HTML is 1 flat.
- Check `.firecrawl/` before re-parsing the same file.
+- To check your credit balance (recommended for batch processing and similar workflows), use the `firecrawl credit-usage` command.
## See also - [firecrawl-scrape](../firecrawl-scrape/SKILL.md) — same idea for URLs -- [firecrawl-agent](../firecrawl-agent/SKILL.md) — structured extraction with a schema From 1f8add95eb7bcae210238ef52c11adfef69f6b52 Mon Sep 17 00:00:00 2001 From: Developers Digest <124798203+developersdigest@users.noreply.github.com> Date: Wed, 22 Apr 2026 09:47:17 -0400 Subject: [PATCH 09/10] note 50 mb upload limit in parse skill --- skills/firecrawl-parse/SKILL.md | 1 + 1 file changed, 1 insertion(+) diff --git a/skills/firecrawl-parse/SKILL.md b/skills/firecrawl-parse/SKILL.md index d6322b04f..f350cb2a9 100644 --- a/skills/firecrawl-parse/SKILL.md +++ b/skills/firecrawl-parse/SKILL.md @@ -51,6 +51,7 @@ Then `head`, `grep`, `rg` etc., or incrementally read the file - don't load the ## Tips - Quote paths with spaces: `firecrawl parse "./My Doc.pdf" -o .firecrawl/mydoc.md`. +- Max upload size: **50 MB** per file. - Credits: ~1 per PDF page; HTML is 1 flat. - Check `.firecrawl/` before re-parsing the same file. - To check your credit balance (recommended for batch processing and similar workflows), use the `firecrawl credit-usage` command. From aaca75fd1b76fee165f1b0c7df7506c8234d43fc Mon Sep 17 00:00:00 2001 From: Developers Digest <124798203+developersdigest@users.noreply.github.com> Date: Wed, 22 Apr 2026 11:09:17 -0400 Subject: [PATCH 10/10] bump version to 1.16.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 27ab11144..fcc50c6a7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "firecrawl-cli", - "version": "1.15.2", + "version": "1.16.0", "description": "Command-line interface for Firecrawl. Scrape, crawl, and extract data from any website directly from your terminal.", "main": "dist/index.js", "bin": {