From 1d3b130afb712ae14569f0ff29a25fc9dcd358b7 Mon Sep 17 00:00:00 2001
From: default <nobita2041@gmail.com>
Date: Tue, 20 Jan 2026 20:25:25 +0900
Subject: [PATCH 1/2] Fix array format output for links and images

The links and images formats are returned by the Firecrawl API as arrays,
but writeOutput expected a string, which caused endsWith() to fail.

Arrays are now converted to newline-separated strings for CLI-friendly output.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/utils/output.ts | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/utils/output.ts b/src/utils/output.ts
index d913f8d17..1373d97dd 100644
--- a/src/utils/output.ts
+++ b/src/utils/output.ts
@@ -25,14 +25,22 @@ function extractContent(data: any, format?: ScrapeFormat): string | null {
       return data.markdown || data[format] || null;
     }
 
-    // Handle links format
+    // Handle links format (array of URLs -> newline-separated string)
     if (format === 'links') {
-      return data.links || data[format] || null;
+      const links = data.links || data[format];
+      if (Array.isArray(links)) {
+        return links.join('\n');
+      }
+      return links || null;
     }
 
-    // Handle images format
+    // Handle images format (array of URLs -> newline-separated string)
     if (format === 'images') {
-      return data.images || data[format] || null;
+      const images = data.images || data[format];
+      if (Array.isArray(images)) {
+        return images.join('\n');
+      }
+      return images || null;
     }
 
     // Handle summary format

From 35941324261f386fd4821ad1b738084a17df2533 Mon Sep 17 00:00:00 2001
From: default <nobita2041@gmail.com>
Date: Tue, 20 Jan 2026 20:52:54 +0900
Subject: [PATCH 2/2] Add multiple format support for scrape command

- Allow comma-separated formats: --format markdown,links,images
- Single format outputs raw content (backward compatible)
- Multiple formats output JSON with all requested data
- Case-insensitive format input (rawHtml, RAWHTML both work)
- Add format validation with helpful error messages
- Update help text to clarify multiple format usage

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/__tests__/commands/scrape.test.ts |  30 ++++-
 src/commands/scrape.ts                |  33 ++++--
 src/index.ts                          |   4 +-
 src/types/scrape.ts                   |   4 +-
 src/utils/options.ts                  |  68 ++++++++++-
 src/utils/output.ts                   | 157 ++++++++++++++++----------
 6 files changed, 213 insertions(+), 83 deletions(-)

diff --git a/src/__tests__/commands/scrape.test.ts b/src/__tests__/commands/scrape.test.ts
index e054f1d9b..15b284f08 100644
--- a/src/__tests__/commands/scrape.test.ts
+++ b/src/__tests__/commands/scrape.test.ts
@@ -63,7 +63,7 @@ describe('executeScrape', () => {
 
       await executeScrape({
         url: 'https://example.com',
-        format: 'html',
+        formats: ['html'],
       });
 
       expect(mockClient.scrape).toHaveBeenCalledWith('https://example.com', {
@@ -97,7 +97,7 @@ describe('executeScrape', () => {
 
       await executeScrape({
         url: 'https://example.com',
-        format: 'markdown',
+        formats: ['markdown'],
         screenshot: true,
       });
 
@@ -172,7 +172,7 @@ describe('executeScrape', () => {
 
       await executeScrape({
         url: 'https://example.com',
-        format: 'markdown',
+        formats: ['markdown'],
         screenshot: true,
         onlyMainContent: true,
         waitFor: 3000,
@@ -256,21 +256,39 @@ describe('executeScrape', () => {
 
   describe('Type safety', () => {
     it('should accept valid ScrapeFormat types', async () => {
-      const formats: Array<'markdown' | 'html' | 'rawHtml' | 'links'> = [
+      const formatList: Array<'markdown' | 'html' | 'rawHtml' | 'links'> = [
         'markdown',
         'html',
         'rawHtml',
         'links',
       ];
 
-      for (const format of formats) {
+      for (const format of formatList) {
         mockClient.scrape.mockResolvedValue({ [format]: 'test' });
         const result = await executeScrape({
           url: 'https://example.com',
-          format,
+          formats: [format],
         });
         expect(result.success).toBe(true);
       }
     });
+
+    it('should accept multiple formats', async () => {
+      mockClient.scrape.mockResolvedValue({
+        markdown: '# Test',
+        links: ['http://a.com'],
+        images: ['http://img.com/a.png'],
+      });
+
+      const result = await executeScrape({
+        url: 'https://example.com',
+        formats: ['markdown', 'links', 'images'],
+      });
+
+      expect(result.success).toBe(true);
+      expect(mockClient.scrape).toHaveBeenCalledWith('https://example.com', {
+        formats: ['markdown', 'links', 'images'],
+      });
+    });
   });
 });
diff --git a/src/commands/scrape.ts b/src/commands/scrape.ts
index dcf95e2c8..cdd5d679a 100644
--- a/src/commands/scrape.ts
+++ b/src/commands/scrape.ts
@@ -3,7 +3,11 @@
  */
 
 import type { FormatOption } from '@mendable/firecrawl-js';
-import type { ScrapeOptions, ScrapeResult } from '../types/scrape';
+import type {
+  ScrapeOptions,
+  ScrapeResult,
+  ScrapeFormat,
+} from '../types/scrape';
 import { getClient } from '../utils/client';
 import { handleScrapeOutput } from '../utils/output';
 
@@ -51,15 +55,14 @@ export async function executeScrape(
   // Build scrape options
   const formats: FormatOption[] = [];
 
-  if (options.format) {
-    formats.push(options.format);
+  // Add requested formats
+  if (options.formats && options.formats.length > 0) {
+    formats.push(...options.formats);
   }
 
-  if (options.screenshot) {
-    // Add screenshot format if not already included
-    if (!formats.includes('screenshot')) {
-      formats.push('screenshot');
-    }
+  // Add screenshot format if requested and not already included
+  if (options.screenshot && !formats.includes('screenshot')) {
+    formats.push('screenshot');
   }
 
   // If no formats specified, default to markdown
@@ -123,5 +126,17 @@ export async function handleScrapeCommand(
   options: ScrapeOptions
 ): Promise<void> {
   const result = await executeScrape(options);
-  handleScrapeOutput(result, options.format, options.output, options.pretty);
+
+  // Determine effective formats for output handling
+  const effectiveFormats: ScrapeFormat[] =
+    options.formats && options.formats.length > 0
+      ? [...options.formats]
+      : ['markdown'];
+
+  // Add screenshot to effective formats if it was requested separately
+  if (options.screenshot && !effectiveFormats.includes('screenshot')) {
+    effectiveFormats.push('screenshot');
+  }
+
+  handleScrapeOutput(result, effectiveFormats, options.output, options.pretty);
 }
diff --git a/src/index.ts b/src/index.ts
index fc00ffab8..e41b72b88 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -54,8 +54,8 @@ function createScrapeCommand(): Command {
     )
     .option('-H, --html', 'Output raw HTML (shortcut for --format html)')
     .option(
-      '-f, --format <format>',
-      'Output format: markdown, html, rawHtml, links, images, screenshot, summary, changeTracking, json, attributes, branding',
+      '-f, --format <formats>',
+      'Output format(s). Multiple formats can be specified with commas (e.g., "markdown,links,images"). Available: markdown, html, rawHtml, links, images, screenshot, summary, changeTracking, json, attributes, branding. Single format outputs raw content; multiple formats output JSON.',
       'markdown'
     )
     .option('--only-main-content', 'Include only main content', false)
diff --git a/src/types/scrape.ts b/src/types/scrape.ts
index dca3a18ec..2adbaf36f 100644
--- a/src/types/scrape.ts
+++ b/src/types/scrape.ts
@@ -18,8 +18,8 @@ export type ScrapeFormat =
 export interface ScrapeOptions {
   /** URL to scrape */
   url: string;
-  /** Output format (markdown, html, etc.) */
-  format?: ScrapeFormat;
+  /** Output formats to request; a single format is a one-element array */
+  formats?: ScrapeFormat[];
   /** Include only main content */
   onlyMainContent?: boolean;
   /** Wait time before scraping (ms) */
diff --git a/src/utils/options.ts b/src/utils/options.ts
index dfd1f94df..67b0166c6 100644
--- a/src/utils/options.ts
+++ b/src/utils/options.ts
@@ -2,15 +2,79 @@
  * Option parsing utilities
  */
 
-import type { ScrapeOptions } from '../types/scrape';
+import type { ScrapeOptions, ScrapeFormat } from '../types/scrape';
+
+/**
+ * Valid scrape format values
+ */
+const VALID_FORMATS: ScrapeFormat[] = [
+  'markdown',
+  'html',
+  'rawHtml',
+  'links',
+  'images',
+  'screenshot',
+  'summary',
+  'changeTracking',
+  'json',
+  'attributes',
+  'branding',
+];
+
+/**
+ * Map from lowercase to correct camelCase format
+ */
+const FORMAT_MAP: Record<string, ScrapeFormat> = Object.fromEntries(
+  VALID_FORMATS.map((f) => [f.toLowerCase(), f])
+) as Record<string, ScrapeFormat>;
+
+/**
+ * Parse a comma-separated format string (e.g. "markdown,links,images") into
+ * a de-duplicated, order-preserving list of canonical (camelCase) formats.
+ * Matching is case-insensitive; empty entries are ignored.
+ *
+ * @returns the requested formats (empty when the input names none)
+ * @throws Error naming every unrecognized format
+ */
+export function parseFormats(formatString: string): ScrapeFormat[] {
+  const inputFormats = formatString
+    .split(',')
+    .map((f) => f.trim().toLowerCase())
+    .filter((f) => f.length > 0);
+
+  const invalidFormats: string[] = [];
+  const validFormats: ScrapeFormat[] = [];
+  for (const input of inputFormats) {
+    // Own-property check so inherited keys (e.g. "constructor") are rejected
+    const known = Object.prototype.hasOwnProperty.call(FORMAT_MAP, input);
+    const mapped = known ? FORMAT_MAP[input] : undefined;
+    if (mapped) {
+      validFormats.push(mapped);
+    } else {
+      invalidFormats.push(input);
+    }
+  }
+  if (invalidFormats.length > 0) {
+    throw new Error(
+      `Invalid format(s): ${invalidFormats.join(', ')}. Valid formats are: ${VALID_FORMATS.join(', ')}`
+    );
+  }
+  return [...new Set(validFormats)];
+}
 
 /**
  * Convert commander options to ScrapeOptions
  */
 export function parseScrapeOptions(options: any): ScrapeOptions {
+  // Parse formats from comma-separated string
+  let formats: ScrapeFormat[] | undefined;
+  if (options.format) {
+    formats = parseFormats(options.format);
+  }
+
   return {
     url: options.url,
-    format: options.format,
+    formats,
     onlyMainContent: options.onlyMainContent,
     waitFor: options.waitFor,
     screenshot: options.screenshot,
diff --git a/src/utils/output.ts b/src/utils/output.ts
index 1373d97dd..cb822fe3f 100644
--- a/src/utils/output.ts
+++ b/src/utils/output.ts
@@ -4,62 +4,91 @@
 
 import * as fs from 'fs';
 import * as path from 'path';
-import type { ScrapeResult } from '../types/scrape';
-import type { ScrapeFormat } from '../types/scrape';
+import type { ScrapeResult, ScrapeFormat } from '../types/scrape';
+
+/**
+ * Text formats that can be output as raw content (curl-like)
+ */
+const RAW_TEXT_FORMATS: ScrapeFormat[] = [
+  'html',
+  'rawHtml',
+  'markdown',
+  'links',
+  'images',
+  'summary',
+];
 
 /**
  * Extract content from Firecrawl Document based on format
  */
-function extractContent(data: any, format?: ScrapeFormat): string | null {
+function extractContent(data: any, format: ScrapeFormat): string | null {
   if (!data) return null;
 
-  // If format is specified, try to extract that specific content
-  if (format) {
-    // Handle html/rawHtml formats - extract HTML content directly
-    if (format === 'html' || format === 'rawHtml') {
-      return data.html || data.rawHtml || data[format] || null;
-    }
+  // Handle html/rawHtml formats - extract HTML content directly
+  if (format === 'html' || format === 'rawHtml') {
+    return data.html || data.rawHtml || data[format] || null;
+  }
 
-    // Handle markdown format
-    if (format === 'markdown') {
-      return data.markdown || data[format] || null;
-    }
+  // Handle markdown format
+  if (format === 'markdown') {
+    return data.markdown || data[format] || null;
+  }
 
-    // Handle links format (array of URLs -> newline-separated string)
-    if (format === 'links') {
-      const links = data.links || data[format];
-      if (Array.isArray(links)) {
-        return links.join('\n');
-      }
-      return links || null;
+  // Handle links format (array of URLs -> newline-separated string)
+  if (format === 'links') {
+    const links = data.links || data[format];
+    if (Array.isArray(links)) {
+      return links.join('\n');
     }
+    return links || null;
+  }
 
-    // Handle images format (array of URLs -> newline-separated string)
-    if (format === 'images') {
-      const images = data.images || data[format];
-      if (Array.isArray(images)) {
-        return images.join('\n');
-      }
-      return images || null;
+  // Handle images format (array of URLs -> newline-separated string)
+  if (format === 'images') {
+    const images = data.images || data[format];
+    if (Array.isArray(images)) {
+      return images.join('\n');
     }
+    return images || null;
+  }
 
-    // Handle summary format
-    if (format === 'summary') {
-      return data.summary || data[format] || null;
-    }
+  // Handle summary format
+  if (format === 'summary') {
+    return data.summary || data[format] || null;
   }
 
-  // Fallback: try common content fields
-  if (typeof data === 'string') {
-    return data;
+  return null;
+}
+
+/**
+ * Extract multiple format contents from response data
+ */
+function extractMultipleFormats(
+  data: any,
+  formats: ScrapeFormat[]
+): Record<string, any> {
+  const result: Record<string, any> = {};
+
+  for (const format of formats) {
+    const key = format;
+
+    if (data[key] !== undefined) {
+      result[key] = data[key];
+    } else if (format === 'html' && data.rawHtml !== undefined) {
+      // Fallback for html -> rawHtml
+      result[key] = data.rawHtml;
+    } else if (format === 'rawHtml' && data.html !== undefined) {
+      // Fallback for rawHtml -> html
+      result[key] = data.html;
+    }
   }
 
-  // If it's an object, try to find string content
-  if (typeof data === 'object') {
-    return data.html || data.markdown || data.rawHtml || data.content || null;
+  // Always include metadata if present
+  if (data.metadata) {
+    result.metadata = data.metadata;
   }
 
-  return null;
+  return result;
 }
 
 /**
@@ -92,12 +121,15 @@ export function writeOutput(
 
 /**
  * Handle scrape result output
- * For text formats (html, markdown, etc.), outputs raw content directly
- * For complex formats, outputs JSON
+ *
+ * Output behavior:
+ * - Single text format (html, markdown, links, images, summary, rawHtml): raw content
+ * - Single complex format (screenshot, json, branding, etc.): JSON output
+ * - Multiple formats: JSON with all requested data
  */
 export function handleScrapeOutput(
   result: ScrapeResult,
-  format?: ScrapeFormat,
+  formats: ScrapeFormat[],
   outputPath?: string,
   pretty: boolean = false
 ): void {
@@ -111,42 +143,43 @@ export function handleScrapeOutput(
     return;
   }
 
-  // Text formats that should output raw content (curl-like)
-  const rawTextFormats: ScrapeFormat[] = [
-    'html',
-    'rawHtml',
-    'markdown',
-    'links',
-    'images',
-    'summary',
-  ];
-  const shouldOutputRaw = format && rawTextFormats.includes(format);
-
-  if (shouldOutputRaw) {
-    // Extract and output raw content
-    const content = extractContent(result.data, format);
+  // Determine output mode based on number of formats
+  const isSingleFormat = formats.length === 1;
+  const singleFormat = isSingleFormat ? formats[0] : null;
+  const isRawTextFormat =
+    singleFormat && RAW_TEXT_FORMATS.includes(singleFormat);
+
+  // Single raw text format: output raw content (current behavior)
+  if (isSingleFormat && isRawTextFormat && singleFormat) {
+    const content = extractContent(result.data, singleFormat);
     if (content !== null) {
       writeOutput(content, outputPath, !!outputPath);
       return;
     }
   }
 
-  // For JSON format or complex formats (branding, json, etc.), output clean JSON
-  // Always stringify the entire data object to ensure valid JSON
+  // Multiple formats or complex format: output JSON
+  let outputData: any;
+
+  if (isSingleFormat) {
+    // Single complex format - output entire data object
+    outputData = result.data;
+  } else {
+    // Multiple formats - extract only requested formats
+    outputData = extractMultipleFormats(result.data, formats);
+  }
+
   let jsonContent: string;
   try {
     jsonContent = pretty
-      ? JSON.stringify(result.data, null, 2)
-      : JSON.stringify(result.data);
+      ? JSON.stringify(outputData, null, 2)
+      : JSON.stringify(outputData);
   } catch (error) {
-    // If stringification fails, try to create a minimal error response
     jsonContent = JSON.stringify({
       error: 'Failed to serialize response',
       message: error instanceof Error ? error.message : 'Unknown error',
     });
   }
 
-  // Ensure clean JSON output (no extra newlines or text before JSON)
-  // Output directly to stdout without any prefix
   writeOutput(jsonContent, outputPath, !!outputPath);
 }