Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 121 additions & 12 deletions internal/share/pdf.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ import (
"html"
"io"
"log/slog"
"net"
"net/http"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"time"
Expand Down Expand Up @@ -77,11 +81,21 @@ func (p *PDFSharer) Export(ctx context.Context, w io.Writer, req ExportRequest)
includeAssets := SettingBool(req.Config, "include_assets", true)
pageSize := SettingString(req.Config, "page_size", "A4")

// Detect mermaid blocks in any page and load mermaid JS if needed
hasMermaid := pagesHaveMermaid(req.Pages)
var mermaidJS []byte
if hasMermaid {
mermaidJS = findMermaidJS()
if mermaidJS == nil {
hasMermaid = false // degrade gracefully — render code blocks as-is
}
}

// Render pages to HTML
htmlDoc := renderHTMLDocument(req.Pages, req.Assets, ctx, includeTOC, includeAssets)
htmlDoc := renderHTMLDocument(req.Pages, req.Assets, ctx, includeTOC, includeAssets, hasMermaid)

// Convert to PDF via headless browser
pdfBytes, err := htmlToPDF(ctx, browserPath, htmlDoc, pageSize)
pdfBytes, err := htmlToPDF(ctx, browserPath, htmlDoc, pageSize, mermaidJS)
if err != nil {
return fmt.Errorf("PDF generation failed: %w", err)
}
Expand All @@ -91,13 +105,43 @@ func (p *PDFSharer) Export(ctx context.Context, w io.Writer, req ExportRequest)
}

// renderHTMLDocument builds a complete HTML document from the exported pages.
func renderHTMLDocument(pages []Page, assets AssetReader, ctx context.Context, includeTOC, includeAssets bool) string {
// If hasMermaid is true, includes a script tag that loads mermaid from /mermaid.min.js
// (served by the local HTTP server in htmlToPDF).
func renderHTMLDocument(pages []Page, assets AssetReader, ctx context.Context, includeTOC, includeAssets, hasMermaid bool) string {
var buf strings.Builder

buf.WriteString(`<!DOCTYPE html><html><head><meta charset="utf-8">`)
buf.WriteString(`<style>`)
buf.WriteString(pdfCSS)
buf.WriteString(`</style></head><body>`)
buf.WriteString(`</style>`)
if hasMermaid {
// Load mermaid from local server, then render code blocks to SVG
buf.WriteString(`<script src="/mermaid.min.js"></script>`)
buf.WriteString(`<script>`)
buf.WriteString(`mermaid.initialize({ startOnLoad: false, theme: 'default' });`)
buf.WriteString(`window.addEventListener('DOMContentLoaded', async function() {`)
buf.WriteString(` var nodes = document.querySelectorAll('code.language-mermaid');`)
buf.WriteString(` for (var i = 0; i < nodes.length; i++) {`)
buf.WriteString(` var pre = nodes[i].parentElement;`)
buf.WriteString(` var container = document.createElement('div');`)
buf.WriteString(` container.className = 'mermaid';`)
buf.WriteString(` container.textContent = nodes[i].textContent;`)
buf.WriteString(` pre.replaceWith(container);`)
buf.WriteString(` }`)
buf.WriteString(` var mermaidNodes = document.querySelectorAll('.mermaid');`)
buf.WriteString(` if (mermaidNodes.length > 0) {`)
buf.WriteString(` await mermaid.run({ nodes: mermaidNodes });`)
buf.WriteString(` }`)
buf.WriteString(` document.body.setAttribute('data-mermaid-done', 'true');`)
buf.WriteString(`});`)
buf.WriteString(`</script>`)
}
buf.WriteString(`</head><body>`)

// If no mermaid, immediately mark done for the wait loop
if !hasMermaid {
buf.WriteString(`<script>document.body.setAttribute('data-mermaid-done','true');</script>`)
}

// Table of contents
if includeTOC && len(pages) > 1 {
Expand Down Expand Up @@ -172,8 +216,74 @@ func embedImages(body string, imageRefs []string, assets AssetReader, ctx contex
return body
}

// pagesHaveMermaid reports whether at least one page body contains the
// literal text "```mermaid", i.e. the opening of a backtick-fenced mermaid
// block. This is a plain substring search over each Body, not a Markdown
// parse, so the marker is matched wherever it occurs in the text.
func pagesHaveMermaid(pages []Page) bool {
	const fenceOpen = "```mermaid"

	found := false
	// Stop scanning as soon as one page matches.
	for i := 0; i < len(pages) && !found; i++ {
		found = strings.Contains(pages[i].Body, fenceOpen)
	}
	return found
}

// findMermaidJS locates and reads the mermaid.min.js bundle.
//
// Candidate paths are tried in this order and the first one that can be read
// and is non-empty wins:
//
//  1. webui/node_modules/mermaid/dist/mermaid.min.js relative to the working
//     directory (development, run from the project root)
//  2. the same path two levels up (tests run from the package directory)
//  3. mermaid.min.js next to the running executable (production)
//  4. webui/node_modules/mermaid/dist/mermaid.min.js next to the executable
//
// It returns the file contents, or nil when no candidate yields any content.
// Read errors are intentionally not reported: a missing bundle is an expected
// situation and callers treat nil as "mermaid unavailable".
func findMermaidJS() []byte {
	candidates := []string{
		// Development: relative to project root (cwd).
		"webui/node_modules/mermaid/dist/mermaid.min.js",
		// Two levels up from internal/share/ (tests run from package dir).
		"../../webui/node_modules/mermaid/dist/mermaid.min.js",
	}

	// Production: look next to the binary. If the executable path cannot be
	// determined, these candidates are simply left out.
	if exePath, err := os.Executable(); err == nil {
		dir := filepath.Dir(exePath)
		candidates = append(candidates,
			filepath.Join(dir, "mermaid.min.js"),
			filepath.Join(dir, "webui", "node_modules", "mermaid", "dist", "mermaid.min.js"),
		)
	}

	for _, candidate := range candidates {
		data, err := os.ReadFile(candidate)
		// A zero-byte file is treated like a missing one. Returning it would
		// hand callers a non-nil but useless bundle, defeating their nil
		// check and serving an empty script to the page.
		if err != nil || len(data) == 0 {
			continue
		}
		return data
	}

	return nil
}

// htmlToPDF uses chromedp to render HTML to PDF.
func htmlToPDF(ctx context.Context, browserPath, htmlContent, pageSize string) ([]byte, error) {
func htmlToPDF(ctx context.Context, browserPath, htmlContent, pageSize string, mermaidJS []byte) ([]byte, error) {
// Start a local HTTP server to serve the HTML content.
// This gives the page a proper origin so scripts and local resources work.
mux := http.NewServeMux()
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html; charset=utf-8")
w.Write([]byte(htmlContent))
})
if mermaidJS != nil {
mux.HandleFunc("/mermaid.min.js", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/javascript")
w.Write(mermaidJS)
})
}
listener, err := net.Listen("tcp", "127.0.0.1:0")
if err != nil {
return nil, fmt.Errorf("failed to start local server: %w", err)
}
srv := &http.Server{Handler: mux}
go srv.Serve(listener)
defer srv.Close()
defer listener.Close()
pageURL := fmt.Sprintf("http://127.0.0.1:%d/", listener.Addr().(*net.TCPAddr).Port)

// Create a context with the browser path
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.ExecPath(browserPath),
Expand All @@ -193,14 +303,13 @@ func htmlToPDF(ctx context.Context, browserPath, htmlContent, pageSize string) (

// Navigate to the HTML content and print to PDF
var pdfBuf []byte
err := chromedp.Run(taskCtx,
chromedp.Navigate("about:blank"),
err = chromedp.Run(taskCtx,
chromedp.Navigate(pageURL),
// Wait for Mermaid diagrams to finish rendering.
// The mermaid init script sets data-mermaid-done="true" on <body>
// after all diagrams render (or on error). Poll until it appears.
chromedp.ActionFunc(func(ctx context.Context) error {
frameTree, err := page.GetFrameTree().Do(ctx)
if err != nil {
return err
}
return page.SetDocumentContent(frameTree.Frame.ID, htmlContent).Do(ctx)
return chromedp.Poll(`document.body && document.body.getAttribute('data-mermaid-done') === 'true'`, nil, chromedp.WithPollingInterval(100*time.Millisecond)).Do(ctx)
}),
chromedp.ActionFunc(func(ctx context.Context) error {
paperWidth, paperHeight := paperDimensions(pageSize)
Expand Down
178 changes: 177 additions & 1 deletion internal/share/pdf_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
package share

import "testing"
import (
"bytes"
"context"
"fmt"
"net"
"net/http"
"strings"
"testing"
"time"

"github.com/chromedp/chromedp"
)

func TestPDFRegistered(t *testing.T) {
if findBrowser() == "" {
Expand All @@ -14,3 +25,168 @@ func TestPDFRegistered(t *testing.T) {
t.Errorf("expected name 'pdf', got %q", s.Name())
}
}

// TestPDFMermaidRendering is an end-to-end check of mermaid support.
//
// It first exports a page containing a mermaid fenced block through
// PDFSharer.Export and verifies the output starts with a PDF header. It then
// serves the same rendered HTML from a local HTTP server, loads it in a
// headless browser, waits for the data-mermaid-done marker on <body>, and
// inspects the resulting DOM to confirm the code block was replaced by an SVG.
//
// The test is skipped when findBrowser or findMermaidJS come back empty.
func TestPDFMermaidRendering(t *testing.T) {
	// Resolve both prerequisites once and reuse them below.
	browserPath := findBrowser()
	if browserPath == "" {
		t.Skip("no Chromium-based browser on $PATH — PDF sharer won't register")
	}
	mermaidJS := findMermaidJS()
	if mermaidJS == nil {
		t.Skip("mermaid.min.js not found — cannot test mermaid rendering")
	}

	sharer := &PDFSharer{}

	// Page with a mermaid diagram.
	mermaidPage := Page{
		Path:  "test/mermaid-page",
		Title: "Mermaid Test",
		Body: `# Test Page

Here is a diagram:

` + "```mermaid\ngraph TD\n A[Start] --> B[Process]\n B --> C[End]\n```" + `

And some text after.
`,
	}

	ctx := context.Background()

	// Export the mermaid page to PDF.
	var mermaidBuf bytes.Buffer
	err := sharer.Export(ctx, &mermaidBuf, ExportRequest{
		Pages:  []Page{mermaidPage},
		Assets: nil,
		Config: ShareConfig{Page: "test/mermaid-page", Depth: 0},
	})
	if err != nil {
		t.Fatalf("PDF export with mermaid failed: %v", err)
	}

	// Verify it's a valid PDF.
	if !bytes.HasPrefix(mermaidBuf.Bytes(), []byte("%PDF")) {
		t.Fatal("mermaid PDF output doesn't start with %PDF header")
	}
	t.Logf("mermaid PDF size: %d bytes", mermaidBuf.Len())

	// The PDF bytes alone don't show whether the diagram was drawn, so render
	// the intermediate HTML in the browser and inspect the DOM instead.
	htmlDoc := renderHTMLDocument([]Page{mermaidPage}, nil, ctx, false, false, true)

	// Serve the document and the mermaid bundle from a throwaway local server
	// so the page's "/mermaid.min.js" script reference resolves.
	mux := http.NewServeMux()
	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/html; charset=utf-8")
		w.Write([]byte(htmlDoc))
	})
	mux.HandleFunc("/mermaid.min.js", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/javascript")
		w.Write(mermaidJS)
	})
	listener, listenErr := net.Listen("tcp", "127.0.0.1:0")
	if listenErr != nil {
		t.Fatalf("failed to listen: %v", listenErr)
	}
	srv := &http.Server{Handler: mux}
	// Serve returns once the server is closed by the deferred calls below.
	go srv.Serve(listener)
	defer srv.Close()
	defer listener.Close()
	pageURL := fmt.Sprintf("http://127.0.0.1:%d/", listener.Addr().(*net.TCPAddr).Port)

	opts := append(chromedp.DefaultExecAllocatorOptions[:],
		chromedp.ExecPath(browserPath),
		chromedp.Flag("no-sandbox", true),
		chromedp.Flag("disable-gpu", true),
	)
	allocCtx, allocCancel := chromedp.NewExecAllocator(ctx, opts...)
	defer allocCancel()
	taskCtx, taskCancel := chromedp.NewContext(allocCtx)
	defer taskCancel()
	// Bound the whole browser interaction so a page that never sets the done
	// marker fails the test instead of hanging it.
	taskCtx, timeoutCancel := context.WithTimeout(taskCtx, 30*time.Second)
	defer timeoutCancel()

	var bodyHTML string
	err = chromedp.Run(taskCtx,
		chromedp.Navigate(pageURL),
		// Poll until the page script marks <body> with data-mermaid-done.
		chromedp.ActionFunc(func(ctx context.Context) error {
			return chromedp.Poll(`document.body && document.body.getAttribute('data-mermaid-done') === 'true'`, nil, chromedp.WithPollingInterval(100*time.Millisecond)).Do(ctx)
		}),
		chromedp.OuterHTML("body", &bodyHTML),
	)
	if err != nil {
		t.Fatalf("chromedp failed: %v", err)
	}

	// The rendered HTML should contain SVG elements from mermaid.
	hasSVG := strings.Contains(bodyHTML, "<svg")
	if !hasSVG {
		t.Error("rendered HTML does not contain SVG — mermaid diagram was not rendered")
	}
	if !strings.Contains(bodyHTML, "flowchart") || !strings.Contains(bodyHTML, "mermaid") {
		t.Error("rendered HTML does not contain expected mermaid/flowchart class")
	}
	// The original code block should be gone (replaced by the mermaid div).
	if strings.Contains(bodyHTML, `class="language-mermaid"`) {
		t.Error("original code.language-mermaid element still present — mermaid didn't replace it")
	}
	// t.Error does not stop the test, so report the real SVG result rather
	// than a fixed "true".
	t.Logf("rendered body HTML length: %d bytes, contains SVG: %t", len(bodyHTML), hasSVG)
}

// TestPDFRenderHTMLContainsMermaidScript verifies that renderHTMLDocument,
// called with hasMermaid=true, emits the mermaid script reference and the
// data-mermaid-done signal, and that the mermaid fenced block survives in the
// HTML as a language-mermaid code element carrying the graph definition.
func TestPDFRenderHTMLContainsMermaidScript(t *testing.T) {
	pages := []Page{
		{
			Path:  "test/page",
			Title: "Test",
			Body:  "# Hello\n\n```mermaid\ngraph TD\n A --> B\n```\n",
		},
	}

	doc := renderHTMLDocument(pages, nil, context.Background(), false, false, true)

	// Should contain mermaid.min.js script reference (local server).
	if !strings.Contains(doc, "/mermaid.min.js") {
		t.Error("HTML does not contain /mermaid.min.js script reference")
	}

	// Should contain the data-mermaid-done signal.
	if !strings.Contains(doc, "data-mermaid-done") {
		t.Error("HTML does not contain data-mermaid-done signal")
	}

	// The mermaid block is expected to be rendered as a code element.
	if !strings.Contains(doc, `class="language-mermaid"`) {
		t.Error("HTML does not contain language-mermaid code block (goldmark output)")
	}

	// Should contain the graph definition text. The header and the edge are
	// checked independently so that a missing "graph TD" is reported too.
	// The arrow is accepted either raw or HTML-escaped.
	hasHeader := strings.Contains(doc, "graph TD")
	hasEdge := strings.Contains(doc, "A --> B") || strings.Contains(doc, "A --&gt; B")
	if !hasHeader || !hasEdge {
		t.Error("HTML does not contain the mermaid graph definition")
	}
}

// TestPDFRenderHTMLNoMermaid covers the hasMermaid=false path of
// renderHTMLDocument: the mermaid bundle must not be referenced, yet the
// data-mermaid-done marker must still be present in the output.
func TestPDFRenderHTMLNoMermaid(t *testing.T) {
	plainPage := Page{
		Path:  "test/page",
		Title: "Test",
		Body:  "# Hello\n\nJust text.\n",
	}

	out := renderHTMLDocument([]Page{plainPage}, nil, context.Background(), false, false, false)

	loadsMermaid := strings.Contains(out, "/mermaid.min.js")
	hasDoneSignal := strings.Contains(out, "data-mermaid-done")

	// No script reference when mermaid is disabled.
	if loadsMermaid {
		t.Error("HTML should not contain mermaid script when hasMermaid is false")
	}
	// The done signal is still needed so the PDF wait loop can complete.
	if !hasDoneSignal {
		t.Error("HTML should still contain data-mermaid-done signal for consistency")
	}
}