diff --git a/internal/share/pdf.go b/internal/share/pdf.go index 7f87487..600ae19 100644 --- a/internal/share/pdf.go +++ b/internal/share/pdf.go @@ -8,7 +8,11 @@ import ( "html" "io" "log/slog" + "net" + "net/http" + "os" "os/exec" + "path/filepath" "runtime" "strings" "time" @@ -77,11 +81,21 @@ func (p *PDFSharer) Export(ctx context.Context, w io.Writer, req ExportRequest) includeAssets := SettingBool(req.Config, "include_assets", true) pageSize := SettingString(req.Config, "page_size", "A4") + // Detect mermaid blocks in any page and load mermaid JS if needed + hasMermaid := pagesHaveMermaid(req.Pages) + var mermaidJS []byte + if hasMermaid { + mermaidJS = findMermaidJS() + if mermaidJS == nil { + hasMermaid = false // degrade gracefully — render code blocks as-is + } + } + // Render pages to HTML - htmlDoc := renderHTMLDocument(req.Pages, req.Assets, ctx, includeTOC, includeAssets) + htmlDoc := renderHTMLDocument(req.Pages, req.Assets, ctx, includeTOC, includeAssets, hasMermaid) // Convert to PDF via headless browser - pdfBytes, err := htmlToPDF(ctx, browserPath, htmlDoc, pageSize) + pdfBytes, err := htmlToPDF(ctx, browserPath, htmlDoc, pageSize, mermaidJS) if err != nil { return fmt.Errorf("PDF generation failed: %w", err) } @@ -91,13 +105,43 @@ func (p *PDFSharer) Export(ctx context.Context, w io.Writer, req ExportRequest) } // renderHTMLDocument builds a complete HTML document from the exported pages. -func renderHTMLDocument(pages []Page, assets AssetReader, ctx context.Context, includeTOC, includeAssets bool) string { +// If hasMermaid is true, includes a script tag that loads mermaid from /mermaid.min.js +// (served by the local HTTP server in htmlToPDF). 
+func renderHTMLDocument(pages []Page, assets AssetReader, ctx context.Context, includeTOC, includeAssets, hasMermaid bool) string { var buf strings.Builder buf.WriteString(``) buf.WriteString(``) + buf.WriteString(``) + if hasMermaid { + // Load mermaid from local server, then render code blocks to SVG + buf.WriteString(``) + buf.WriteString(``) + } + buf.WriteString(``) + + // If no mermaid, immediately mark done for the wait loop + if !hasMermaid { + buf.WriteString(``) + } // Table of contents if includeTOC && len(pages) > 1 { @@ -172,8 +216,74 @@ func embedImages(body string, imageRefs []string, assets AssetReader, ctx contex return body } +// pagesHaveMermaid returns true if any page body contains a mermaid fenced code block. +func pagesHaveMermaid(pages []Page) bool { + for _, p := range pages { + if strings.Contains(p.Body, "```mermaid") { + return true + } + } + return false +} + +// findMermaidJS locates and reads the mermaid.min.js bundle. +// It checks common locations relative to the working directory and executable. +func findMermaidJS() []byte { + candidates := []string{ + // Development: relative to project root (cwd) + "webui/node_modules/mermaid/dist/mermaid.min.js", + // Two levels up from internal/share/ (tests run from package dir) + "../../webui/node_modules/mermaid/dist/mermaid.min.js", + } + + for _, candidate := range candidates { + data, err := os.ReadFile(candidate) + if err == nil { + return data + } + } + + // Try relative to the executable (production: mermaid.min.js next to binary) + if exePath, err := os.Executable(); err == nil { + dir := filepath.Dir(exePath) + for _, rel := range []string{ + filepath.Join(dir, "mermaid.min.js"), + filepath.Join(dir, "webui", "node_modules", "mermaid", "dist", "mermaid.min.js"), + } { + if data, err := os.ReadFile(rel); err == nil { + return data + } + } + } + + return nil +} + // htmlToPDF uses chromedp to render HTML to PDF. 
-func htmlToPDF(ctx context.Context, browserPath, htmlContent, pageSize string) ([]byte, error) { +func htmlToPDF(ctx context.Context, browserPath, htmlContent, pageSize string, mermaidJS []byte) ([]byte, error) { + // Start a local HTTP server to serve the HTML content. + // This gives the page a proper origin so scripts and local resources work. + mux := http.NewServeMux() + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + w.Write([]byte(htmlContent)) + }) + if mermaidJS != nil { + mux.HandleFunc("/mermaid.min.js", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/javascript") + w.Write(mermaidJS) + }) + } + listener, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + return nil, fmt.Errorf("failed to start local server: %w", err) + } + srv := &http.Server{Handler: mux} + go srv.Serve(listener) + defer srv.Close() + defer listener.Close() + pageURL := fmt.Sprintf("http://127.0.0.1:%d/", listener.Addr().(*net.TCPAddr).Port) + // Create a context with the browser path opts := append(chromedp.DefaultExecAllocatorOptions[:], chromedp.ExecPath(browserPath), @@ -193,14 +303,13 @@ func htmlToPDF(ctx context.Context, browserPath, htmlContent, pageSize string) ( // Navigate to the HTML content and print to PDF var pdfBuf []byte - err := chromedp.Run(taskCtx, - chromedp.Navigate("about:blank"), + err = chromedp.Run(taskCtx, + chromedp.Navigate(pageURL), + // Wait for Mermaid diagrams to finish rendering. + // The mermaid init script sets data-mermaid-done="true" on <body> + // after all diagrams render (or on error). Poll until it appears. 
chromedp.ActionFunc(func(ctx context.Context) error { - frameTree, err := page.GetFrameTree().Do(ctx) - if err != nil { - return err - } - return page.SetDocumentContent(frameTree.Frame.ID, htmlContent).Do(ctx) + return chromedp.Poll(`document.body && document.body.getAttribute('data-mermaid-done') === 'true'`, nil, chromedp.WithPollingInterval(100*time.Millisecond)).Do(ctx) }), chromedp.ActionFunc(func(ctx context.Context) error { paperWidth, paperHeight := paperDimensions(pageSize) diff --git a/internal/share/pdf_test.go b/internal/share/pdf_test.go index d72393d..77cfcb5 100644 --- a/internal/share/pdf_test.go +++ b/internal/share/pdf_test.go @@ -1,6 +1,17 @@ package share -import "testing" +import ( + "bytes" + "context" + "fmt" + "net" + "net/http" + "strings" + "testing" + "time" + + "github.com/chromedp/chromedp" +) func TestPDFRegistered(t *testing.T) { if findBrowser() == "" { @@ -14,3 +25,168 @@ func TestPDFRegistered(t *testing.T) { t.Errorf("expected name 'pdf', got %q", s.Name()) } } + +func TestPDFMermaidRendering(t *testing.T) { + if findBrowser() == "" { + t.Skip("no Chromium-based browser on $PATH — PDF sharer won't register") + } + if findMermaidJS() == nil { + t.Skip("mermaid.min.js not found — cannot test mermaid rendering") + } + + sharer := &PDFSharer{} + + // Page with a mermaid diagram + mermaidPage := Page{ + Path: "test/mermaid-page", + Title: "Mermaid Test", + Body: `# Test Page + +Here is a diagram: + +` + "```mermaid\ngraph TD\n A[Start] --> B[Process]\n B --> C[End]\n```" + ` + +And some text after. 
+`, + } + + ctx := context.Background() + + // Export the mermaid page to PDF + var mermaidBuf bytes.Buffer + err := sharer.Export(ctx, &mermaidBuf, ExportRequest{ + Pages: []Page{mermaidPage}, + Assets: nil, + Config: ShareConfig{Page: "test/mermaid-page", Depth: 0}, + }) + if err != nil { + t.Fatalf("PDF export with mermaid failed: %v", err) + } + + // Verify it's a valid PDF + if !bytes.HasPrefix(mermaidBuf.Bytes(), []byte("%PDF")) { + t.Fatal("mermaid PDF output doesn't start with %PDF header") + } + t.Logf("mermaid PDF size: %d bytes", mermaidBuf.Len()) + + // Verify mermaid was rendered by checking the HTML intermediate output. + // Use the htmlToPDF local server approach to get the rendered HTML. + browserPath := findBrowser() + mermaidJS := findMermaidJS() + htmlDoc := renderHTMLDocument([]Page{mermaidPage}, nil, ctx, false, false, true) + + // Render in browser and capture the resulting HTML to verify SVG + mux := http.NewServeMux() + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + w.Write([]byte(htmlDoc)) + }) + mux.HandleFunc("/mermaid.min.js", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/javascript") + w.Write(mermaidJS) + }) + listener, listenErr := net.Listen("tcp", "127.0.0.1:0") + if listenErr != nil { + t.Fatalf("failed to listen: %v", listenErr) + } + srv := &http.Server{Handler: mux} + go srv.Serve(listener) + defer srv.Close() + defer listener.Close() + pageURL := fmt.Sprintf("http://127.0.0.1:%d/", listener.Addr().(*net.TCPAddr).Port) + + opts := append(chromedp.DefaultExecAllocatorOptions[:], + chromedp.ExecPath(browserPath), + chromedp.Flag("no-sandbox", true), + chromedp.Flag("disable-gpu", true), + ) + allocCtx, allocCancel := chromedp.NewExecAllocator(ctx, opts...) 
+ defer allocCancel() + taskCtx, taskCancel := chromedp.NewContext(allocCtx) + defer taskCancel() + taskCtx, timeoutCancel := context.WithTimeout(taskCtx, 30*time.Second) + defer timeoutCancel() + + var bodyHTML string + err = chromedp.Run(taskCtx, + chromedp.Navigate(pageURL), + chromedp.ActionFunc(func(ctx context.Context) error { + return chromedp.Poll(`document.body && document.body.getAttribute('data-mermaid-done') === 'true'`, nil, chromedp.WithPollingInterval(100*time.Millisecond)).Do(ctx) + }), + chromedp.OuterHTML("body", &bodyHTML), + ) + if err != nil { + t.Fatalf("chromedp failed: %v", err) + } + + // The rendered HTML should contain SVG elements from mermaid + if !strings.Contains(bodyHTML, " B\n```\n", + }, + } + + html := renderHTMLDocument(pages, nil, context.Background(), false, false, true) + + // Should contain mermaid.min.js script reference (local server) + if !strings.Contains(html, "/mermaid.min.js") { + t.Error("HTML does not contain /mermaid.min.js script reference") + } + + // Should contain the data-mermaid-done signal + if !strings.Contains(html, "data-mermaid-done") { + t.Error("HTML does not contain data-mermaid-done signal") + } + + // Goldmark should have rendered the mermaid block as a code element + if !strings.Contains(html, `class="language-mermaid"`) { + t.Error("HTML does not contain language-mermaid code block (goldmark output)") + } + + // Should contain the graph definition text + if !strings.Contains(html, "A --> B") || !strings.Contains(html, "graph TD") { + // goldmark may or may not HTML-escape inside code blocks + if !strings.Contains(html, "A --> B") && !strings.Contains(html, "A --> B") { + t.Error("HTML does not contain the mermaid graph definition") + } + } +} + +func TestPDFRenderHTMLNoMermaid(t *testing.T) { + // When hasMermaid is false, no mermaid script should be included + pages := []Page{ + { + Path: "test/page", + Title: "Test", + Body: "# Hello\n\nJust text.\n", + }, + } + + html := 
renderHTMLDocument(pages, nil, context.Background(), false, false, false) + + if strings.Contains(html, "/mermaid.min.js") { + t.Error("HTML should not contain mermaid script when hasMermaid is false") + } + // Should still have the done signal for the wait loop + if !strings.Contains(html, "data-mermaid-done") { + t.Error("HTML should still contain data-mermaid-done signal for consistency") + } +}