|
2 | 2 |
|
3 | 3 | from __future__ import annotations |
4 | 4 |
|
| 5 | +import urllib.request |
| 6 | +from io import BytesIO |
5 | 7 | from pathlib import Path |
6 | 8 | from typing import Any |
7 | 9 |
|
8 | 10 | from docx import Document |
| 11 | +from docx.shared import Cm |
| 12 | + |
| 13 | +from document_placeholder.image_value import ImageValue |
9 | 14 |
|
10 | 15 |
|
11 | 16 | class DocumentProcessor: |
@@ -43,16 +48,97 @@ def _replace_in_paragraph(paragraph, values: dict[str, Any]) -> None: |
43 | 48 | if not runs: |
44 | 49 | return |
45 | 50 |
|
46 | | - full_text = "".join(run.text for run in runs) |
| 51 | + for key, value in values.items(): |
| 52 | + if not isinstance(value, ImageValue): |
| 53 | + continue |
| 54 | + placeholder = "{" + key + "}" |
| 55 | + for run in runs: |
| 56 | + if placeholder in run.text: |
| 57 | + run.text = run.text.replace(placeholder, "") |
| 58 | + try: |
| 59 | + stream = DocumentProcessor._load_image(value.source) |
| 60 | + w = value.width_cm if value.width_cm is not None else 5.0 |
| 61 | + kwargs: dict[str, Any] = {"width": Cm(w)} |
| 62 | + if value.height_cm is not None: |
| 63 | + kwargs["height"] = Cm(value.height_cm) |
| 64 | + run.add_picture(stream, **kwargs) |
| 65 | + except (OSError, ValueError, KeyError): |
| 66 | + pass |
| 67 | + break |
47 | 68 |
|
| 69 | + full_text = "".join(run.text for run in runs) |
48 | 70 | new_text = full_text |
49 | 71 | for key, value in values.items(): |
| 72 | + if isinstance(value, ImageValue): |
| 73 | + continue |
50 | 74 | placeholder = "{" + key + "}" |
51 | 75 | if placeholder in new_text: |
52 | 76 | display = str(value) if value is not None else "" |
| 77 | + display = DocumentProcessor._sanitize_xml_text(display) |
53 | 78 | new_text = new_text.replace(placeholder, display) |
54 | 79 |
|
55 | 80 | if new_text != full_text: |
56 | | - runs[0].text = new_text |
| 81 | + runs[0].text = DocumentProcessor._sanitize_xml_text(new_text) |
57 | 82 | for run in runs[1:]: |
58 | 83 | run.text = "" |
| 84 | + |
| 85 | + @staticmethod |
| 86 | + def _sanitize_xml_text(text: str) -> str: |
| 87 | + """Удалить символы, недопустимые в XML (NULL, control chars).""" |
| 88 | + if not text: |
| 89 | + return "" |
| 90 | + result = [] |
| 91 | + for c in str(text): |
| 92 | + code = ord(c) |
| 93 | + if code == 0x9 or code == 0xA or code == 0xD: |
| 94 | + result.append(c) |
| 95 | + elif 0x20 <= code <= 0xD7FF or 0xE000 <= code <= 0xFFFD: |
| 96 | + result.append(c) |
| 97 | + elif 0x10000 <= code <= 0x10FFFF: |
| 98 | + result.append(c) |
| 99 | + else: |
| 100 | + result.append(" ") |
| 101 | + return "".join(result) |
| 102 | + |
| 103 | + @staticmethod |
| 104 | + def _load_image(source: str) -> BytesIO: |
| 105 | + """Загрузить изображение из URL или файла. SVG конвертируется в PNG.""" |
| 106 | + source = source.strip() |
| 107 | + if source.startswith(("http://", "https://")): |
| 108 | + req = urllib.request.Request( |
| 109 | + source, |
| 110 | + headers={"User-Agent": "DocumentPlaceholder/1.0"}, |
| 111 | + ) |
| 112 | + with urllib.request.urlopen(req, timeout=30) as resp: |
| 113 | + data = resp.read() |
| 114 | + else: |
| 115 | + path = Path(source) |
| 116 | + if path.exists(): |
| 117 | + data = path.read_bytes() |
| 118 | + else: |
| 119 | + raise FileNotFoundError(source) |
| 120 | + |
| 121 | + return DocumentProcessor._ensure_raster(data, source) |
| 122 | + |
| 123 | + @staticmethod |
def _ensure_raster(data: bytes, source: str = "") -> BytesIO:
    """Wrap image bytes in a stream, converting SVG input to PNG first.

    Args:
        data: Raw image bytes.
        source: Path or URL the bytes came from; passed to
            ``DocumentProcessor._is_svg`` together with *data*.

    Returns:
        A ``BytesIO`` positioned at offset 0. It holds PNG output from
        ``cairosvg.svg2png`` when the input is classified as SVG, otherwise
        the original bytes unchanged.

    Raises:
        ValueError: the SVG could not be converted, including when
            ``cairosvg`` cannot be imported. The underlying exception is
            attached as ``__cause__``.
    """
    if not DocumentProcessor._is_svg(data, source):
        return BytesIO(data)

    try:
        # Imported lazily so cairosvg is only needed when an SVG is seen.
        from cairosvg import svg2png

        png_out = BytesIO()
        svg2png(bytestring=data, write_to=png_out)
    except Exception as exc:
        # Keep ValueError as the reported type, but chain the real cause
        # so the reason for the failure is not lost.
        raise ValueError("SVG conversion failed") from exc

    png_out.seek(0)
    return png_out
| 137 | + |
| 138 | + @staticmethod |
| 139 | + def _is_svg(data: bytes, source: str) -> bool: |
| 140 | + """Проверить, является ли контент SVG.""" |
| 141 | + if source.lower().endswith(".svg") or source.lower().endswith(".svgz"): |
| 142 | + return True |
| 143 | + start = data.lstrip()[:200].decode("utf-8", errors="ignore") |
| 144 | + return start.lstrip().startswith("<svg") or start.lstrip().startswith("<?xml") |
0 commit comments