From c6979f3c6a0144880ffdd9e9951d99d6fd83f35c Mon Sep 17 00:00:00 2001
From: Nicolas Kruse
Date: Mon, 19 May 2025 11:15:42 +0200
Subject: [PATCH] renumbering of SVG-ids added to match HTML specification
---
src/pyladoc/__init__.py | 59 +++++++++++++++++++++---------------
src/pyladoc/svg_tools.py | 38 +++++++++++++++++++++++
tests/document_validation.py | 7 ++---
3 files changed, 75 insertions(+), 29 deletions(-)
create mode 100644 src/pyladoc/svg_tools.py
diff --git a/src/pyladoc/__init__.py b/src/pyladoc/__init__.py
index 3f3c658..e30d379 100644
--- a/src/pyladoc/__init__.py
+++ b/src/pyladoc/__init__.py
@@ -8,6 +8,7 @@ from . import latex
import pkgutil
from html.parser import HTMLParser
from io import StringIO
+from . import svg_tools
HTML_OUTPUT = 0
LATEX_OUTPUT = 1
@@ -58,17 +59,6 @@ def _markdown_to_html(text: str) -> str:
return html_text
-def _clean_svg(svg_text: str) -> str:
- # remove all tags not alllowd for inline svg from metadata:
- svg_text = re.sub(r'<metadata>.*?</metadata>', '', svg_text, flags=re.DOTALL)
-
- # remove illegal path-tags without d attribute:
- return re.sub(r'<path(?![^>]*\sd=)\s.*?/>', '', svg_text, flags=re.DOTALL)
-
-# def _get_templ_vars(template: str) -> list[str]:
-# return re.findall(".*?", template, re.DOTALL)
-
-
def _drop_indent(text: str, amount: int) -> str:
"""
Drops a specific number of indentation spaces from a multiline text.
@@ -142,6 +132,7 @@ def escape_html(text: str) -> str:
def figure_to_string(fig: Figure,
+ unique_id: str,
figure_format: FFormat = 'svg',
font_family: str | None = None,
scale: float = 1,
@@ -175,7 +166,7 @@ def figure_to_string(fig: Figure,
elif figure_format == 'svg' and not base64:
i = buff.read(2028).find(b'
{eq_html}")
+ else:
+ self.modified_html.write(eq_html)
+
else:
self.modified_html.write(data)
def handle_endtag(self, tag: str) -> None:
if tag == 'latex':
self.in_latex = False
+ elif self.self_closing:
+ self.self_closing = False
else:
self.modified_html.write(f"</{tag}>")
+ if tag == 'p' and self.p_tags > 0:
+ self.p_tags -= 1
parser = HTMLPostProcessor(self)
parser.feed(html_code)
@@ -435,14 +446,14 @@ class DocumentWriter():
caption_prefix, reference = self._add_item(ref_id, ref_type, prefix_pattern)
return '
%s%s
' % (
reference,
- figure_to_string(fig, self._figure_format, base64=self._base64_svgs, scale=self._fig_scale),
+ figure_to_string(fig, reference, self._figure_format, base64=self._base64_svgs, scale=self._fig_scale),
'
' + caption_prefix + escape_html(caption) if caption else '')
def render_to_latex() -> str:
_, reference = self._add_item(ref_id, ref_type, prefix_pattern)
return '\\begin{figure}%s\n%s\n\\caption{%s}\n%s\\end{figure}' % (
'\n\\centering' if centered else '',
- figure_to_string(fig, 'pgf', self._font_family, scale=self._fig_scale),
+ figure_to_string(fig, reference, 'pgf', self._font_family, scale=self._fig_scale),
latex.escape_text(caption),
'\\label{%s}\n' % latex.normalize_label_text(reference) if ref_id else '')
@@ -603,7 +614,7 @@ class DocumentWriter():
norm_text = _normalize_text_indent(str(text))
def render_to_html() -> str:
- html = self._html_post_processing(_markdown_to_html(self._equation_embedding_reescaping(norm_text)))
+ html = _markdown_to_html(self._equation_embedding_reescaping(norm_text))
if section_class:
return '' + html + '
'
else:
@@ -637,7 +648,7 @@ class DocumentWriter():
self._base64_svgs = base64_svgs
self._fig_scale = figure_scale
- return _fillin_reference_names(self._render_doc(HTML_OUTPUT), self._item_index)
+ return self._html_post_processing(_fillin_reference_names(self._render_doc(HTML_OUTPUT), self._item_index))
def to_latex(self, font_family: Literal[None, 'serif', 'sans-serif'] = None,
table_renderer: TRenderer = 'simple', figure_scale: float = 1) -> str:
diff --git a/src/pyladoc/svg_tools.py b/src/pyladoc/svg_tools.py
new file mode 100644
index 0000000..35eda24
--- /dev/null
+++ b/src/pyladoc/svg_tools.py
@@ -0,0 +1,38 @@
+import xml.etree.ElementTree as ET
+import re
+from re import Match
+
+def update_svg_ids(input_svg: str, unique_id: str) -> str:
+ """Add a unique ID part to all svg IDs and update references to these IDs"""
+ id_mapping: dict[str, str] = {}
+
+ def update_ids(match: Match[str]) -> str:
+ old_id = match.group(1)
+ new_id = f"svg-{unique_id}-{old_id}"
+ id_mapping[old_id] = new_id
+ return f' id="{new_id}"'
+
+ def update_references(match: Match[str]) -> str:
+ old_ref = match.group(1)
+ new_ref = id_mapping.get(old_ref, old_ref)
+ if match.group(0).startswith('xlink:href'):
+ return f'xlink:href="#{new_ref}"'
+ else:
+ return f'url(#{new_ref})'
+
+ # Update IDs
+ svg_string = re.sub(r'\sid="(.*?)"', update_ids, input_svg)
+
+ # Update references to IDs
+ svg_string = re.sub(r'url\(#([^\)]+)\)', update_references, svg_string)
+ svg_string = re.sub(r'xlink:href="#([^\"]+)"', update_references, svg_string)
+
+ return svg_string
+
+
+def clean_svg(svg_text: str) -> str:
+ # remove all tags not allowed for inline svg from metadata:
+ svg_text = re.sub(r'<metadata>.*?</metadata>', '', svg_text, flags=re.DOTALL)
+
+ # remove illegal path-tags without d attribute:
+ return re.sub(r'<path(?![^>]*\sd=)\s.*?/>', '', svg_text, flags=re.DOTALL)
\ No newline at end of file
diff --git a/tests/document_validation.py b/tests/document_validation.py
index fe9eb80..01ba2a2 100644
--- a/tests/document_validation.py
+++ b/tests/document_validation.py
@@ -2,10 +2,7 @@ from typing import Generator, Any
from lxml import etree
from lxml.etree import _Element as EElement # type: ignore
import requests
-
-
-with open('src/pyladoc/templates/test_template.html', mode='rt', encoding='utf-8') as f:
- html_test_template = f.read()
+import pyladoc
def add_line_numbers(multiline_string: str) -> str:
@@ -53,7 +50,7 @@ def validate_html(html_string: str, validate_online: bool = False, check_for: li
assert tag_type in tags, f"Tag {tag_type} not found in the html code"
if validate_online:
- test_page = html_test_template.replace('', html_string)
+ test_page = pyladoc.inject_to_template(html_string, internal_template='templates/test_template.html')
validation_result = validate_html_with_w3c(test_page)
assert 'messages' in validation_result, 'Validate request failed'
if validation_result['messages']: