first commit

2025-03-28 13:30:08 +01:00 · 2025-03-28 13:30:08 +01:00 · 67c48776ac
commit 67c48776ac
19 changed files with 3646 additions and 0 deletions
--- a/.flake8
+++ b/.flake8
@ -0,0 +1,21 @@
 [flake8]
 # Specify the maximum allowed line length
 max-line-length = 88
 # Ignore specific rules
 # For example, E501: Line too long, W503: Line break before binary operator
 ignore = E501, W503, W504
 # Exclude specific files or directories
 exclude =
    .git,
    __pycache__,
    build,
    dist,
    .conda
    .venv
    venv
 # Enable specific plugins or options
 # Example: Enabling flake8-docstrings
 select = C,E,F,W,D
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,134 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 .pybuilder/
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 .pdm.toml
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 .venv/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 pyModbusTCP_old/
 test.py
 test_*.ipynb
 settings.json
--- a/README.md
+++ b/README.md
@ -0,0 +1,111 @@
 # Pyladoc
 ## Description
 Pyladoc is a Python package for programmatically generating HTML and
 PDF/LaTeX output. This package specifically targets applications where reports
 or results with Pandas tables and Matplotlib figures are generated programmatically
 to be displayed as a website and as a PDF document without any manual formatting
 steps.
 This package focuses on the "Document in Code" approach for cases
 where a lot of calculations and data handling is done but not a lot of
 document text needs to be displayed.
 LaTeX is used as the backend for PDF generation. There are excellent engines for
 rendering HTML to PDF available, but even if there is no requirement for
 accurate typesetting, programmatically placing content of variable
 composition and size on fixed-size pages without manual intervention
 is a hard problem that LaTeX is very capable of solving.
 ### Supported primitives
 - Text (can be Markdown or HTML formatted)
 - Headings
 - Tables (Pandas, Markdown or HTML)
 - Matplotlib figures
 - LaTeX equations
 - Named references for figures, tables and equations
 ### Key Features
 - HTML and PDF/LaTeX rendering of the same document
 - Single file output including figures
 - Figure and equation embedding in HTML by inline SVG, SVG in Base64 or PNG in Base64
 - Figure embedding in LaTeX as PGF/TikZ
 ### Usage Scenarios
 - Webservices
 - Report generation for lab equipment
 ## Installation
 It can be installed with pip:
 ```bash
 pip install pyladoc
 ```
 ## Usage
 It is easy to use as the following example code shows:
 ```python
 import pandas as pd
 import pyladoc
 doc = pyladoc.DocumentWriter()
 doc.add_markdown("""
    # Example
    This is an example. The @table:pandas_example shows some random data.
    """)
 some_data = {
    'Row1': ["Line1", "Line2", "Line3"],
    'Row2': [120, 100, 110],
    'Row3': ['12 g/km', '> 150 g/km', '110 g/km']
 }
 df = pd.DataFrame(some_data)
 doc.add_table(df, 'This is a pandas example table', 'pandas_example')
 html_code = doc.to_html()
 doc.to_pdf('test.pdf')
 ```
 ## Example outputs
 The following documents are generated by tests/test_rendering_example_doc.py:
 - HTML: [test_html_render.html](tests/out/test_html_render.html)
 - PDF: [test_latex_render.pdf](tests/out/test_latex_render.pdf)
 ## Contributing
 Contributions are welcome, please open an issue or submit a pull request on GitHub.
 ## Developer Guide
 To get started with developing the `pyladoc` package, follow these steps.
 First, clone the repository to your local machine using Git:
 ```bash
 git clone https://github.com/Nonannet/pyladoc.git
 cd pyladoc
 ```
 It's recommended to set up a venv:
 ```bash
 python -m venv venv
 source venv/bin/activate  # On Windows use `venv\Scripts\activate`
 ```
 Install the package and dev-dependencies while keeping files in the
 current directory:
 ```bash
 pip install -e .[dev]
 ```
 Ensure that everything is set up correctly by running the tests:
 ```bash
 pytest
 ```
 ## License
 This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,58 @@
 [project]
 name = "pyladoc"
 version = "1.0.0"
 authors = [
  { name="Nicolas Kruse", email="nicolas.kruse@nonan.net" },
 ]
 description = "Package for generating HTML and PDF/latex from python code"
 readme = "README.md"
 requires-python = ">=3.8"
 classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
 ]
 dependencies = [
    "markdown>=3.3.0",
    "beautifulsoup4>=4.9.1"
 ]
 [project.optional-dependencies]
 dev = [
    "pytest", "flake8", "mypy",
    "lxml", "types-lxml",
    "requests",
    "matplotlib>=3.1.1",
    "pandas>=2.0.0", "Jinja2",
 ]
 [project.urls]
 Homepage = "https://github.com/Nonannet/pyladoc"
 Repository = "https://github.com/Nonannet/pyladoc"
 Issues = "https://github.com/Nonannet/pyladoc/issues"
 [build-system]
 requires = ["setuptools>=61.0", "wheel"]
 build-backend = "setuptools.build_meta"
 [tool.setuptools.packages.find]
 where = ["src"]
 [tool.setuptools.package-data]
 pyladoc = ["templates/*"]
 [tool.mypy]
 files = ["src"]
 strict = true
 warn_return_any = true
 warn_unused_configs = true
 check_untyped_defs = true
 no_implicit_optional = true
 show_error_codes = true
 [tool.pytest.ini_options]
 minversion = "6.0"
 addopts = "-ra -q"
 testpaths = ["tests"]
 pythonpath = ["src"]
--- a/src/pyladoc/init.py
+++ b/src/pyladoc/init.py
@ -0,0 +1,570 @@
 from typing import Callable, Generator, Self, Literal, TYPE_CHECKING
 import html
 import markdown
 from base64 import b64encode
 import re
 import io
 from . import latex
 import pkgutil
 # Positions of the HTML and the LaTeX render function inside each element
 # pair stored by DocumentWriter (see DocumentWriter._render_doc).
 HTML_OUTPUT = 0
 LATEX_OUTPUT = 1
 # pandas and matplotlib are imported unconditionally for type checking only;
 # at runtime each import is attempted and replaced by a fallback on ImportError.
 if TYPE_CHECKING:
    from pandas import DataFrame
    from pandas.io.formats.style import Styler
    import matplotlib.pyplot as plt
    from matplotlib.figure import Figure
    from matplotlib.legend import Legend as Mpl_Legend
    from matplotlib.text import Text as Mpl_Text
    Table = DataFrame | Styler
 else:
    try:
        from pandas import DataFrame
    except ImportError:
        DataFrame = None
    try:
        from pandas.io.formats.style import Styler
        Table = DataFrame | Styler
    except ImportError:
        # NOTE(review): Styler stays undefined on this path - presumably the
        # case of a missing Jinja2 package; confirm against add_table.
        Table = DataFrame
    try:
        import matplotlib.pyplot as plt
        from matplotlib.figure import Figure
        from matplotlib.legend import Legend as Mpl_Legend
        from matplotlib.text import Text as Mpl_Text
    except ImportError:
        # Only Figure gets a fallback; plt, Mpl_Legend and Mpl_Text stay undefined
        Figure = None
 # Accepted values for the LaTeX table renderer and the figure output format
 TRenderer = Literal['pandas', 'simple']
 FFormat = Literal['svg', 'png', 'pgf']
 def _get_pkgutil_string(path: str) -> str:
    """
    Reads a resource that belongs to this module's package and decodes it.
    Args:
        path: Path of the resource, relative to the package
    Returns:
        The decoded text content of the resource
    """
    raw_bytes = pkgutil.get_data(__name__, path)
    assert raw_bytes is not None
    text_content = raw_bytes.decode()
    return text_content
 def _markdown_to_html(text: str) -> str:
    """
    Renders markdown formatted text to HTML.
    Args:
        text: The markdown source
    Returns:
        The HTML code, with '<hr />' written as '<hr>'
    """
    md_extensions = ['tables', 'fenced_code', 'def_list', 'abbr', 'sane_lists']
    source = re.sub(r'\u00A0', '&nbsp;', text)  # non-breaking space
    rendered = markdown.markdown(source, extensions=md_extensions)
    return rendered.replace('<hr />', '<hr>')
 def escape_html(text: str) -> str:
    """
    Escapes special HTML characters in a given string.
    Non-breaking spaces (U+00A0) are written as '&nbsp;' entities, leading
    and trailing whitespace is removed and line breaks are replaced by
    single spaces.
    Args:
        text: The text to escape
    Returns:
        Escaped text safe for inserting into HTML code
    """
    # Escape first: inserting the entity before html.escape() would turn its
    # ampersand into '&amp;' and render a literal "&nbsp;" in the browser.
    ret = html.escape(text)
    ret = ret.replace('\u00A0', '&nbsp;')  # non-breaking space
    return ' '.join(ret.strip().splitlines())
 def _clean_svg(svg_text: str) -> str:
    """
    Removes the metadata element and path tags without a d attribute
    from SVG code.
    Args:
        svg_text: The SVG code to clean
    Returns:
        The cleaned SVG code
    """
    # drop the metadata element, its tags are not allowed for inline svg:
    metadata_pattern = re.compile(r'<metadata>.*?</metadata>', re.DOTALL)
    without_metadata = metadata_pattern.sub('', svg_text)
    # drop illegal path-tags that have no d attribute:
    empty_path_pattern = re.compile(r'<path(?![^>]*\sd=)\s.*?/>', re.DOTALL)
    return empty_path_pattern.sub('', without_metadata)
 # def _get_templ_vars(template: str) -> list[str]:
 #    return re.findall("<!---START (.+?)--->.*?<!---END .+?--->", template, re.DOTALL)
 def _drop_indent(text: str, amount: int) -> str:
    """
    Prefixes every line of a multiline text with indentation spaces.
    Despite its name this helper does not remove anything: it adds
    `amount` space characters in front of each line and keeps the
    line endings as they are.
    Args:
        text: The text to indent
        amount: The number of space characters to put in front of each line
    Returns:
        The text with every line indented by the specified amount
    """
    prefix = ' ' * amount
    return ''.join(prefix + line for line in text.splitlines(True))
 def _save_figure(fig: Figure, buff: io.BytesIO, figure_format: FFormat, font_family: str | None, scale: float) -> None:
    """
    Saves a matplotlib figure to a file-like object.
    The font family and the figure size are changed only for rendering
    and are restored afterwards, also if rendering fails.
    Args:
        fig: The figure to save
        buff: The file-like object to save the figure to
        figure_format: The format to save the figure in (svg, png or pgf)
        font_family: The font family to use for the figure
        scale: Scaling factor applied to the figure size while rendering
    """
    def get_all_elements() -> Generator[Mpl_Text, None, None]:
        for ax in fig.get_axes():
            yield ax.title
            yield ax.xaxis.label
            yield ax.yaxis.label
            yield from ax.get_xticklabels() + ax.get_yticklabels()
            legend: Mpl_Legend = ax.get_legend()
            if legend:
                yield from legend.get_texts()
    # Validate the format before the figure is modified
    backends = {'png': 'AGG', 'svg': 'SVG', 'pgf': 'PGF'}
    assert figure_format in backends, 'Figure format can be pgf (vector), svg (vector) or png (raster)'
    # Store current figure settings. This has to be a list: a generator would
    # be exhausted by the font override loop and nothing would be restored.
    old_state = [(e, e.get_fontfamily()) for e in get_all_elements()]
    old_size: tuple[float, float] = tuple(fig.get_size_inches())  # type: ignore[unused-ignore]
    try:
        # Adjust figure settings
        if font_family:
            for e, _ in old_state:
                e.set_fontfamily(font_family)
        fig.set_size_inches(old_size[0] * scale, old_size[1] * scale, False)
        # Render figure
        fig.savefig(buff, format=figure_format, backend=backends[figure_format])  # type: ignore[unused-ignore]
    finally:
        # Reset figure setting
        for e, s in old_state:
            e.set_fontfamily(s)
        fig.set_size_inches(old_size, None, False)
 def figure_to_string(fig: Figure,
                     figure_format: FFormat = 'svg',
                     font_family: str | None = None,
                     scale: float = 1,
                     alt_text: str = '',
                     base64: bool = False) -> str:
    """
    Renders a matplotlib figure into an ascii-string. The pgf format and
    svg without base64 are returned as plain code; every other case is
    base64 encoded and wrapped into an img-tag.
    Args:
        fig: The figure to convert
        figure_format: The format to save the figure in (svg, png or pgf)
        font_family: The font family to use for the figure
        scale: Scaling factor for the figure size
        alt_text: The alt text for the figure
        base64: If the format is svg this determines if the image is encoded in base64
    Returns:
        The figure as ascii-string
    """
    assert fig and isinstance(fig, Figure), 'fig parameter must be a matplotlib figure'
    with io.BytesIO() as stream:
        _save_figure(fig, stream, figure_format, font_family, scale)
        stream.seek(0)
        is_pgf = figure_format == 'pgf'
        is_inline_svg = figure_format == 'svg' and not base64
        if is_pgf or is_inline_svg:
            # pgf: skip comments; svg: skip xml and DOCTYPE header
            start_marker = b'\\begingroup%' if is_pgf else b'<svg'
            offset = stream.read(2028).find(start_marker)
            stream.seek(max(offset, 0))
            code = stream.read().decode('utf-8')
            if is_pgf:
                return latex.to_ascii(code)
            return _clean_svg(code)
        image_mime = {"png": "image/png", "svg": "image/svg+xml"}
        assert figure_format in image_mime, 'Unknown image format'
        escaped_alt = escape_html(alt_text)
        mime_type = image_mime[figure_format]
        encoded = b64encode(stream.read()).decode('ascii')  # base64 assures (7-bit) ascii
        return '<img alt="%s" src="data:%s;charset=utf-8;base64,%s">' % (escaped_alt, mime_type, encoded)
 def latex_to_figure(latex_code: str) -> Figure:
    """
    Renders a LaTeX expression as math text into a new matplotlib figure.
    Args:
        latex_code: The LaTeX expression, without the enclosing '$' characters
    Returns:
        A figure sized to the extent of the rendered text times 1.2
    """
    assert Figure, 'Matplotlib is required for rendering LaTex expressions for HTML output.'  # type:ignore[truthy-function]
    figure, axes = plt.subplots()
    # Hide ticks and the axes frame so only the expression is visible
    axes.set_xticks([])
    axes.set_yticks([])
    axes.axis('off')
    expression = plt.text(0.5, 0.5, f'${latex_code}$',
                          horizontalalignment='center',
                          verticalalignment='center',
                          transform=axes.transAxes)
    # A draw pass is needed before the text extent can be measured
    figure.draw_without_rendering()
    extent = expression.get_window_extent()
    width_inches = extent.width / figure.dpi * 1.2
    height_inches = extent.height / figure.dpi * 1.2
    figure.set_size_inches(width_inches, height_inches)
    return figure
 def _fillin_fields(template: str, fields: dict[str, str]) -> str:
    """
    Replaces placeholders of the form
    '<!---START name--->...<!---END name--->' in a template by field values.
    The value is indented by the number of characters that precede the
    START marker on its line. Only the first occurrence of each placeholder
    is replaced, and start and end marker must be on the same line.
    Args:
        template: The template text containing the placeholders
        fields: Mapping of placeholder names to replacement text
    Returns:
        The template with all matching placeholders filled in
    """
    html_out = template
    for variable_name, value in fields.items():
        # Field names are plain text, not patterns: escape regex metacharacters
        name = re.escape(variable_name)
        # Find indentation depths:
        ret = re.search(f"^(.*?)<!---START {name}--->.*?<!---END {name}--->", html_out, flags=re.MULTILINE)
        if ret:
            # NOTE(review): everything before the START marker on that line is
            # replaced by spaces - presumably placeholders are expected on
            # lines of their own; confirm against the templates.
            indent_depths = len(ret.group(1))
            html_out = html_out[:ret.start(0)] + _drop_indent(value, indent_depths) + html_out[ret.end(0):]
    return html_out
 def _fillin_reference_names(input_string: str, item_index: dict[str, int]) -> str:
    """
    Replaces references of the form '@type:name' by the number of the
    referenced item.
    Args:
        input_string: The text containing the references
        item_index: Mapping of 'type:name' keys to item numbers
    Returns:
        The text with all references replaced by their numbers
    """
    pieces: list[str] = []
    cursor = 0
    for match in re.finditer(r'(?<=@)\w+:[\w\_\-]+', input_string):
        ref = match.group()
        assert ref in item_index, f"Reference {ref} does not exist in the document"
        # The match starts after the '@'; "- 1" drops the '@' as well
        pieces.append(input_string[cursor:match.start() - 1])
        pieces.append(str(item_index[ref]))
        cursor = match.end()
    pieces.append(input_string[cursor:])
    return ''.join(pieces)
 def _check_latex_references(input_string: str, item_index: dict[str, int]) -> str:
    """
    Asserts that every '\\ref{type:name}' in LaTeX code points to an
    item of the document.
    Args:
        input_string: The LaTeX code to check
        item_index: Mapping of 'type:name' keys to item numbers
    Returns:
        The unchanged input string
    """
    used_refs = re.findall(r'(?<=\\ref\{)\w+:[\w\_\\\-]+(?=\})', input_string)
    # Labels are compared in their normalized form
    known_labels = {latex.normalize_label_text(item) for item in item_index}
    for ref in used_refs:
        assert ref in known_labels, f"Reference {ref} does not exist in the document"
    return input_string
 def _normalize_text_indent(text: str) -> str:
    """
    Removes the common indentation of a multiline text, e.g. of an
    indented triple-quoted string.
    A blank first line is dropped. The reference indentation is taken from
    the first line, or from the second line if the first line starts
    without a space. Trailing whitespace of every line is removed.
    Args:
        text: The text to normalize
    Returns:
        The text with the reference indentation removed from every line
    """
    def leading_whitespace(line: str) -> int:
        # Only leading whitespace counts as indentation; using strip() here
        # would add trailing whitespace to the indentation depth.
        return len(line) - len(line.lstrip())
    text_lines = text.splitlines()
    if len(text_lines) > 1 and not text_lines[0].strip():
        text_lines = text_lines[1:]
    if not text_lines:
        return ''
    if len(text_lines) > 1 and text_lines[0] and text_lines[0][0] != ' ':
        indent_amount = leading_whitespace(text_lines[1])
    else:
        indent_amount = leading_whitespace(text_lines[0])
    return '\n'.join(
        [' ' * max(0, leading_whitespace(line) - indent_amount) + line.strip()
         for line in text_lines])
 def _create_document_writer() -> 'DocumentWriter':
    """
    Creates a new, empty DocumentWriter instance.
    Returns:
        The new DocumentWriter
    """
    return DocumentWriter()
 def inject_to_template(content: str, template_path: str = '', internal_template: str = '') -> str:
    """
    Injects a content string into a template. The placeholder <!--CONTENT-->
    will be replaced by the content. If the placeholder is prefixed with a
    '%' comment character, this character will be replaced as well.
    Args:
        content: The text to insert into the template
        template_path: Path to a template file (read as UTF-8). Takes
            precedence over internal_template.
        internal_template: Path to a internal default template
    Returns:
        Template with included content
    Raises:
        ValueError: If neither template_path nor internal_template is provided
        AssertionError: If the template contains no <!--CONTENT--> placeholder
    """
    if template_path:
        # Explicit encoding: internal templates are decoded as UTF-8 as well,
        # the platform default would make the result locale dependent.
        with open(template_path, 'r', encoding='utf-8') as f:
            template = f.read()
    elif internal_template:
        template = _get_pkgutil_string(internal_template)
    else:
        raise ValueError('No template provided')
    assert '<!--CONTENT-->' in template, 'No <!--CONTENT--> expression in template located'
    # Strip an optional '%' and whitespace in front of every placeholder
    prep_template = re.sub(r"\%?\s*<!--CONTENT-->", '<!--CONTENT-->', template)
    # str.replace keeps backslashes of the content literal (re.sub would not)
    return prep_template.replace('<!--CONTENT-->', content)
 class DocumentWriter():
    """
    A class to create a document for exporting to HTML or LaTeX.
    Every added element is stored as a pair of render functions (HTML and
    LaTeX) that are called when the document is exported.
    """
    def __init__(self) -> None:
        # Each element is a [render_to_html, render_to_latex] pair
        self._doc: list[list[Callable[[], str]]] = []
        self._fields: dict[str, DocumentWriter] = dict()
        # Render settings, set by to_html()/to_latex() before rendering
        self._base64_svgs: bool = False
        self._figure_format: FFormat = 'svg'
        self._table_renderer: TRenderer = 'simple'
        self._font_family: str | None = None
        # Number of items per reference type and number of each 'type:id' item
        self._item_count: dict[str, int] = {}
        self._item_index: dict[str, int] = {}
        self._fig_scale: float = 1
    def _add_item(self, ref_id: str, ref_type: str, caption_prefix: str) -> str:
        """
        Registers a numbered item (figure, table, equation) for referencing.
        Args:
            ref_id: Id of the item; the item number is used if empty
            ref_type: Type of the item, used as prefix of the reference key
            caption_prefix: Format pattern that receives the item number
        Returns:
            The caption prefix formatted with the item number
        """
        current_index = self._item_count.get(ref_type, 0) + 1
        if not ref_id:
            ref_id = str(current_index)
        self._item_index[f"{ref_type}:{ref_id}"] = current_index
        self._item_count[ref_type] = current_index
        return caption_prefix.format(current_index)
    def new_field(self, name: str) -> 'DocumentWriter':
        """
        Creates a new DocumentWriter and registers it as a named field.
        Args:
            name: The name of the field
        Returns:
            The DocumentWriter holding the content of the field
        """
        new_dwr = _create_document_writer()
        self._fields[name] = new_dwr
        return new_dwr
    def add_document(self, doc: Self) -> None:
        """
        Appends the elements of another document to this document.
        Only the elements are taken over, not the reference index or the
        fields of the other document.
        Args:
            doc: The document to append
        """
        self._doc += doc._doc
    def add_diagram(self, fig: Figure, caption: str = '', ref_id: str = '',
                    prefix_pattern: str = 'Figure {}: ', ref_type: str = 'fig',
                    centered: bool = True) -> None:
        """
        Adds a matplotlib figure to the document.
        Args:
            fig: The figure to add
            caption: The caption of the figure
            ref_id: If provided, the figure can be referenced by this id
            prefix_pattern: Pattern for the caption prefix; receives the figure number
            ref_type: The type prefix used for references to the figure
            centered: Whether the figure is centered in the LaTeX output
        """
        caption_prefix = self._add_item(ref_id, ref_type, prefix_pattern)
        def render_to_html() -> str:
            return '<div class="figure">%s%s</div>' % (
                figure_to_string(fig, self._figure_format, base64=self._base64_svgs, scale=self._fig_scale),
                '<br>' + caption_prefix + escape_html(caption) if caption else '')
        def render_to_latex() -> str:
            return '\\begin{figure}%s\n%s\n\\caption{%s}\n%s\\end{figure}' % (
                '\n\\centering' if centered else '',
                figure_to_string(fig, 'pgf', self._font_family, scale=self._fig_scale),
                latex.escape_text(caption),
                '\\label{%s}\n' % latex.normalize_label_text(ref_type + ':' + ref_id) if ref_id else '')
        self._doc.append([render_to_html, render_to_latex])
    def add_table(self, table: Table, caption: str = '', ref_id: str = '',
                  prefix_pattern: str = 'Table {}: ', ref_type: str = 'table', centered: bool = True) -> None:
        """
        Adds a pandas table to the document.
        Args:
            table: A pandas DataFrame or DataFrame Styler
            caption: The caption of the table
            ref_id: If provided, the table can be referenced by this id
            prefix_pattern: Pattern for the caption prefix; receives the table number
            ref_type: The type prefix used for references to the table
            centered: Whether the table is centered in the LaTeX output
        """
        assert Table and isinstance(table, Table), 'Table has to be a pandas DataFrame oder DataFrame Styler'
        caption_prefix = self._add_item(ref_id, ref_type, prefix_pattern)
        styler = table if isinstance(table, Styler) else getattr(table, 'style', None)
        assert isinstance(styler, Styler), 'Jinja2 package is required for rendering tables'
        def render_to_html() -> str:
            html_string = styler.to_html(table_uuid=ref_id, caption=caption_prefix + escape_html(caption))
            # Remove the style element generated by the Styler
            return re.sub(r'<style.*?>.*?</style>', '', html_string, flags=re.DOTALL)
        def render_to_latex() -> str:
            if self._table_renderer == 'pandas':
                return styler.to_latex(
                    label=latex.normalize_label_text(ref_type + ':' + ref_id),
                    hrules=True,
                    convert_css=True,
                    siunitx=True,
                    caption=latex.escape_text(caption),
                    position_float='centering' if centered else None)
            else:
                return latex.render_pandas_styler_table(styler, caption, ref_type + ':' + ref_id, centered)
        self._doc.append([render_to_html, render_to_latex])
    def add_text(self, text: str, section_class: str = '') -> None:
        """
        Adds a text paragraph to the document.
        Args:
            text: The text to add
            section_class: The class for the paragraph
        """
        norm_text = _normalize_text_indent(text)
        def render_to_html() -> str:
            html = '<p>' + escape_html(norm_text) + '</p>'
            if section_class:
                return '<div class="' + section_class + '">' + html + '</div>'
            else:
                return html
        def render_to_latex() -> str:
            return latex.from_html(render_to_html())
        self._doc.append([render_to_html, render_to_latex])
    def add_html(self, text: str) -> None:
        """
        Adds HTML formatted text to the document. For the LaTeX
        export only basic HTML for text formatting and tables
        is supported.
        Args:
            text: The HTML to add to the document
        """
        def render_to_html() -> str:
            return text
        def render_to_latex() -> str:
            return latex.from_html(text)
        self._doc.append([render_to_html, render_to_latex])
    def add_h1(self, text: str) -> None:
        """
        Adds a h1 heading to the document.
        Args:
            text: The text of the heading
        """
        def render_to_html() -> str:
            return '<h1>' + escape_html(text) + '</h1>'
        def render_to_latex() -> str:
            return '\\section{' + latex.escape_text(text) + '}\n'
        self._doc.append([render_to_html, render_to_latex])
    def add_h2(self, text: str) -> None:
        """
        Adds a h2 heading to the document.
        Args:
            text: The text of the heading
        """
        def render_to_html() -> str:
            return '<h2>' + escape_html(text) + '</h2>'
        def render_to_latex() -> str:
            return '\\subsection{' + latex.escape_text(text) + '}\n'
        self._doc.append([render_to_html, render_to_latex])
    def add_h3(self, text: str) -> None:
        """
        Adds a h3 heading to the document.
        Args:
            text: The text of the heading
        """
        def render_to_html() -> str:
            return '<h3>' + escape_html(text) + '</h3>'
        def render_to_latex() -> str:
            return '\\subsubsection{' + latex.escape_text(text) + '}\n'
        self._doc.append([render_to_html, render_to_latex])
    def add_equation(self, latex_equation: str, ref_id: str = '', ref_type: str = 'eq') -> None:
        """
        Adds a LaTeX equation to the document.
        Args:
            latex_equation: LaTeX formatted equation
            ref_id: If provided, the equation is displayed with
                a number and can be referenced by the ref_id
            ref_type: The type prefix used for references to the equation
        """
        caption = self._add_item(ref_id, ref_type, '({})')
        def render_to_html() -> str:
            fig = latex_to_figure(latex_equation)
            try:
                figure_code = figure_to_string(fig, self._figure_format, base64=self._base64_svgs)
            finally:
                # The figure was created through pyplot, which keeps it alive
                # until it is closed explicitly.
                plt.close(fig)
            return ('<div class="equation-container"><div class="equation">%s</div>'
                    '<div class="equation-number">%s</div></div>') % (figure_code, caption)
        def render_to_latex() -> str:
            if ref_id:
                # Normalize the label like figure and table labels so that
                # it matches the normalized references.
                return '\\begin{equation}\\label{%s}%s\\end{equation}' % (
                    latex.normalize_label_text(ref_type + ':' + ref_id), latex_equation)
            else:
                return '\\[%s\\]' % latex_equation
        self._doc.append([render_to_html, render_to_latex])
    def add_markdown(self, text: str, section_class: str = '') -> None:
        """
        Adds a markdown formatted text to the document.
        Args:
            text: The markdown text to add
            section_class: The class for the text section
        """
        norm_text = _normalize_text_indent(str(text))
        def render_to_html() -> str:
            html = _markdown_to_html(norm_text)
            if section_class:
                return '<div class="' + section_class + '">' + html + '</div>'
            else:
                return html
        def render_to_latex() -> str:
            return latex.from_html(render_to_html())
        self._doc.append([render_to_html, render_to_latex])
    def _render_doc(self, doc_type: int) -> str:
        """
        Renders all elements of the document and fills in the fields.
        Args:
            doc_type: HTML_OUTPUT or LATEX_OUTPUT, selects the render function
        Returns:
            The rendered document
        """
        # NOTE(review): fields are always rendered as HTML, also for
        # LATEX_OUTPUT - confirm this is intended.
        fields = {k: f.to_html() for k, f in self._fields.items()}
        return _fillin_fields(''.join(el[doc_type]() for el in self._doc), fields)
    def to_html(self, figure_format: FFormat = 'svg',
                base64_svgs: bool = False, figure_scale: float = 1) -> str:
        """
        Export the document to HTML. Figures will be embedded in the HTML code.
        The format can be selected between png in base64, inline svg or svg in base64.
        Args:
            figure_format: The format for embedding the figures in the HTML code (svg or png)
            base64_svgs: Whether to encode svg images in base64
            figure_scale: Scaling factor for the figure size
        Returns:
            The HTML code
        """
        self._figure_format = figure_format
        self._base64_svgs = base64_svgs
        self._fig_scale = figure_scale
        return _fillin_reference_names(self._render_doc(HTML_OUTPUT), self._item_index)
    def to_latex(self, font_family: Literal[None, 'serif', 'sans-serif'] = None,
                 table_renderer: TRenderer = 'simple', figure_scale: float = 1) -> str:
        """
        Export the document to LaTeX. Figures will be embedded as pgf graphics.
        Args:
            font_family: Overwrites the font family for figures
            table_renderer: The renderer for tables (simple: renderer with column type
                guessing for text and numbers; pandas: using the internal pandas LaTeX renderer)
            figure_scale: Scaling factor for the figure size
        Returns:
            The LaTeX code
        """
        self._font_family = font_family
        assert table_renderer in ['simple', 'pandas'], "table_renderer must be 'simple' or 'pandas'"
        self._table_renderer = table_renderer
        self._fig_scale = figure_scale
        return _check_latex_references(self._render_doc(LATEX_OUTPUT), self._item_index)
    def to_pdf(self, file_path: str,
               font_family: Literal[None, 'serif', 'sans-serif'] = None,
               table_renderer: TRenderer = 'simple',
               latex_template_path: str = '',
               figure_scale: float = 1) -> bool:
        """
        Export the document to a PDF file using LaTeX.
        Args:
            file_path: The path to save the PDF file to
            font_family: Overwrites the font family for figures and the template
            table_renderer: The renderer for tables ('simple' or 'pandas')
            latex_template_path: Path to a LaTeX template file. The
                expression <!--CONTENT--> will be replaced by the generated content.
                If no path is provided a default template is used.
            figure_scale: Scaling factor for the figure size
        Returns:
            True if the PDF was successfully created
        """
        latex_code = inject_to_template(self.to_latex(font_family, table_renderer, figure_scale),
                                        latex_template_path,
                                        'templates/default_template.tex')
        if font_family == 'sans-serif':
            latex_code = latex.inject_latex_command(latex_code, '\\renewcommand{\\familydefault}{\\sfdefault}')
        success, errors, warnings = latex.compile(latex_code, file_path)
        if not success:
            print('Errors:')
            print('\n'.join(errors))
            print('Warnings:')
            print('\n'.join(warnings))
        return success
    def _repr_html_(self) -> str:
        # Rich display hook, returns the HTML export with default settings
        return self.to_html()
    def __repr__(self) -> str:
        return self.to_html()
--- a/src/pyladoc/latex.py
+++ b/src/pyladoc/latex.py
@ -0,0 +1,346 @@
 import bs4
 from html.parser import HTMLParser
 from typing import Iterator, Generator, Any
 from pandas.io.formats.style import Styler
 import re
 import os
 import shutil
 import subprocess
 import tempfile
 from .latex_escaping import unicode_to_latex_dict, latex_escape_dict
 def basic_formatter(value: Any) -> str:
    """Convert an arbitrary value to its string form and escape it for LaTeX."""
    text = str(value)
    return escape_text(text)
 def to_ascii(text: str) -> str:
    """
    Replace often used unicode characters in LaTeX code or text
    with their LaTeX ASCII equivalents.

    Only the characters listed in unicode_to_latex_dict are replaced;
    everything else is passed through unchanged.

    Args:
        text: The text to convert.

    Returns:
        The converted text.
    """
    # Every dictionary key is one alternative of the search pattern
    pattern = '|'.join(unicode_to_latex_dict)
    return re.sub(pattern, lambda hit: unicode_to_latex_dict[hit.group()], text)
 def normalize_label_text(text: str) -> str:
    """
    Replace every character that is not allowed in a label by a hyphen.

    Allowed are ASCII letters, digits, '.' and ':'.

    Args:
        text: Input text

    Returns:
        Normalized text
    """
    not_allowed = re.compile(r"[^a-zA-Z0-9.:]")
    return not_allowed.sub('-', text)
 def escape_text(text: str) -> str:
    """
    Escape special LaTeX characters and often used unicode characters in a string.

    The keys of latex_escape_dict and unicode_to_latex_dict are combined into
    one regular expression. A match of the reference pattern (the only
    capture group) is replaced by its template with the normalized label
    inserted; every other match is replaced by its dictionary value.

    Args:
        text: The text to escape

    Returns:
        Escaped text
    """
    translation = latex_escape_dict | unicode_to_latex_dict
    pattern = '|'.join(translation)

    def substitute(hit: 're.Match[str]') -> str:
        found = hit.group()
        # Look up which dictionary key produced this match
        candidates = [replacement for key, replacement in translation.items() if re.match(key, found)]
        label = hit.group(1)
        if label:
            return candidates[0].replace(r'\g<1>', normalize_label_text(label))
        return candidates[0]
    return re.sub(pattern, substitute, text)
 def render_pandas_styler_table(df_style: Styler, caption: str = '', label: str = '', centering: bool = True) -> str:
    """
    Convert a pandas Styler object to a LaTeX table environment.

    The column type is guessed from the body cells: a column whose display
    values all look numeric (optionally followed by a unit text) becomes an
    'S' column, any other column is left aligned ('l'). Hidden cells are skipped.

    Args:
        df_style: The pandas Styler object to convert.
        caption: The caption for the table.
        label: Label for referencing the table.
        centering: Whether to center the table.

    Returns:
        The LaTeX code.
    """
    # Guess column type
    numeric = re.compile(r'^[<>]?\s*(?:\d+,?)+(?:\.\d+)?(?:\s\D.*)?$')
    table: dict[str, Any] = df_style._translate(False, False, blank='')  # type: ignore[attr-defined]
    body_rows = table['body']
    parts: list[str] = ['\\begin{table}\n']
    if centering:
        parts.append('\\centering\n')
    column_spec = ''.join(
        'S' if all(numeric.match(row[ci]['display_value'].strip()) for row in body_rows) else 'l'
        for ci, cell in enumerate(body_rows[0]) if cell['is_visible'])
    if caption:
        parts.append(f"\\caption{{{escape_text(caption)}}}\n")
    if label:
        parts.append(f"\\label{{{normalize_label_text(label)}}}\n")
    parts.append(f"\\begin{{tabular}}{{{column_spec}}}\n\\toprule\n")
    for head_row in table['head']:
        head_cells = [f"\\text{{{escape_text(cell['display_value'].strip())}}}"
                      for cell in head_row if cell['is_visible']]
        parts.append(' & '.join(head_cells))
        parts.append(' \\\\\n')
    parts.append('\\midrule\n')
    for body_row in body_rows:
        body_cells = [escape_text(cell['display_value'].strip())
                      for cell in body_row if cell['is_visible']]
        parts.append(' & '.join(body_cells))
        parts.append(' \\\\\n')
    parts.append('\\bottomrule\n\\end{tabular}\n\\end{table}')
    return ''.join(parts)
 def from_html_old(html_code: str) -> str:
    """
    Converts HTML code to LaTeX code using BeautifulSoup.

    Known tags are wrapped in a fixed pair of LaTeX fragments, tables are
    converted to a tabular environment and all text nodes are escaped with
    escape_text(). Unknown tags are dropped while their content is kept.

    Args:
        html_code: The HTML code to convert.

    Returns:
        The LaTeX code.
    """
    root = bs4.BeautifulSoup(html_code, 'html.parser')
    html_to_latex = {
        'strong': ('\\textbf{', '}'),
        'b': ('\\textbf{', '}'),
        'em': ('\\emph{', '}'),
        'i': ('\\emph{', '}'),
        'p': ('', '\n\n'),
        'h1': ('\\section{', '}'),
        'h2': ('\\subsection{', '}'),
        'h3': ('\\subsubsection{', '}'),
        'ul': ('\\begin{itemize}', '\\end{itemize}'),
        'ol': ('\\begin{enumerate}', '\\end{enumerate}'),
        'li': ('\\item ', ''),
        'latex_eq': ('\\[', '\\]'),
    }

    def handle_table(table: bs4.element.Tag) -> str:
        """Render one HTML table as a tabular environment with one 'l' column per cell of the first row."""
        parts: list[str] = []
        for row in table.find_all('tr'):
            assert isinstance(row, bs4.element.Tag), 'HTML table not valid'
            cells = row.find_all(['th', 'td'])
            if not parts:
                # The first row defines the number of columns
                parts.append("\\begin{tabular}{|" + "|".join(['l'] * len(cells)) + "|}\\toprule\n")
            # Emit every row, including the first one (it used to be dropped)
            parts.append(" & ".join(escape_text(cell.get_text(strip=True)) for cell in cells) + " \\\\\n")
        if not parts:
            # A table without rows has no columns; emit nothing instead of
            # a closing fragment without a matching \begin{tabular}
            return ''
        parts.append("\\bottomrule\n\\end{tabular}")
        return ''.join(parts)

    def parse_node(element: bs4.element.Tag) -> Iterator[str]:
        """Yield the LaTeX fragments of an element and, recursively, of its children."""
        prefix, post = html_to_latex.get(element.name, ('', ''))
        yield prefix
        for c in element.children:
            if isinstance(c, bs4.element.Tag):
                if c.name == 'table':
                    yield handle_table(c)
                else:
                    yield from parse_node(c)
            else:
                yield escape_text(c.text)
        yield post
    return ''.join(parse_node(root))
 def from_html(html_code: str) -> str:
    """
    Converts HTML code to LaTeX code using HTMLParser.
    Args:
        html_code: The HTML code to convert.
    Returns:
        The LaTeX code.
    """
    html_to_latex = {
        'strong': ('\\textbf{', '}'),
        'b': ('\\textbf{', '}'),
        'em': ('\\emph{', '}'),
        'i': ('\\emph{', '}'),
        'p': ('', '\n\n'),
        'h1': ('\\section{', '}\n'),
        'h2': ('\\subsection{', '}\n'),
        'h3': ('\\subsubsection{', '}\n'),
        'ul': ('\\begin{itemize}\n', '\\end{itemize}\n'),
        'ol': ('\\begin{enumerate}\n', '\\end{enumerate}\n'),
        'li': ('\\item ', '\n')
    }
    class LaTeXHTMLParser(HTMLParser):
        def __init__(self) -> None:
            super().__init__()
            self.latex_code: list[str] = []
            self.header_index: int = -1
            self.column_alignment = ''
            self.midrule_flag = False
            self.header_flag = False
        def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
            if tag in html_to_latex:
                prefix, _ = html_to_latex[tag]
                self.latex_code.append(prefix)
            elif tag == 'table':
                self.header_index = len(self.latex_code)
                self.latex_code.append('')  # Placeholder for column header
                self.midrule_flag = False
                self.header_flag = False
            elif tag == 'tr':
                self.column_alignment = ''
            elif tag in ['th', 'td']:
                style = [v for k, v in attrs if k == 'style']
                if style and style[0] and 'right' in style[0]:
                    self.column_alignment += 'r'
                else:
                    self.column_alignment += 'l'
            elif tag == 'a':
                href = [v for k, v in attrs if k == 'href']
                assert href, 'Link href attribute is missing'
                self.latex_code.append(f"\\href{{{href[0]}}}{{")
        def handle_endtag(self, tag: str) -> None:
            if tag in html_to_latex:
                _, postfix = html_to_latex[tag]
                self.latex_code.append(postfix)
            elif tag == 'table':
                self.latex_code.append("\\bottomrule\n\\end{tabular}\n")
            elif tag == 'tr':
                self.latex_code.pop()  # Remove column separator after last entry
                if self.header_index >= 0:
                    self.latex_code[self.header_index] = f"\\begin{{tabular}}{{{self.column_alignment}}}\\toprule\n"
                    self.header_index = -1
                self.latex_code.append(' \\\\\n')
                if self.header_flag and not self.midrule_flag:
                    self.latex_code.append("\\midrule\n")
                    self.midrule_flag = True
            elif tag == 'th':
                self.latex_code.append(" & ")
                self.header_flag = True
            elif tag == 'td':
                self.latex_code.append(" & ")
            elif tag == 'a':
                self.latex_code.append("}")
        def handle_data(self, data: str) -> None:
            if data.strip():
                self.latex_code.append(escape_text(data))
    parser = LaTeXHTMLParser()
    parser.feed(html_code)
    return ''.join(parser.latex_code)
 def compile(latex_code: str, output_file: str = '', encoding: str = 'utf-8') -> tuple[bool, list[str], list[str]]:
    """
    Compiles LaTeX code to a PDF file.

    The code is piped to pdflatex, which runs in a temporary directory.
    pdflatex is started up to three times; another run is started only if
    the previous one emitted a warning mentioning references and no error
    was found. The resulting PDF is copied to output_file if one is given.

    Args:
        latex_code: The LaTeX code to compile.
        output_file: The output file path.
        encoding: The encoding of the LaTeX code.

    Returns:
        A tuple with three elements:
        - A boolean indicating whether the compilation was successful.
        - A list of errors.
        - A list of warnings.
    """
    with tempfile.TemporaryDirectory() as tmp_path:
        command = ['pdflatex', '-halt-on-error', '--output-directory', tmp_path]
        errors: list[str] = []
        warnings: list[str] = []
        for i in range(1, 4):
            rerun_flag = False
            error_flag = False
            process = subprocess.run(command, input=latex_code.encode(encoding), stdout=subprocess.PIPE)
            # The log can contain bytes that are not valid in `encoding`;
            # replace them instead of aborting with a UnicodeDecodeError
            log = process.stdout.decode(encoding, errors='replace')
            for line in log.split('\n'):
                if 'Warning' in line:
                    warnings.append(f"Run {i}: " + line)
                    if 'reference' in line:
                        rerun_flag = True
                if line.startswith('!') or line.startswith('*!'):
                    error_flag = True
                # Everything from the first error marker on belongs to the error report
                if error_flag:
                    errors.append(line)
            if process.returncode != 0 and not errors:
                # Failure without a '!' line in the log must not be reported as success
                errors.append(f"Run {i}: pdflatex exited with return code {process.returncode}")
            if not rerun_flag or errors:
                break
        # Copy pdf file
        file_list = [f for f in os.listdir(tmp_path) if f.lower().endswith('.pdf')]
        if file_list:
            pdf_file = os.path.join(tmp_path, file_list[0])
            if output_file:
                shutil.copyfile(pdf_file, output_file)
    return not errors, errors, warnings
 def inject_latex_command(text: str, command: str) -> str:
    """
    Insert a LaTeX command after the last usepackage line of a document.

    The command is surrounded by empty lines. If the text contains no
    usepackage line the command is appended at the end.

    Args:
        text: The LaTeX code
        command: The command to insert

    Returns:
        The LaTeX code including the command
    """
    rows = text.splitlines()
    package_rows = [index for index, row in enumerate(rows)
                    if row.strip().startswith("\\usepackage")]
    # Directly behind the last package import, or at the very end
    position = package_rows[-1] + 1 if package_rows else len(rows)
    rows.insert(position, f"\n{command}\n")
    return '\n'.join(rows)
--- a/src/pyladoc/latex_escaping.py
+++ b/src/pyladoc/latex_escaping.py
@ -0,0 +1,89 @@
 # Maps single unicode characters to their LaTeX (ASCII) replacement.
 # The keys are joined with '|' into a regular expression by the functions
 # in latex.py, so a key must not contain unescaped regex metacharacters.
 unicode_to_latex_dict = {
    # Unicode numeric subscripts
    '₀': r'\textsubscript{0}', '₁': r'\textsubscript{1}', '₂': r'\textsubscript{2}', '₃': r'\textsubscript{3}',
    '₄': r'\textsubscript{4}', '₅': r'\textsubscript{5}', '₆': r'\textsubscript{6}', '₇': r'\textsubscript{7}',
    '₈': r'\textsubscript{8}', '₉': r'\textsubscript{9}',
    # Unicode numeric superscripts
    '⁰': r'\textsuperscript{0}', '¹': r'\textsuperscript{1}', '²': r'\textsuperscript{2}', '³': r'\textsuperscript{3}',
    '⁴': r'\textsuperscript{4}', '⁵': r'\textsuperscript{5}', '⁶': r'\textsuperscript{6}', '⁷': r'\textsuperscript{7}',
    '⁸': r'\textsuperscript{8}', '⁹': r'\textsuperscript{9}', '⁺': r'\textsuperscript{+}', '⁻': r'\textsuperscript{-}',
    # Often used European non-ascii-characters
    'ä': r'{\"a}',
    'ö': r'{\"o}',
    'ü': r'{\"u}',
    'Ä': r'{\"A}',
    'Ö': r'{\"O}',
    'Ü': r'{\"U}',
    'ß': r'{\ss}',
    'é': r"{\'e}",
    'è': r"{\`e}",
    'ê': r"{\^e}",
    'à': r"{\`a}",
    'â': r"{\^a}",
    'ç': r"{\c{c}}",
    'É': r"{\'E}",
    'È': r"{\`E}",
    'Ê': r"{\^E}",
    'À': r"{\`A}",
    'Â': r"{\^A}",
    'Ç': r"{\c{C}}",
    'ó': r"{\'o}",
    'ò': r"{\`o}",
    'ô': r"{\^o}",
    'Ó': r"{\'O}",
    'Ò': r"{\`O}",
    'Ô': r"{\^O}",
    'í': r"{\'i}",
    'ì': r"{\`i}",
    'î': r"{\^i}",
    'Í': r"{\'I}",
    'Ì': r"{\`I}",
    'Î': r"{\^I}",
    'ú': r"{\'u}",
    'ù': r"{\`u}",
    'û': r"{\^u}",
    'Ú': r"{\'U}",
    'Ù': r"{\`U}",
    'Û': r"{\^U}",
    'å': r"{\r{a}}",
    'Å': r"{\r{A}}",
    'ø': r"{\o}",
    'Ø': r"{\O}",
    'æ': r"{\ae}",
    'Æ': r"{\AE}",
    'œ': r"{\oe}",
    'Œ': r"{\OE}",
    # Other unicode
    '°': r'{\textdegree}',
    'µ': r'{\textmu}',
    'π': r'$\pi$',
    '≈': r'$\approx$',
    '±': r'$\pm$',
    '≠': r'$\neq$',
    '∆': r'$\Delta$',
    'Ω': r'$\Omega$',
    'Λ': r'$\Lambda$',
    'Σ': r'$\Sigma$',
    # '€': r'{\euro}',
    '£': r'{\pounds}',
    '¥': r'{\yen}',
    '\u00A0': r'~',  # Non-breaking space
    '\u2007': ' '  # Figure space
 }
 # Maps characters with a special meaning in LaTeX to their escaped form.
 # The keys are regular expression patterns (hence the escaped '$', '^'
 # and '\'); the values are the replacement texts.
 latex_escape_dict = {
    '&': r'\&',
    '%': r'\%',
    r'\$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
    '<': r'{\textless}',
    '>': r'{\textgreater}',
    '~': r'\textasciitilde{}',
    r'\^': r'\textasciicircum{}',
    r'\\': r'\textbackslash{}',
    # References:
    # "@<word>:<label>" is turned into a \ref command; this is the only
    # pattern with a capture group, \g<1> stands for the captured label
    r'@(\w+:[\w\_\-]+)': r'\ref{\g<1>}'
 }
--- a/src/pyladoc/templates/default_template.tex
+++ b/src/pyladoc/templates/default_template.tex
@ -0,0 +1,38 @@
 \documentclass[a4paper,12pt]{article}
 % Packages
 \usepackage[utf8]{inputenc}
 \usepackage[T1]{fontenc}
 \usepackage{lmodern}  % Load Latin Modern font
 \usepackage{graphicx} % For including images
 \usepackage{amsmath}  % For mathematical symbols
 \usepackage{amssymb}  % For additional symbols
 \usepackage{hyperref} % For hyperlinks
 \usepackage{caption}  % For customizing captions
 \usepackage{geometry} % To set margins
 \usepackage{natbib}   % For citations
 \usepackage{float}    % For fixing figure positions
 \usepackage{siunitx}  % For scientific units
 \usepackage{booktabs} % For professional-looking tables
 \usepackage{pgf} % For using pgf graphics
 \usepackage{textcomp, gensymb} % provides \degree symbol
 \sisetup{
  table-align-text-post = false
 }
 % Geometry Settings
 \geometry{margin=1in} % 1-inch margins
 % Title and Author Information
 % \title{Report Title}
 % \author{Your Name \\ Department of XYZ \\ \texttt{email@example.com}}
 % \date{\today}
 \begin{document}
 % Title Page
 % # \maketitle
 % <!--CONTENT-->
 \end{document}
--- a/src/pyladoc/templates/test_template.html
+++ b/src/pyladoc/templates/test_template.html
@ -0,0 +1,114 @@
 <!doctype html>
 <html lang="en">
 <head>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Test template</title>
  <style>
    body
    {
      background-color: white;
      font-family: Lucida Grande,Lucida Sans Unicode,Lucida Sans,Geneva,Verdana,sans-serif;
    }
    div.document
    {
        max-width: 820px;
        top: 20px;
        overflow: visible;
        margin: 0 auto;
        width: 90%;
        padding-bottom: 50px;
    }
    div h1
    {
        font-size: 32px;
        font-weight: normal;
        margin-bottom: 10px;
        margin-top: 24px;
        color: black;
    }
    div h2
    {
        font-size: 24px;
        font-weight: normal;
        margin-bottom: 10px;
        margin-top: 24px;
        color: black;
    }
    div.figure {
        text-align: center;
    }
    div svg {
        margin-left: -5%;
        max-width: 110%;
        height: auto;
        object-fit: contain;
    }
    .equation-container {
            display: flex;
            justify-content: space-between;
            align-items: center;
            width: 100%;
    }
    .equation {
        text-align: center;
        width: 100%;
    }
    .equation-number {
        text-align: right;
    }
    table {
        border-collapse: collapse;
        margin: 20px auto;
    }
    table th,
    table td {
        padding: 8px;
        font-variant-numeric: tabular-nums;
    }
    table th {
        background-color: #f4f4f4;
        font-weight: bold;
    }
    /* Alternating row colors for readability */
    table tr:nth-child(even) {
        background-color: #f9f9f9;
    }
    table tr:nth-child(odd) {
        background-color: #ffffff;
    }
    /* Caption styling */
    table caption {
        caption-side: top;
        font-weight: bold;
        font-size: 16px;
        margin-bottom: 10px;
    }
    /* Fine-tuning text in table */
    table td, table th {
        vertical-align: middle;
        line-height: 1.5;
    }
  </style>
 </head>
 <body>
    <div class="document">
        <!--CONTENT-->
    </div>
 </body>
--- a/tests/document_validation.py
+++ b/tests/document_validation.py
@ -0,0 +1,64 @@
 from typing import Generator, Any
 from lxml import etree
 from lxml.etree import _Element as EElement  # type: ignore
 import requests
 # Load the HTML page template once at import time. The path is relative to
 # the current working directory.
 with open('src/pyladoc/templates/test_template.html', mode='rt', encoding='utf-8') as f:
    html_test_template = f.read()
 def add_line_numbers(multiline_string: str) -> str:
    """Prefix every line of a string with its 1-based line number and a colon."""
    return "\n".join(
        f"{number}: {content}"
        for number, content in enumerate(multiline_string.splitlines(), start=1))
 def validate_html_with_w3c(html_string: str) -> dict[str, Any]:
    """
    Send an HTML document to the W3C Nu HTML checker and return its result.

    Args:
        html_string: The complete HTML document to validate

    Returns:
        The decoded JSON answer of the validator if the request succeeded,
        otherwise a dictionary with an "error" entry (and "details" holding
        the response text for a non-200 status code).
    """
    validator_url = "https://validator.w3.org/nu/"
    # Parameters for the POST request
    headers = {
        "Content-Type": "text/html; charset=utf-8",
        "User-Agent": "Python HTML Validator"}
    try:
        # Encode explicitly so that the body matches the declared charset, and
        # set a timeout so that an unresponsive server cannot block forever
        response = requests.post(validator_url, headers=headers, data=html_string.encode('utf-8'),
                                 params={"out": "json"}, timeout=30)
        if response.status_code == 200:
            return response.json()
        else:
            return {
                "error": f"Failed to validate HTML. Status code: {response.status_code}",
                "details": response.text
            }
    except requests.RequestException as e:
        return {"error": f"An error occurred while connecting to the W3C Validator: {str(e)}"}
 def validate_html(html_string: str, validate_online: bool = False, check_for: 'list[str] | None' = None) -> None:
    """
    Assert that HTML code contains the expected tags and optionally validate it online.

    Args:
        html_string: The HTML code to check
        validate_online: If true the code is embedded into the test template
            and sent to the W3C validator; any validator message fails the check
        check_for: Tag names that must occur in the HTML code; defaults to
            'table', 'svg' and 'div'

    Raises:
        AssertionError: If a tag is missing or the online validation fails
    """
    # None instead of a list literal as default avoids a mutable default argument
    if check_for is None:
        check_for = ['table', 'svg', 'div']
    root = etree.fromstring(html_string, parser=etree.HTMLParser(recover=True))

    def recursive_search(element: EElement) -> Generator[str, None, None]:
        # Only string tags are collected (presumably this skips comment nodes)
        if isinstance(element.tag, str):
            yield element.tag
        for child in element:
            yield from recursive_search(child)
    tags = set(recursive_search(root))
    for tag_type in check_for:
        assert tag_type in tags, f"Tag {tag_type} not found in the html code"
    if validate_online:
        test_page = html_test_template.replace('<!--CONTENT-->', html_string)
        validation_result = validate_html_with_w3c(test_page)
        assert 'messages' in validation_result, 'Validate request failed'
        if validation_result['messages']:
            # Print the numbered page so that the reported line numbers can be looked up
            print(add_line_numbers(test_page))
        for verr in validation_result['messages']:
            print(f"- {verr['type']}: {verr['message']} (line: {verr['lastLine']})")
        assert len(validation_result['messages']) == 0, f'{len(validation_result["messages"])} validation error, first error: {validation_result["messages"][0]["message"]}'
--- a/tests/out/test_html_render.html
+++ b/tests/out/test_html_render.html
--- a/tests/out/test_latex_render.pdf
+++ b/tests/out/test_latex_render.pdf
--- a/tests/out/test_markdown_characters.html
+++ b/tests/out/test_markdown_characters.html
@ -0,0 +1,5 @@
 <h1>Special caracters</h1>
 <p>Umlaute: ÖÄÜ öäü</p>
 <p>Other: ß, €, @, $, %, ~, µ</p>
 <p>Units: m³, cm²</p>
 <p>Controll characters: &lt;, &gt;, ", ', &amp;, |, /, \</p>
--- a/tests/out/test_markdown_equations.html
+++ b/tests/out/test_markdown_equations.html
@ -0,0 +1,39 @@
 <h1>Source Equations</h1>
 <ol>
 <li>$4(3x + 2) - 5(x - 1) = 3x + 14$</li>
 <li>$
 rac{2y + 5}{4} +
 rac{3y - 1}{2} = 5$</li>
 <li>$
 rac{5}{x + 2} +
 rac{2}{x - 2} = 3$</li>
 <li>$8(3b - 5) + 4(b + 2) = 60$</li>
 <li>$2c^2 - 3c - 5 = 0$</li>
 <li>$4(2d - 1) + 5(3d + 2) = 7d + 28$</li>
 <li>$q^2 + 6q + 9 = 16$</li>
 </ol>
 <h1>Result Equations</h1>
 <ol>
 <li>$x =
 rac{1}{4}$</li>
 <li>$y =
 rac{17}{8}$</li>
 <li>$z =
 rac{7}{3}$</li>
 <li>$x = 1$ or $x = -6$</li>
 <li>$a =
 rac{1}{3}$ or $a = 2$</li>
 <li>$x = -
 rac{2}{3}$ or $x = 3$</li>
 <li>$b =
 rac{23}{7}$</li>
 </ol>
 <h1>Step by Step</h1>
 <ol>
 <li>Distribute: $12x + 8 - 5x + 5 = 3x + 14$</li>
 <li>Combine like terms: $7x + 13 = 3x + 14$</li>
 <li>Subtract $3x$: $4x + 13 = 14$</li>
 <li>Subtract $13$: $4x = 1$</li>
 <li>Divide by $4$: $x =
 rac{1}{4}$</li>
 </ol>
--- a/tests/out/test_markdown_style.html
+++ b/tests/out/test_markdown_style.html
@ -0,0 +1,44 @@
 <p>Below is an in-depth explanation of the AArch64 (ARM64)
 unconditional branch instruction—often simply called the
 “B” instruction—and how its 26‐bit immediate field (imm26)
 is laid out and later relocated during linking.</p>
 <hr>
 <h2>Instruction Layout</h2>
 <p>The unconditional branch in AArch64 is encoded in a 32‑bit
 instruction. Its layout is as follows:</p>
 <pre><code>Bits:  31         26 25                           0
        +-------------+------------------------------+
        |  Opcode     |          imm26               |
        +-------------+------------------------------+
 </code></pre>
 <ul>
 <li><strong>Opcode (bits 31:26):</strong></li>
 <li>For a plain branch (<code>B</code>), the opcode is <code>000101</code>.</li>
 <li>
 <p>For a branch with link (<code>BL</code>), which saves the return
 address (i.e., a call), the opcode is <code>100101</code>.
 These 6 bits determine the instruction type.</p>
 </li>
 <li>
 <p><strong>Immediate Field (imm26, bits 25:0):</strong></p>
 </li>
 <li>This 26‑bit field holds a signed immediate value.</li>
 <li>
 <p><strong>Offset Calculation:</strong> At runtime, the processor:</p>
 <ol>
 <li><strong>Shifts</strong> the 26‑bit immediate left by 2 bits.
 (Because instructions are 4-byte aligned,
 the two least-significant bits are always zero.)</li>
 <li><strong>Sign-extends</strong> the resulting 28‑bit value to
 the full register width (typically 64 bits).</li>
 <li><strong>Adds</strong> this value to the program counter
 (PC) to obtain the branch target.</li>
 </ol>
 </li>
 <li>
 <p><strong>Reach:</strong></p>
 </li>
 <li>With a 26‑bit signed field that’s effectively 28 bits
  after the shift, the branch can cover a range
  of approximately ±128&nbsp;MB from the current instruction.</li>
 </ul>
--- a/tests/out/test_markdown_table.html
+++ b/tests/out/test_markdown_table.html
@ -0,0 +1,77 @@
 <h2>Klemmen</h2>
 <table>
 <thead>
 <tr>
 <th style="text-align: right;">Anz.</th>
 <th>Typ</th>
 <th>Beschreibung</th>
 </tr>
 </thead>
 <tbody>
 <tr>
 <td style="text-align: right;">12</td>
 <td>BK9050</td>
 <td>Buskoppler</td>
 </tr>
 <tr>
 <td style="text-align: right;">2</td>
 <td>KL1104</td>
 <td>4 Digitaleingänge</td>
 </tr>
 <tr>
 <td style="text-align: right;">2</td>
 <td>KL2404</td>
 <td>4 Digitalausgänge (0,5 A)</td>
 </tr>
 <tr>
 <td style="text-align: right;">3</td>
 <td>KL2424</td>
 <td>4 Digitalausgänge (2 A)</td>
 </tr>
 <tr>
 <td style="text-align: right;">2</td>
 <td>KL4004</td>
 <td>4 Analogausgänge</td>
 </tr>
 <tr>
 <td style="text-align: right;">1</td>
 <td>KL4002</td>
 <td>2 Analogausgänge</td>
 </tr>
 <tr>
 <td style="text-align: right;">22</td>
 <td>KL9188</td>
 <td>Potenzialverteilungsklemme</td>
 </tr>
 <tr>
 <td style="text-align: right;">1</td>
 <td>KL9100</td>
 <td>Potenzialeinspeiseklemme</td>
 </tr>
 <tr>
 <td style="text-align: right;">3</td>
 <td>KL3054</td>
 <td>4 Analogeingänge</td>
 </tr>
 <tr>
 <td style="text-align: right;">5</td>
 <td>KL3214</td>
 <td>PT100 4 Temperatureingänge (3-Leiter)</td>
 </tr>
 <tr>
 <td style="text-align: right;">3</td>
 <td>KL3202</td>
 <td>PT100 2 Temperatureingänge (3-Leiter)</td>
 </tr>
 <tr>
 <td style="text-align: right;">1</td>
 <td>KL2404</td>
 <td>4 Digitalausgänge</td>
 </tr>
 <tr>
 <td style="text-align: right;">2</td>
 <td>KL9010</td>
 <td>Endklemme</td>
 </tr>
 </tbody>
 </table>
--- a/tests/test_latex_tools.py
+++ b/tests/test_latex_tools.py
@ -0,0 +1,151 @@
 import pyladoc.latex
 def normalize_latex_code(latex_code: str) -> str:
    """
    Drop empty lines and strip surrounding whitespace from the remaining lines.

    Lines consisting only of whitespace are kept as empty lines because
    the emptiness check happens before stripping.
    """
    non_empty_lines = filter(None, latex_code.splitlines())
    return '\n'.join(map(str.strip, non_empty_lines))
 def check_only_ascii(latex_code: str) -> bool:
    """Return True if the text consists of ASCII characters only (also for an empty text)."""
    return latex_code.isascii()
 def test_latex_from_html():
    """
    Convert an HTML snippet with pyladoc.latex.from_html and compare it with reference LaTeX code.

    The snippet covers a heading, paragraphs with inline formatting, umlauts,
    LaTeX control characters, a list and a table. The result must be pure
    ASCII and, after normalization, equal to the reference code.
    """
    html_code = """
    <h1>Test</h1>
    <p>This is are Umlautes: Ä,Ö and Ü</p>
    <p>This is a <b>test</b>.</p>
    <p>And this is another <em>test</em>.</p>
    <p>And this is a <strong>third</strong> test.</p>
    <p>And this is a <i>fourth</i> test.</p>
    <p>This is a LaTeX command: \\textbf{test}</p>
    <p>This are typical control characters: {, }, <, >, ", ', &, |, /, \\</p>
    <ul>
        <li>Item 1</li>
        <li>Item 2</li>
    </ul>
    <table>
        <tr>
            <th>Header 1</th>
            <th>Header 2</th>
        </tr>
        <tr>
            <td>Cell 1</td>
            <td>Cell 2</td>
        </tr>
    </table>
    """
    latex_code = pyladoc.latex.from_html(html_code)
    # Expected result; indentation and empty lines are removed by normalize_latex_code
    ref_latex_code = r"""
        \section{Test}
        This is are Umlautes: {\"A},{\"O} and {\"U}
        This is a \textbf{test}.
        And this is another \emph{test}.
        And this is a \textbf{third} test.
        And this is a \emph{fourth} test.
        This is a LaTeX command: \textbackslash{}textbf\{test\}
        This are typical control characters: \{, \}, {\textless}, {\textgreater}, ", ', \&, |, /, \textbackslash{}
        \begin{itemize}
        \item Item 1
        \item Item 2
        \end{itemize}
        \begin{tabular}{ll}\toprule
        Header 1 & Header 2 \\
        \midrule
        Cell 1 & Cell 2 \\
        \bottomrule
        \end{tabular}"""
    print(latex_code)
    print('--')
    # print(pyladoc.latex.escape_text(html_code))
    assert check_only_ascii(latex_code), 'Some characters are not ASCII'
    assert normalize_latex_code(ref_latex_code) == normalize_latex_code(latex_code)
 def test_latex_from_markdown():
    """
    Render markdown with tables to HTML and convert that HTML to LaTeX.

    The markdown is added to a DocumentWriter, exported with to_html() and
    passed to pyladoc.latex.from_html. The result must be pure ASCII and,
    after normalization, equal to the reference code.
    """
    markdown_code = """
        ## Test1
        | Anz.| Typ      | Beschreibung
        |----:|----------|------------------------------------
        | 12  | BK9050   | Buskoppler
        |  2  | KL1104   | 4 Digitaleingänge
        |  2  | KL2404   | 4 Digitalausgänge (0,5 A)
        |  3  | KL2424   | 4 Digitalausgänge (2 A)
        |  2  | KL4004   | 4 Analogausgänge
        |  1  | KL4002   | 2 Analogausgänge
        | 22  | KL9188   | Potenzialverteilungsklemme
        |  1  | KL9100   | Potenzialeinspeiseklemme
        |  3  | KL3054   | 4 Analogeingänge
        |  5  | KL3214   | PT100 4 Temperatureingänge (3-Leiter)
        |  3  | KL3202   | PT100 2 Temperatureingänge (3-Leiter)
        |  1  | KL2404   | 4 Digitalausgänge
        |  2  | KL9010   | Endklemme
        This is a **test**.
        ## Test2
        | Anz.| Beschreibung
        |----:|------------------------------------
        | 12  | Buskoppler
        |  2  | 4 Digitaleingänge
        |  2  | 4 Digitalausgänge (0,5 A)
        |  3  | 4 Digitalausgänge (2 A)
        |  2  | 4 Analogausgänge
        |  1  | 2 Analogausgänge
    """
    pyla = pyladoc.DocumentWriter()
    pyla.add_markdown(markdown_code)
    latex_code = pyladoc.latex.from_html(pyla.to_html())
    # Expected result: the right-aligned first markdown column becomes an 'r' column
    ref_latex_code = r"""
        \subsection{Test1}
        \begin{tabular}{rll}\toprule
        Anz. & Typ & Beschreibung \\
        \midrule
        12 & BK9050 & Buskoppler \\
        2 & KL1104 & 4 Digitaleing{\"a}nge \\
        2 & KL2404 & 4 Digitalausg{\"a}nge (0,5 A) \\
        3 & KL2424 & 4 Digitalausg{\"a}nge (2 A) \\
        2 & KL4004 & 4 Analogausg{\"a}nge \\
        1 & KL4002 & 2 Analogausg{\"a}nge \\
        22 & KL9188 & Potenzialverteilungsklemme \\
        1 & KL9100 & Potenzialeinspeiseklemme \\
        3 & KL3054 & 4 Analogeing{\"a}nge \\
        5 & KL3214 & PT100 4 Temperatureing{\"a}nge (3-Leiter) \\
        3 & KL3202 & PT100 2 Temperatureing{\"a}nge (3-Leiter) \\
        1 & KL2404 & 4 Digitalausg{\"a}nge \\
        2 & KL9010 & Endklemme \\
        \bottomrule
        \end{tabular}
        This is a \textbf{test}.
        \subsection{Test2}
        \begin{tabular}{rl}\toprule
        Anz. & Beschreibung \\
        \midrule
        12 & Buskoppler \\
        2 & 4 Digitaleing{\"a}nge \\
        2 & 4 Digitalausg{\"a}nge (0,5 A) \\
        3 & 4 Digitalausg{\"a}nge (2 A) \\
        2 & 4 Analogausg{\"a}nge \\
        1 & 2 Analogausg{\"a}nge \\
        \bottomrule
        \end{tabular}"""
    print(latex_code)
    assert check_only_ascii(latex_code), 'Some characters are not ASCII'
    assert normalize_latex_code(ref_latex_code) == normalize_latex_code(latex_code)
 # Run both tests when this file is executed as a script
 if __name__ == '__main__':
    test_latex_from_html()
    test_latex_from_markdown()
--- a/tests/test_rendering_example_doc.py
+++ b/tests/test_rendering_example_doc.py
@ -0,0 +1,108 @@
 import pyladoc
 import matplotlib.pyplot as plt
 import pandas as pd
 import document_validation
 # Passed as `validate_online` to document_validation.validate_html in test_html_render
 VALIDATE_HTML_CODE_ONLINE = False
 # If true, test_html_render writes the rendered page to tests/out/test_html_render.html
 WRITE_RESULT_FILES = True
 def make_document():
    """
    Build the example document shared by the rendering tests.

    The document consists of a markdown part (special characters, a link,
    a table and a reference to an equation), an equation, a matplotlib bar
    chart and a table created from a pandas DataFrame.

    Returns:
        The filled pyladoc.DocumentWriter
    """
    dw = pyladoc.DocumentWriter()
    dw.add_markdown("""
    # Special characters
    ö ä ü Ö Ä Ü ß @ ∆
    π ≈ ± ∆ Σ
    £ ¥ $ €
    Œ
    # Link
    This is a hyperlink: [nonan.net](https://www.nonan.net)
    # Table
    | Anz.| Typ      | Beschreibung
    |----:|----------|------------------------------------
    | 12  | BK9050   | Buskoppler
    |  2  | KL1104   | 4 Digitaleingänge
    |  2  | KL2404   | 4 Digitalausgänge (0,5 A)
    |  3  | KL2424   | 4 Digitalausgänge (2 A)
    |  2  | KL4004   | 4 Analogausgänge
    |  1  | KL4002   | 2 Analogausgänge
    | 22  | KL9188   | Potenzialverteilungsklemme
    |  1  | KL9100   | Potenzialeinspeiseklemme
    |  3  | KL3054   | 4 Analogeingänge
    |  5  | KL3214   | PT100 4 Temperatureingänge (3-Leiter)
    |  3  | KL3202   | PT100 2 Temperatureingänge (3-Leiter)
    |  1  | KL2404   | 4 Digitalausgänge
    |  2  | KL9010   | Endklemme
    ---
    # Equations
    This line represents a reference to the equation @eq:test1.
    """)
    # The second argument 'test1' is presumably the label that the markdown
    # reference @eq:test1 above points to
    dw.add_equation(r'y = a + b * \sum_{i=0}^{\infty} a_i x^i', 'test1')
    # Figure
    fig, ax = plt.subplots()
    fruits = ['apple', 'blueberry', 'cherry', 'orange']
    counts = [40, 100, 30, 55]
    bar_labels = ['red', 'blue', '_red', 'orange']
    bar_colors = ['tab:red', 'tab:blue', 'tab:red', 'tab:orange']
    ax.bar(fruits, counts, label=bar_labels, color=bar_colors)
    ax.set_ylabel('fruit supply')
    ax.set_title('Fruit supply by kind and color')
    ax.legend(title='Fruit color')
    dw.add_diagram(fig, 'Bar chart with individual bar colors')
    # Table
    # Columns mix plain numbers, numbers with units and text
    mydataset = {
        'Row1': ["Line1", "Line2", "Line3", "Line4", "Line5"],
        'Row2': [120, '95 km/h', 110, '105 km/h', 130],
        'Row3': ['12 g/km', '> 150 g/km', '110 g/km', '1140 g/km', '13.05 g/km'],
        'Row4': ['5 stars', '4 stars', '5 stars', '4.5 stars', '5 stars'],
        'Row5': [3.5, 7.8, 8.5, 6.9, 4.2],
        'Row6': ['1850 kg', '1500 kg', '1400 kg', '1600 kg', '1700 kg'],
        'Row7': ['600 Nm', '250 Nm', '280 Nm', '320 Nm', '450 Nm']
    }
    df = pd.DataFrame(mydataset)
    # The index column is hidden; caption and label are passed along with the table
    dw.add_table(df.style.hide(axis="index"), 'This is a example table', 'example1')
    return dw
 def test_html_render():
    """Render the example document to HTML, validate it and optionally write the result page."""
    html_code = make_document().to_html()
    document_validation.validate_html(html_code, VALIDATE_HTML_CODE_ONLINE)
    if not WRITE_RESULT_FILES:
        return
    with open('tests/out/test_html_render.html', 'w', encoding='utf-8') as f:
        page = pyladoc.inject_to_template(html_code, internal_template='templates/test_template.html')
        f.write(page)
 def test_latex_render():
    """Export the example document to a PDF file and check that the export reports success."""
    pdf_created = make_document().to_pdf('tests/out/test_latex_render.pdf', font_family='serif')
    assert pdf_created
 # Run both rendering tests when this file is executed as a script
 if __name__ == '__main__':
    test_html_render()
    test_latex_render()
--- a/tests/test_rendering_markdown.py
+++ b/tests/test_rendering_markdown.py
@ -0,0 +1,152 @@
 import pyladoc
 import document_validation
 # NOTE(review): not referenced by the tests visible in this module;
 # presumably intended to be forwarded to document_validation.validate_html
 # to toggle an online check - confirm against that helper.
 VALIDATE_HTML_CODE_ONLINE = False
 # When true, each test additionally writes its rendered HTML to tests/out/.
 WRITE_RESULT_FILES = True
 def test_markdown_styling():
    """Render a richly styled markdown text to HTML and validate the result.

    The sample contains a horizontal rule, a heading, a fenced code block,
    bold text, inline code and nested/numbered lists. The validator is asked
    to check for ``strong``, ``ol``, ``li``, ``code`` and ``hr``. When
    ``WRITE_RESULT_FILES`` is set, the HTML is stored in
    ``tests/out/test_markdown_style.html``.
    """
    markdown_source = """
        Below is an in-depth explanation of the AArch64 (ARM64)
        unconditional branch instruction—often simply called the
        “B” instruction—and how its 26‐bit immediate field (imm26)
        is laid out and later relocated during linking.
        ---
        ## Instruction Layout
        The unconditional branch in AArch64 is encoded in a 32‑bit
        instruction. Its layout is as follows:
        ```
        Bits:  31         26 25                           0
                +-------------+------------------------------+
                |  Opcode     |          imm26               |
                +-------------+------------------------------+
        ```
        - **Opcode (bits 31:26):**
        - For a plain branch (`B`), the opcode is `000101`.
        - For a branch with link (`BL`), which saves the return
        address (i.e., a call), the opcode is `100101`.
        These 6 bits determine the instruction type.
        - **Immediate Field (imm26, bits 25:0):**
        - This 26‑bit field holds a signed immediate value.
        - **Offset Calculation:** At runtime, the processor:
            1. **Shifts** the 26‑bit immediate left by 2 bits.
            (Because instructions are 4-byte aligned,
            the two least-significant bits are always zero.)
            2. **Sign-extends** the resulting 28‑bit value to
            the full register width (typically 64 bits).
            3. **Adds** this value to the program counter
            (PC) to obtain the branch target.
        - **Reach:**
        - With a 26‑bit signed field that’s effectively 28 bits
          after the shift, the branch can cover a range
          of approximately ±128 MB from the current instruction.
        """
    writer = pyladoc.DocumentWriter()
    writer.add_markdown(markdown_source)
    rendered = writer.to_html()
    expected_tags = ['strong', 'ol', 'li', 'code', 'hr']
    document_validation.validate_html(rendered, check_for=expected_tags)
    if not WRITE_RESULT_FILES:
        return
    with open('tests/out/test_markdown_style.html', 'w', encoding='utf-8') as out_file:
        out_file.write(rendered)
 def test_markdown_table():
    """Render a markdown pipe table to HTML and validate the result.

    The validator is asked to check for ``table``. When
    ``WRITE_RESULT_FILES`` is set, the HTML is stored in
    ``tests/out/test_markdown_table.html``.
    """
    markdown_source = """
        ## Klemmen
        | Anz.| Typ      | Beschreibung
        |----:|----------|------------------------------------
        | 12  | BK9050   | Buskoppler
        |  2  | KL1104   | 4 Digitaleingänge
        |  2  | KL2404   | 4 Digitalausgänge (0,5 A)
        |  3  | KL2424   | 4 Digitalausgänge (2 A)
        |  2  | KL4004   | 4 Analogausgänge
        |  1  | KL4002   | 2 Analogausgänge
        | 22  | KL9188   | Potenzialverteilungsklemme
        |  1  | KL9100   | Potenzialeinspeiseklemme
        |  3  | KL3054   | 4 Analogeingänge
        |  5  | KL3214   | PT100 4 Temperatureingänge (3-Leiter)
        |  3  | KL3202   | PT100 2 Temperatureingänge (3-Leiter)
        |  1  | KL2404   | 4 Digitalausgänge
        |  2  | KL9010   | Endklemme
        """
    writer = pyladoc.DocumentWriter()
    writer.add_markdown(markdown_source)
    rendered = writer.to_html()
    document_validation.validate_html(rendered, check_for=['table'])
    if not WRITE_RESULT_FILES:
        return
    with open('tests/out/test_markdown_table.html', 'w', encoding='utf-8') as out_file:
        out_file.write(rendered)
 def test_markdown_equations():
    """Render markdown with inline LaTeX equations to HTML and validate it.

    The sample consists of three ``#`` sections whose numbered list items
    contain ``$...$`` equations. The validator is asked to check for ``h1``.
    When ``WRITE_RESULT_FILES`` is set, the HTML is stored in
    ``tests/out/test_markdown_equations.html``.
    """
    pyla = pyladoc.DocumentWriter()
    # Raw string on purpose: in a normal literal the "\f" of every "\frac"
    # is the form-feed escape, so the renderer would receive a control
    # character followed by "rac" instead of the LaTeX command.
    pyla.add_markdown(
        r"""
        # Source Equations
        1. $4(3x + 2) - 5(x - 1) = 3x + 14$
        2. $\frac{2y + 5}{4} + \frac{3y - 1}{2} = 5$
        3. $\frac{5}{x + 2} + \frac{2}{x - 2} = 3$
        4. $8(3b - 5) + 4(b + 2) = 60$
        5. $2c^2 - 3c - 5 = 0$
        6. $4(2d - 1) + 5(3d + 2) = 7d + 28$
        7. $q^2 + 6q + 9 = 16$
        # Result Equations
        1. $x = \frac{1}{4}$
        2. $y = \frac{17}{8}$
        3. $z = \frac{7}{3}$
        4. $x = 1$ or $x = -6$
        5. $a = \frac{1}{3}$ or $a = 2$
        6. $x = -\frac{2}{3}$ or $x = 3$
        7. $b = \frac{23}{7}$
        # Step by Step
        1. Distribute: $12x + 8 - 5x + 5 = 3x + 14$
        2. Combine like terms: $7x + 13 = 3x + 14$
        3. Subtract $3x$: $4x + 13 = 14$
        4. Subtract $13$: $4x = 1$
        5. Divide by $4$: $x = \frac{1}{4}$
        """)
    html_code = pyla.to_html()
    document_validation.validate_html(html_code, check_for=['h1'])
    if WRITE_RESULT_FILES:
        with open('tests/out/test_markdown_equations.html', 'w', encoding='utf-8') as f:
            f.write(html_code)
 def test_markdown_characters():
    """Render markdown containing special characters to HTML and validate it.

    The sample covers umlauts, currency and unit symbols, and characters
    such as ``<``, ``>``, ``&`` and quotes. The validator is asked to check
    for ``h1``. When ``WRITE_RESULT_FILES`` is set, the HTML is stored in
    ``tests/out/test_markdown_characters.html``.
    """
    # Not a raw string: the doubled backslash below yields one backslash.
    markdown_source = """
        # Special caracters
        Umlaute: ÖÄÜ öäü
        Other: ß, €, @, $, %, ~, µ
        Units: m³, cm²
        Controll characters: <, >, ", ', &, |, /, \\
        """
    writer = pyladoc.DocumentWriter()
    writer.add_markdown(markdown_source)
    rendered = writer.to_html()
    document_validation.validate_html(rendered, check_for=['h1'])
    if not WRITE_RESULT_FILES:
        return
    with open('tests/out/test_markdown_characters.html', 'w', encoding='utf-8') as out_file:
        out_file.write(rendered)