first commit

2025-03-28 13:30:08 +01:00 · 2025-03-28 13:30:08 +01:00 · 67c48776ac
commit 67c48776ac
19 changed files with 3646 additions and 0 deletions
--- a/.flake8
+++ b/.flake8
@ -0,0 +1,21 @@
+[flake8]
+# Specify the maximum allowed line length
+max-line-length = 88
+
+# Ignore specific rules
+# For example, E501: Line too long, W503: Line break before binary operator
+ignore = E501, W503, W504
+
+# Exclude specific files or directories
+exclude =
+    .git,
+    __pycache__,
+    build,
+    dist,
+    .conda
+    .venv
+    venv
+
+# Enable specific plugins or options
+# Example: Enabling flake8-docstrings
+select = C,E,F,W,D
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,134 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+.venv/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+pyModbusTCP_old/
+test.py
+test_*.ipynb
+settings.json
--- a/README.md
+++ b/README.md
@ -0,0 +1,111 @@
+# Pyladoc
+
+## Description
+Pyladoc is a Python package for programmatically generating HTML and
+PDF/LaTeX output. This package specifically targets applications where reports
+or results with Pandas tables and Matplotlib figures are generated programmatically
+to be displayed as a website and as a PDF document without any manual formatting
+steps.
+
+This package focuses on the "Document in Code" approach for cases
+where a lot of calculations and data handling is done but not a lot of
+document text needs to be displayed.
+
+LaTeX is used as the backend for PDF generation. There are excellent engines for
+rendering HTML to PDF available, but even if there is no requirement for
+accurate typesetting, programmatically placing content of variable
+composition and size on fixed-size pages without manual intervention
+is a hard problem that LaTeX is very capable of solving.
+
+### Supported primitives
+- Text (can be Markdown or HTML formatted)
+- Headings
+- Tables (Pandas, Markdown or HTML)
+- Matplotlib figures
+- LaTeX equations
+- Named references for figures, tables and equations
+
+### Key Features
+- HTML and PDF/LaTeX rendering of the same document
+- Single file output including figures
+- Figure and equation embedding in HTML by inline SVG, SVG in Base64 or PNG in Base64
+- Figure embedding in LaTeX as PGF/TikZ
+
+### Usage Scenarios
+- Webservices
+- Report generation for lab equipment
+
+## Installation
+It can be installed with pip:
+
+```bash
+pip install pyladoc
+```
+
+## Usage
+It is easy to use as the following example code shows:
+
+```python
+import pandas as pd
+import pyladoc
+
+doc = pyladoc.DocumentWriter()
+
+doc.add_markdown("""
+    # Example
+    This is an example. The @table:pandas_example shows some random data.
+    """)
+
+some_data = {
+    'Row1': ["Line1", "Line2", "Line3"],
+    'Row2': [120, 100, 110],
+    'Row3': ['12 g/km', '> 150 g/km', '110 g/km']
+}
+df = pd.DataFrame(some_data)
+# Register the table under the id that @table:pandas_example refers to above
+doc.add_table(df, 'This is a pandas example table', 'pandas_example')
+
+# Render the same document as an HTML string and as a PDF file
+html_code = doc.to_html()
+
+doc.to_pdf('test.pdf')
+```
+
+## Example outputs
+The following documents are generated by tests/test_rendering_example_doc.py:
+
+- HTML: [test_html_render.html](tests/out/test_html_render.html)
+- PDF: [test_latex_render.pdf](tests/out/test_latex_render.pdf)
+
+## Contributing
+Contributions are welcome, please open an issue or submit a pull request on GitHub.
+
+## Developer Guide
+To get started with developing the `pyladoc` package, follow these steps.
+
+First, clone the repository to your local machine using Git:
+
+```bash
+git clone https://github.com/Nonannet/pyladoc.git
+cd pyladoc
+```
+
+It's recommended to set up a venv:
+
+```bash
+python -m venv venv
+source venv/bin/activate  # On Windows use `venv\Scripts\activate`
+```
+
+Install the package and dev-dependencies while keeping files in the
+current directory:
+
+```bash
+pip install -e .[dev]
+```
+
+Ensure that everything is set up correctly by running the tests:
+
+```bash
+pytest
+```
+   
+## License
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,58 @@
+[project]
+name = "pyladoc"
+version = "1.0.0"
+authors = [
+  { name="Nicolas Kruse", email="nicolas.kruse@nonan.net" },
+]
+description = "Package for generating HTML and PDF/latex from python code"
+readme = "README.md"
+# The package imports typing.Self at module import time, which only exists
+# from Python 3.11 on, so older interpreters cannot import it.
+requires-python = ">=3.11"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+]
+dependencies = [
+    "markdown>=3.3.0",
+    "beautifulsoup4>=4.9.1"
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest", "flake8", "mypy",
+    "lxml", "types-lxml",
+    "requests",
+    "matplotlib>=3.1.1",
+    "pandas>=2.0.0", "Jinja2",
+]
+
+[project.urls]
+Homepage = "https://github.com/Nonannet/pyladoc"
+Repository = "https://github.com/Nonannet/pyladoc"
+Issues = "https://github.com/Nonannet/pyladoc/issues"
+
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[tool.setuptools.package-data]
+pyladoc = ["templates/*"]
+
+
+[tool.mypy]
+files = ["src"]
+strict = true
+warn_return_any = true
+warn_unused_configs = true
+check_untyped_defs = true
+no_implicit_optional = true
+show_error_codes = true
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = "-ra -q"
+testpaths = ["tests"]
+pythonpath = ["src"]
--- a/src/pyladoc/__init__.py
+++ b/src/pyladoc/__init__.py
@ -0,0 +1,570 @@
+from typing import Callable, Generator, Self, Literal, TYPE_CHECKING
+import html
+import markdown
+from base64 import b64encode
+import re
+import io
+from . import latex
+import pkgutil
+
+
+# Indices into the [html_renderer, latex_renderer] pair stored per document element
+HTML_OUTPUT = 0
+LATEX_OUTPUT = 1
+
+if TYPE_CHECKING:
+    # Static type checkers always see the real pandas/matplotlib types
+    from pandas import DataFrame
+    from pandas.io.formats.style import Styler
+    import matplotlib.pyplot as plt
+    from matplotlib.figure import Figure
+    from matplotlib.legend import Legend as Mpl_Legend
+    from matplotlib.text import Text as Mpl_Text
+
+    Table = DataFrame | Styler
+else:
+    # At runtime pandas and matplotlib are optional; a failed import is
+    # signalled by setting DataFrame / Table / Figure to None.
+    try:
+        from pandas import DataFrame
+    except ImportError:
+        DataFrame = None
+
+    try:
+        from pandas.io.formats.style import Styler
+        Table = DataFrame | Styler
+    except ImportError:
+        # NOTE(review): Styler stays undefined on this path — confirm that
+        # no caller touches it when the import failed.
+        Table = DataFrame
+
+    try:
+        import matplotlib.pyplot as plt
+        from matplotlib.figure import Figure
+        from matplotlib.legend import Legend as Mpl_Legend
+        from matplotlib.text import Text as Mpl_Text
+    except ImportError:
+        # Only Figure gets a placeholder; plt, Mpl_Legend and Mpl_Text stay undefined
+        Figure = None
+
+
+# Accepted values for the LaTeX table renderer and the figure output format
+TRenderer = Literal['pandas', 'simple']
+FFormat = Literal['svg', 'png', 'pgf']
+
+
+def _get_pkgutil_string(path: str) -> str:
+    """
+    Loads a resource file of this package and returns it as text.
+
+    Args:
+        path: Path of the resource relative to this package
+
+    Returns:
+        The decoded content of the resource
+    """
+    raw_resource = pkgutil.get_data(__name__, path)
+    assert raw_resource is not None
+    text_content = raw_resource.decode()
+    return text_content
+
+
+def _markdown_to_html(text: str) -> str:
+    """
+    Renders markdown formatted text to HTML.
+
+    Args:
+        text: The markdown source text
+
+    Returns:
+        The generated HTML code
+    """
+    # Turn non-breaking spaces into entities before rendering
+    source = re.sub(r'\u00A0', '&nbsp;', text)
+    enabled_extensions = ['tables', 'fenced_code', 'def_list', 'abbr', 'sane_lists']
+    rendered = markdown.markdown(source, extensions=enabled_extensions)
+    # Emit horizontal rules without the self-closing slash
+    return rendered.replace('<hr />', '<hr>')
+
+
+def escape_html(text: str) -> str:
+    """
+    Escapes special HTML characters in a given string.
+
+    Non-breaking spaces (U+00A0) are emitted as ``&nbsp;`` entities.
+    Surrounding whitespace is stripped and line breaks are replaced by
+    single spaces, so the result is always a single line.
+
+    Args:
+        text: The text to escape
+
+    Returns:
+        Escaped text safe for inserting into HTML code
+    """
+    # Escape first: inserting '&nbsp;' before html.escape() would turn the
+    # ampersand of the entity into '&amp;nbsp;'.
+    ret = html.escape(text).replace('\u00A0', '&nbsp;')
+    return ' '.join(ret.strip().splitlines())
+
+
+def _clean_svg(svg_text: str) -> str:
+    """
+    Removes parts of an SVG document that are not valid for inline SVG.
+
+    Args:
+        svg_text: The SVG code to clean
+
+    Returns:
+        The SVG code without metadata block and without path tags
+        that have no d attribute
+    """
+    # The metadata block contains tags that are not allowed for inline svg
+    without_metadata = re.sub(r'<metadata>.*?</metadata>', '', svg_text, flags=re.DOTALL)
+
+    # Path tags without a d attribute are illegal and get dropped as well
+    cleaned = re.sub(r'<path(?![^>]*\sd=)\s.*?/>', '', without_metadata, flags=re.DOTALL)
+    return cleaned
+
+# def _get_templ_vars(template: str) -> list[str]:
+#    return re.findall("<!---START (.+?)--->.*?<!---END .+?--->", template, re.DOTALL)
+
+
+def _drop_indent(text: str, amount: int) -> str:
+    """
+    Prefixes every line of a multiline text with a number of spaces.
+
+    Note that, despite its name, this function adds indentation; line
+    endings of the input are kept unchanged.
+
+    Args:
+        text: The text to indent
+        amount: The number of space characters to put in front of each line
+
+    Returns:
+        The text with each line indented by the specified amount
+    """
+    padding = ' ' * amount
+    indented_lines = [padding + line for line in text.splitlines(True)]
+    return ''.join(indented_lines)
+
+
+def _save_figure(fig: Figure, buff: io.BytesIO, figure_format: FFormat, font_family: str | None, scale: float) -> None:
+    """
+    Saves a matplotlib figure to a file-like object.
+
+    The font family and the size of the figure are only changed for the
+    duration of the rendering and are restored afterwards, even if the
+    rendering fails.
+
+    Args:
+        fig: The figure to save
+        buff: The file-like object to save the figure to
+        figure_format: The format to save the figure in (svg, png or pgf)
+        font_family: The font family to use for the figure; the fonts of
+            the figure are left unchanged if this is None or empty
+        scale: Scaling factor applied to the figure size while rendering
+    """
+    def get_all_elements() -> Generator[Mpl_Text, None, None]:
+        # All text elements whose font family gets overridden
+        for ax in fig.get_axes():
+            yield ax.title
+            yield ax.xaxis.label
+            yield ax.yaxis.label
+            yield from ax.get_xticklabels() + ax.get_yticklabels()
+            legend: Mpl_Legend = ax.get_legend()
+            if legend:
+                yield from legend.get_texts()
+
+    backends = {'png': 'AGG', 'svg': 'SVG', 'pgf': 'PGF'}
+    assert figure_format in backends, 'Figure format can be pgf (vector), svg (vector) or png (raster)'
+
+    # Store current figure settings. This has to be a list: a generator
+    # would be exhausted by the first loop below and the original fonts
+    # could never be restored.
+    old_state = [(e, e.get_fontfamily()) for e in get_all_elements()]
+    old_size: tuple[float, float] = tuple(fig.get_size_inches())  # type: ignore[unused-ignore]
+
+    try:
+        # Adjust figure settings
+        if font_family:
+            for e, _ in old_state:
+                e.set_fontfamily(font_family)
+
+        fig.set_size_inches(old_size[0] * scale, old_size[1] * scale, False)
+
+        # Render figure
+        fig.savefig(buff, format=figure_format, backend=backends[figure_format])  # type: ignore[unused-ignore]
+    finally:
+        # Reset figure settings
+        for e, s in old_state:
+            e.set_fontfamily(s)
+
+        fig.set_size_inches(old_size, None, False)
+
+
+def figure_to_string(fig: Figure,
+                     figure_format: FFormat = 'svg',
+                     font_family: str | None = None,
+                     scale: float = 1,
+                     alt_text: str = '',
+                     base64: bool = False) -> str:
+    """
+    Renders a matplotlib figure into an ascii-string.
+
+    pgf figures are returned as LaTeX code, svg figures as inline svg code
+    unless base64 is requested. In all other cases an img-tag with the
+    base64 encoded image is returned.
+
+    Args:
+        fig: The figure to convert
+        figure_format: The format to save the figure in (svg, png or pgf)
+        font_family: The font family to use for the figure
+        scale: Scaling factor for the figure size
+        alt_text: The alt text of the img-tag (only used for base64 output)
+        base64: For svg, whether the image is encoded in base64
+
+    Returns:
+        The figure as ascii-string
+    """
+    assert fig and isinstance(fig, Figure), 'fig parameter must be a matplotlib figure'
+    with io.BytesIO() as buff:
+        _save_figure(fig, buff, figure_format, font_family, scale)
+        raw = buff.getvalue()
+
+    if figure_format == 'pgf':
+        # Skip the comments in front of the pgf code
+        offset = raw[:2028].find(b'\\begingroup%')
+        return latex.to_ascii(raw[max(offset, 0):].decode('utf-8'))
+
+    if figure_format == 'svg' and not base64:
+        # Skip the xml and DOCTYPE header
+        offset = raw[:2028].find(b'<svg')
+        return _clean_svg(raw[max(offset, 0):].decode('utf-8'))
+
+    image_mime = {"png": "image/png", "svg": "image/svg+xml"}
+    assert figure_format in image_mime, 'Unknown image format'
+    # base64 assures (7-bit) ascii
+    encoded_image = b64encode(raw).decode('ascii')
+    return '<img alt="%s" src="data:%s;charset=utf-8;base64,%s">' % \
+        (escape_html(alt_text), image_mime[figure_format], encoded_image)
+
+
+def latex_to_figure(latex_code: str) -> Figure:
+    """
+    Renders a LaTeX math expression into a new matplotlib figure.
+
+    The axes are hidden and the figure is sized to 1.2 times the extent
+    of the rendered expression.
+
+    Args:
+        latex_code: The LaTeX math expression (without $ delimiters)
+
+    Returns:
+        The figure showing the expression
+    """
+    assert Figure, 'Matplotlib is required for rendering LaTex expressions for HTML output.'  # type:ignore[truthy-function]
+    fig, ax = plt.subplots()
+    # Hide ticks and frame, only the expression should be visible
+    ax.set_xticks([])
+    ax.set_yticks([])
+    ax.axis('off')
+    math_text = plt.text(0.5, 0.5, f'${latex_code}$',
+                         horizontalalignment='center',
+                         verticalalignment='center',
+                         transform=ax.transAxes)
+    # A draw pass is run before the text extent is queried
+    fig.draw_without_rendering()
+    extent = math_text.get_window_extent()
+    width_inches = extent.width / fig.dpi * 1.2
+    height_inches = extent.height / fig.dpi * 1.2
+    fig.set_size_inches(width_inches, height_inches)
+    return fig
+
+
+def _fillin_fields(template: str, fields: dict[str, str]) -> str:
+    """
+    Replaces field markers in a template by the provided field values.
+
+    A field is marked by <!---START name--->...<!---END name---> on a
+    single line. The whole line up to the end marker is replaced by the
+    value; each line of the value is indented by the number of characters
+    that preceded the start marker. Only the first occurrence of each
+    field is replaced, unknown fields are ignored.
+
+    Args:
+        template: The text containing the field markers
+        fields: Mapping of field names to their replacement text
+
+    Returns:
+        The template with all found fields filled in
+    """
+    html_out = template
+    for variable_name, value in fields.items():
+        # The name is matched literally, regex metacharacters in a field
+        # name must neither break nor widen the pattern.
+        name = re.escape(variable_name)
+        # Find indentation depths:
+        ret = re.search(f"^(.*?)<!---START {name}--->.*?<!---END {name}--->", html_out, flags=re.MULTILINE)
+        if ret:
+            indent_depths = len(ret.group(1))
+            html_out = html_out[:ret.start(0)] + _drop_indent(value, indent_depths) + html_out[ret.end(0):]
+    return html_out
+
+
+def _fillin_reference_names(input_string: str, item_index: dict[str, int]) -> str:
+    """
+    Replaces references of the form @type:id by the number of the item.
+
+    Args:
+        input_string: The text containing the references
+        item_index: Mapping of 'type:id' keys to item numbers
+
+    Returns:
+        The text with all references replaced by their numbers
+    """
+    matches = list(re.finditer(r'(?<=@)\w+:[\w\_\-]+', input_string))
+    pieces: list[str] = []
+    cursor = 0
+    for match in matches:
+        ref = match.group()
+        assert ref in item_index, f"Reference {ref} does not exist in the document"
+        # The match starts behind the '@', so cut one character earlier
+        pieces.append(input_string[cursor:match.start() - 1])
+        pieces.append(str(item_index[ref]))
+        cursor = match.end()
+    pieces.append(input_string[cursor:])
+    return ''.join(pieces)
+
+
+def _check_latex_references(input_string: str, item_index: dict[str, int]) -> str:
+    """
+    Checks that every \\ref{type:id} in the LaTeX code points to a known item.
+
+    Args:
+        input_string: The LaTeX code to check
+        item_index: Mapping of 'type:id' keys to item numbers
+
+    Returns:
+        The unchanged input string
+    """
+    # References are compared against the normalized form of the item keys
+    known_labels = {latex.normalize_label_text(key) for key in item_index}
+    for match in re.finditer(r'(?<=\\ref\{)\w+:[\w\_\\\-]+(?=\})', input_string):
+        ref = match.group()
+        assert ref in known_labels, f"Reference {ref} does not exist in the document"
+    return input_string
+
+
+def _normalize_text_indent(text: str) -> str:
+    """
+    Removes the common code indentation from a multiline text.
+
+    A blank first line is dropped. The indentation to remove is taken
+    from the first line, or from the second line if the first line starts
+    without a space. Indentation beyond that amount is kept, trailing
+    whitespace of every line is removed.
+
+    Args:
+        text: The text to normalize
+
+    Returns:
+        The text with the base indentation removed
+    """
+    text_lines = text.splitlines()
+    if len(text_lines) > 1 and not text_lines[0].strip():
+        text_lines = text_lines[1:]
+
+    if not text_lines:
+        return ''
+
+    if len(text_lines) > 1 and text_lines[0] and text_lines[0][0] != ' ':
+        indent_amount = len(text_lines[1]) - len(text_lines[1].lstrip())
+    else:
+        indent_amount = len(text_lines[0]) - len(text_lines[0].lstrip())
+
+    normalized: list[str] = []
+    for line in text_lines:
+        # Measure leading whitespace only; counting trailing whitespace
+        # too would shift lines that merely end with spaces to the right.
+        content = line.lstrip()
+        extra_indent = max(0, len(line) - len(content) - indent_amount)
+        normalized.append(' ' * extra_indent + content.rstrip())
+    return '\n'.join(normalized)
+
+
+def _create_document_writer() -> 'DocumentWriter':
+    """Creates a new, empty DocumentWriter instance."""
+    return DocumentWriter()
+
+
+def inject_to_template(content: str, template_path: str = '', internal_template: str = '') -> str:
+    """
+    Injects a content string into a template. The placeholder <!--CONTENT-->
+    will be replaced by the content. If the placeholder is prefixed with a
+    '%' comment character, this character will be replaced as well.
+
+    Args:
+        content: The text to insert into the template
+        template_path: Path to a template file; takes precedence over
+            internal_template
+        internal_template: Path to an internal default template
+
+    Returns:
+        Template with included content
+
+    Raises:
+        ValueError: If neither template_path nor internal_template is given
+    """
+    if template_path:
+        # Read as UTF-8 like the internal templates instead of relying on
+        # the platform dependent default encoding
+        with open(template_path, 'r', encoding='utf-8') as f:
+            template = f.read()
+    elif internal_template:
+        template = _get_pkgutil_string(internal_template)
+    else:
+        raise ValueError('No template provided')
+
+    assert '<!--CONTENT-->' in template, 'No <!--CONTENT--> expression in template located'
+    # Drop a leading comment character and whitespace in front of the placeholder
+    prep_template = re.sub(r"\%?\s*<!--CONTENT-->", '<!--CONTENT-->', template)
+    # str.replace inserts the content literally (no backslash interpretation)
+    return prep_template.replace('<!--CONTENT-->', content)
+
+
+class DocumentWriter():
+    """
+    A class to create a document for exporting to HTML or LaTeX.
+
+    Content is added through the add_* methods. Each call stores a pair of
+    render functions (HTML and LaTeX); the output is only generated when
+    to_html, to_latex or to_pdf is called.
+    """
+    def __init__(self) -> None:
+        # One [html_renderer, latex_renderer] pair per document element
+        self._doc: list[list[Callable[[], str]]] = []
+        # Sub-documents by field name, see new_field
+        self._fields: dict[str, DocumentWriter] = dict()
+        # Render settings, overwritten by to_html / to_latex
+        self._base64_svgs: bool = False
+        self._figure_format: FFormat = 'svg'
+        self._table_renderer: TRenderer = 'simple'
+        self._font_family: str | None = None
+        # Number of items per reference type (fig, table, eq, ...)
+        self._item_count: dict[str, int] = {}
+        # Item number by 'type:id' key, used to resolve references
+        self._item_index: dict[str, int] = {}
+        self._fig_scale: float = 1
+
+    def _add_item(self, ref_id: str, ref_type: str, caption_prefix: str) -> str:
+        """
+        Registers a new numbered item and returns its caption prefix.
+
+        Args:
+            ref_id: Id of the item; the item number is used if empty
+            ref_type: Type of the item, each type has its own counter
+            caption_prefix: Format pattern receiving the item number
+
+        Returns:
+            The caption prefix with the item number filled in
+        """
+        current_index = self._item_count.get(ref_type, 0) + 1
+        if not ref_id:
+            ref_id = str(current_index)
+        self._item_index[f"{ref_type}:{ref_id}"] = current_index
+        self._item_count[ref_type] = current_index
+        return caption_prefix.format(current_index)
+
+    def new_field(self, name: str) -> 'DocumentWriter':
+        """
+        Creates a new sub-document that is registered as a named field.
+
+        Args:
+            name: The name of the field
+
+        Returns:
+            The DocumentWriter holding the content of the field
+        """
+        new_dwr = _create_document_writer()
+        self._fields[name] = new_dwr
+        return new_dwr
+
+    def add_document(self, doc: Self) -> None:
+        """
+        Appends all elements of another document to this document.
+
+        Args:
+            doc: The document whose elements are appended
+        """
+        self._doc += doc._doc
+
+    def add_diagram(self, fig: Figure, caption: str = '', ref_id: str = '',
+                    prefix_pattern: str = 'Figure {}: ', ref_type: str = 'fig',
+                    centered: bool = True) -> None:
+        """
+        Adds a matplotlib figure to the document.
+
+        Args:
+            fig: The figure to add
+            caption: The caption of the figure
+            ref_id: Id for referencing the figure; a LaTeX label is only
+                generated if an id is provided
+            prefix_pattern: Pattern for the caption prefix in the HTML
+                output, receives the figure number
+            ref_type: The reference type of the item
+            centered: Whether the figure is centered in the LaTeX output
+        """
+        caption_prefix = self._add_item(ref_id, ref_type, prefix_pattern)
+
+        def render_to_html() -> str:
+            return '<div class="figure">%s%s</div>' % (
+                figure_to_string(fig, self._figure_format, base64=self._base64_svgs, scale=self._fig_scale),
+                '<br>' + caption_prefix + escape_html(caption) if caption else '')
+
+        def render_to_latex() -> str:
+            return '\\begin{figure}%s\n%s\n\\caption{%s}\n%s\\end{figure}' % (
+                '\n\\centering' if centered else '',
+                figure_to_string(fig, 'pgf', self._font_family, scale=self._fig_scale),
+                latex.escape_text(caption),
+                '\\label{%s}\n' % latex.normalize_label_text(ref_type + ':' + ref_id) if ref_id else '')
+
+        self._doc.append([render_to_html, render_to_latex])
+
+    def add_table(self, table: Table, caption: str = '', ref_id: str = '',
+                  prefix_pattern: str = 'Table {}: ', ref_type: str = 'table', centered: bool = True) -> None:
+        """
+        Adds a pandas table to the document.
+
+        Args:
+            table: A pandas DataFrame or DataFrame Styler
+            caption: The caption of the table
+            ref_id: Id for referencing the table
+            prefix_pattern: Pattern for the caption prefix in the HTML
+                output, receives the table number
+            ref_type: The reference type of the item
+            centered: Whether the table is centered in the LaTeX output
+        """
+        assert Table and isinstance(table, Table), 'Table has to be a pandas DataFrame oder DataFrame Styler'
+        caption_prefix = self._add_item(ref_id, ref_type, prefix_pattern)
+        styler = table if isinstance(table, Styler) else getattr(table, 'style', None)
+        assert isinstance(styler, Styler), 'Jinja2 package is required for rendering tables'
+
+        def render_to_html() -> str:
+            html_string = styler.to_html(table_uuid=ref_id, caption=caption_prefix + escape_html(caption))
+            # Drop the style block generated by pandas
+            return re.sub(r'<style.*?>.*?</style>', '', html_string, flags=re.DOTALL)
+
+        def render_to_latex() -> str:
+            if self._table_renderer == 'pandas':
+                return styler.to_latex(
+                    label=latex.normalize_label_text(ref_type + ':' + ref_id),
+                    hrules=True,
+                    convert_css=True,
+                    siunitx=True,
+                    caption=latex.escape_text(caption),
+                    position_float='centering' if centered else None)
+            else:
+                return latex.render_pandas_styler_table(styler, caption, ref_type + ':' + ref_id, centered)
+
+        self._doc.append([render_to_html, render_to_latex])
+
+    def add_text(self, text: str, section_class: str = '') -> None:
+        """
+        Adds a text paragraph to the document.
+
+        Args:
+            text: The text to add
+            section_class: The class for the paragraph
+        """
+        norm_text = _normalize_text_indent(text)
+
+        def render_to_html() -> str:
+            html = '<p>' + escape_html(norm_text) + '</p>'
+            if section_class:
+                return '<div class="' + section_class + '">' + html + '</div>'
+            else:
+                return html
+
+        def render_to_latex() -> str:
+            return latex.from_html(render_to_html())
+
+        self._doc.append([render_to_html, render_to_latex])
+
+    def add_html(self, text: str) -> None:
+        """
+        Adds HTML formatted text to the document. For the LaTeX
+        export only basic HTML for text formatting and tables
+        is supported.
+
+        Args:
+            text: The HTML to add to the document
+        """
+        def render_to_html() -> str:
+            return text
+
+        def render_to_latex() -> str:
+            return latex.from_html(text)
+
+        self._doc.append([render_to_html, render_to_latex])
+
+    def add_h1(self, text: str) -> None:
+        """
+        Adds a h1 heading to the document.
+
+        Args:
+            text: The text of the heading
+        """
+        def render_to_html() -> str:
+            return '<h1>' + escape_html(text) + '</h1>'
+
+        def render_to_latex() -> str:
+            return '\\section{' + latex.escape_text(text) + '}\n'
+
+        self._doc.append([render_to_html, render_to_latex])
+
+    def add_h2(self, text: str) -> None:
+        """
+        Adds a h2 heading to the document.
+
+        Args:
+            text: The text of the heading
+        """
+        def render_to_html() -> str:
+            return '<h2>' + escape_html(text) + '</h2>'
+
+        def render_to_latex() -> str:
+            return '\\subsection{' + latex.escape_text(text) + '}\n'
+
+        self._doc.append([render_to_html, render_to_latex])
+
+    def add_h3(self, text: str) -> None:
+        """
+        Adds a h3 heading to the document.
+
+        Args:
+            text: The text of the heading
+        """
+        def render_to_html() -> str:
+            return '<h3>' + escape_html(text) + '</h3>'
+
+        def render_to_latex() -> str:
+            return '\\subsubsection{' + latex.escape_text(text) + '}\n'
+
+        self._doc.append([render_to_html, render_to_latex])
+
+    def add_equation(self, latex_equation: str, ref_id: str = '', ref_type: str = 'eq') -> None:
+        """
+        Adds a LaTeX equation to the document.
+
+        In the HTML output the equation is rendered as a figure and the
+        equation number is always shown. In the LaTeX output only
+        equations with a ref_id use the numbered equation environment.
+
+        Args:
+            latex_equation: LaTeX formatted equation
+            ref_id: If provided, the equation is displayed with
+                a number and can be referenced by the ref_id
+            ref_type: The reference type of the item
+        """
+        caption = self._add_item(ref_id, ref_type, '({})')
+
+        def render_to_html() -> str:
+            fig = latex_to_figure(latex_equation)
+            try:
+                figure_markup = figure_to_string(fig, self._figure_format, base64=self._base64_svgs)
+            finally:
+                # The figure only exists for this rendering; close it so
+                # repeated exports do not pile up open pyplot figures.
+                plt.close(fig)
+            return ('<div class="equation-container"><div class="equation">%s</div>'
+                    '<div class="equation-number">%s</div></div>') % (figure_markup, caption)
+
+        def render_to_latex() -> str:
+            if ref_id:
+                # Normalize the label like figure and table labels, the
+                # reference check compares against normalized labels too.
+                return '\\begin{equation}\\label{%s}%s\\end{equation}' % (
+                    latex.normalize_label_text(ref_type + ':' + ref_id), latex_equation)
+            else:
+                return '\\[%s\\]' % latex_equation
+
+        self._doc.append([render_to_html, render_to_latex])
+
+    def add_markdown(self, text: str, section_class: str = '') -> None:
+        """
+        Adds a markdown formatted text to the document.
+
+        Args:
+            text: The markdown text to add
+            section_class: The class for the text section
+        """
+        norm_text = _normalize_text_indent(str(text))
+
+        def render_to_html() -> str:
+            html = _markdown_to_html(norm_text)
+            if section_class:
+                return '<div class="' + section_class + '">' + html + '</div>'
+            else:
+                return html
+
+        def render_to_latex() -> str:
+            return latex.from_html(render_to_html())
+
+        self._doc.append([render_to_html, render_to_latex])
+
+    def _render_doc(self, doc_type: int) -> str:
+        """
+        Renders all elements of the document and fills in the fields.
+
+        Args:
+            doc_type: HTML_OUTPUT or LATEX_OUTPUT
+
+        Returns:
+            The rendered document
+        """
+        # NOTE(review): fields are rendered as HTML for both output types —
+        # confirm that this is intended for the LaTeX output.
+        fields = {k: f.to_html() for k, f in self._fields.items()}
+        return _fillin_fields(''.join(el[doc_type]() for el in self._doc), fields)
+
+    def to_html(self, figure_format: FFormat = 'svg',
+                base64_svgs: bool = False, figure_scale: float = 1) -> str:
+        """
+        Export the document to HTML. Figures will be embedded in the HTML code.
+        The format can be selected between png in base64, inline svg or svg in base64.
+
+        Args:
+            figure_format: The format for embedding the figures in the HTML code (svg or png)
+            base64_svgs: Whether to encode svg images in base64
+            figure_scale: Scaling factor for the figure size
+
+        Returns:
+            The HTML code
+        """
+        self._figure_format = figure_format
+        self._base64_svgs = base64_svgs
+        self._fig_scale = figure_scale
+
+        return _fillin_reference_names(self._render_doc(HTML_OUTPUT), self._item_index)
+
+    def to_latex(self, font_family: Literal[None, 'serif', 'sans-serif'] = None,
+                 table_renderer: TRenderer = 'simple', figure_scale: float = 1) -> str:
+        """
+        Export the document to LaTeX. Figures will be embedded as pgf graphics.
+
+        Args:
+            font_family: Overwrites the font family for figures
+            table_renderer: The renderer for tables (simple: renderer with column type
+                guessing for text and numbers; pandas: using the internal pandas LaTeX renderer)
+            figure_scale: Scaling factor for the figure size
+
+        Returns:
+            The LaTeX code
+        """
+        self._font_family = font_family
+        assert table_renderer in ['simple', 'pandas'], "table_renderer must be 'simple' or 'pandas'"
+        self._table_renderer = table_renderer
+        self._fig_scale = figure_scale
+
+        return _check_latex_references(self._render_doc(LATEX_OUTPUT), self._item_index)
+
+    def to_pdf(self, file_path: str,
+               font_family: Literal[None, 'serif', 'sans-serif'] = None,
+               table_renderer: TRenderer = 'simple',
+               latex_template_path: str = '') -> bool:
+        """
+        Export the document to a PDF file using LaTeX.
+
+        Args:
+            file_path: The path to save the PDF file to
+            font_family: Overwrites the font family for figures and the template
+            table_renderer: The renderer for tables ('simple' or 'pandas')
+            latex_template_path: Path to a LaTeX template file. The
+                expression <!--CONTENT--> will be replaced by the generated content.
+                If no path is provided a default template is used.
+
+        Returns:
+            True if the PDF was successfully created
+        """
+        latex_code = inject_to_template(self.to_latex(font_family, table_renderer),
+                                        latex_template_path,
+                                        'templates/default_template.tex')
+
+        if font_family == 'sans-serif':
+            latex_code = latex.inject_latex_command(latex_code, '\\renewcommand{\\familydefault}{\\sfdefault}')
+        success, errors, warnings = latex.compile(latex_code, file_path)
+
+        if not success:
+            # Print the messages collected during the failed compilation
+            print('Errors:')
+            print('\n'.join(errors))
+            print('Warnings:')
+            print('\n'.join(warnings))
+
+        return success
+
+    def _repr_html_(self) -> str:
+        """Returns the document as HTML code."""
+        return self.to_html()
+
+    def __repr__(self) -> str:
+        """Returns the document as HTML code."""
+        return self.to_html()
--- a/src/pyladoc/latex.py
+++ b/src/pyladoc/latex.py
@ -0,0 +1,346 @@
+import bs4
+from html.parser import HTMLParser
+from typing import Iterator, Generator, Any
+from pandas.io.formats.style import Styler
+import re
+import os
+import shutil
+import subprocess
+import tempfile
+from .latex_escaping import unicode_to_latex_dict, latex_escape_dict
+
+
+def basic_formatter(value: Any) -> str:
+    """Convert the value with str() and escape the result with escape_text."""
+    return escape_text(str(value))
+
+
+def to_ascii(text: str) -> str:
+    """
+    Substitute every character that has an entry in unicode_to_latex_dict
+    by its LaTeX replacement. All other characters are kept unchanged.
+
+    Args:
+        text: The LaTeX code or text to convert.
+
+    Returns:
+        The text with the known unicode characters replaced.
+    """
+    # All dictionary keys are combined into a single alternation pattern
+    pattern = '|'.join(unicode_to_latex_dict)
+
+    return re.sub(pattern, lambda hit: unicode_to_latex_dict[hit.group()], text)
+
+
+def normalize_label_text(text: str) -> str:
+    """
+    Make a text usable as label: every character that is not an ASCII
+    letter, a digit, '.' or ':' is replaced by a hyphen.
+
+    Args:
+        text: Input text
+
+    Returns:
+        Normalized text
+    """
+    disallowed = re.compile(r"[^a-zA-Z0-9.:]")
+    return disallowed.sub('-', text)
+
+
+def escape_text(text: str) -> str:
+    """
+    Replace everything matched by a key of latex_escape_dict or
+    unicode_to_latex_dict by the corresponding LaTeX replacement.
+
+    Args:
+        text: The text to escape
+
+    Returns:
+        Escaped text
+    """
+
+    translation = latex_escape_dict | unicode_to_latex_dict
+
+    # The keys are regular expressions; join them into one alternation
+    pattern = '|'.join(translation)
+
+    pieces: list[str] = []
+    cursor = 0
+    for hit in re.finditer(pattern, text):
+        start, end = hit.span()
+        pieces.append(text[cursor:start])
+        # Find the replacement by matching every key against the found text
+        candidates = [repl for key, repl in translation.items() if re.match(key, hit.group())]
+        replacement = candidates[0]
+        captured = hit.group(1)
+        if captured:
+            # Key with a capture group: put the normalized capture into the \g<1> placeholder
+            replacement = replacement.replace(r'\g<1>', normalize_label_text(captured))
+        pieces.append(replacement)
+        cursor = end
+    pieces.append(text[cursor:])
+
+    return ''.join(pieces)
+
+
+def render_pandas_styler_table(df_style: Styler, caption: str = '', label: str = '', centering: bool = True) -> str:
+    """
+    Converts a pandas Styler object to LaTeX table.
+
+    The result is a table environment containing a tabular with toprule,
+    midrule and bottomrule lines. Only cells flagged as visible are emitted.
+
+    Args:
+        df_style: The pandas Styler object to convert.
+        caption: The caption for the table.
+        label: Label for referencing the table.
+        centering: Whether to center the table.
+
+    Returns:
+        The LaTeX code.
+    """
+    def iter_table(table: dict[str, Any]) -> Generator[str, None, None]:
+        # Yields the LaTeX code piece by piece; 'table' is the dictionary
+        # returned by Styler._translate (keys used here: 'head' and 'body')
+        yield '\\begin{table}\n'
+        if centering:
+            yield '\\centering\n'
+
+        # Guess column type
+        # A cell counts as numeric if it is an optional '<' or '>', digits with
+        # optional commas, an optional decimal part and optional text
+        # that is separated by a whitespace character (e.g. a unit)
+        numeric = re.compile(r'^[<>]?\s*(?:\d+,?)+(?:\.\d+)?(?:\s\D.*)?$')
+        # 'S' if every body cell of the column looks numeric, otherwise 'l'.
+        # NOTE(review): table['body'][0] raises IndexError for a table without body rows
+        formats = ['S' if all(
+            (numeric.match(line[ci]['display_value'].strip()) for line in table['body'])
+        ) else 'l' for ci in range(len(table['body'][0])) if table['body'][0][ci]['is_visible']]
+
+        if caption:
+            yield f"\\caption{{{escape_text(caption)}}}\n"
+        if label:
+            yield f"\\label{{{normalize_label_text(label)}}}\n"
+        yield f"\\begin{{tabular}}{{{''.join(formats)}}}\n\\toprule\n"
+
+        # Header rows: every visible cell is wrapped in \text{...}
+        for head in table['head']:
+            yield (' & '.join(f"\\text{{{escape_text(c['display_value'].strip())}}}"
+                              for c in head if c['is_visible']))
+            yield ' \\\\\n'
+
+        yield '\\midrule\n'
+
+        # Body rows: visible cells are escaped and emitted as they are
+        for body in table['body']:
+            yield (' & '.join(escape_text(c['display_value'].strip())
+                              for c in body if c['is_visible']))
+            yield ' \\\\\n'
+
+        yield '\\bottomrule\n\\end{tabular}\n\\end{table}'
+
+    # _translate is a private method of the pandas Styler (hence the type: ignore)
+    str_list = iter_table(df_style._translate(False, False, blank=''))  # type: ignore[attr-defined]
+
+    return ''.join(str_list)
+
+
+def from_html_old(html_code: str) -> str:
+    """
+    Converts HTML code to LaTeX code using BeautifulSoup.
+
+    Tags listed in the internal mapping are wrapped in their LaTeX prefix
+    and postfix, tables are rendered as tabular environments, all other
+    tags are dropped while their text content is kept and escaped.
+
+    Args:
+        html_code: The HTML code to convert.
+
+    Returns:
+        The LaTeX code.
+    """
+    root = bs4.BeautifulSoup(html_code, 'html.parser')
+
+    # Tag name -> (LaTeX prefix, LaTeX postfix)
+    html_to_latex = {
+        'strong': ('\\textbf{', '}'),
+        'b': ('\\textbf{', '}'),
+        'em': ('\\emph{', '}'),
+        'i': ('\\emph{', '}'),
+        'p': ('', '\n\n'),
+        'h1': ('\\section{', '}'),
+        'h2': ('\\subsection{', '}'),
+        'h3': ('\\subsubsection{', '}'),
+        'ul': ('\\begin{itemize}', '\\end{itemize}'),
+        'ol': ('\\begin{enumerate}', '\\end{enumerate}'),
+        'li': ('\\item ', ''),
+        'latex_eq': ('\\[', '\\]'),
+    }
+
+    def handle_table(table: bs4.element.Tag) -> str:
+        # Renders a table element; the column count is taken from the first row
+        rows = table.find_all('tr')
+        if not rows:
+            # No row means no column specification: emit nothing instead of
+            # an unbalanced end of a tabular environment
+            return ''
+
+        parts: list[str] = []
+        for row in rows:
+            assert isinstance(row, bs4.element.Tag), 'HTML table not valid'
+            cells = row.find_all(['th', 'td'])
+            if not parts:
+                parts.append("\\begin{tabular}{|" + "|".join(['l'] * len(cells)) + "|}\\toprule\n")
+            # Every row is emitted, including the first one (it was dropped before)
+            parts.append(" & ".join(escape_text(cell.get_text(strip=True)) for cell in cells) + " \\\\\n")
+        parts.append("\\bottomrule\n\\end{tabular}")
+        return ''.join(parts)
+
+    def parse_node(element: bs4.element.Tag) -> Iterator[str]:
+        # Depth-first traversal yielding prefix, converted children and postfix
+        prefix, post = html_to_latex.get(element.name, ('', ''))
+        yield prefix
+
+        for c in element.children:
+            if isinstance(c, bs4.element.Tag):
+                if c.name == 'table':
+                    yield handle_table(c)
+                else:
+                    yield from parse_node(c)
+            else:
+                # Text node
+                yield escape_text(c.text)
+        yield post
+
+    return ''.join(parse_node(root))
+
+
+def from_html(html_code: str) -> str:
+    """
+    Converts HTML code to LaTeX code using HTMLParser.
+
+    Tags of the internal mapping are replaced by a LaTeX prefix and postfix,
+    tables are converted to tabular environments and links to href commands.
+    Any other tag is ignored; its text content is still escaped and emitted.
+
+    Args:
+        html_code: The HTML code to convert.
+
+    Returns:
+        The LaTeX code.
+    """
+    # Tag name -> (text emitted at the start tag, text emitted at the end tag)
+    html_to_latex = {
+        'strong': ('\\textbf{', '}'),
+        'b': ('\\textbf{', '}'),
+        'em': ('\\emph{', '}'),
+        'i': ('\\emph{', '}'),
+        'p': ('', '\n\n'),
+        'h1': ('\\section{', '}\n'),
+        'h2': ('\\subsection{', '}\n'),
+        'h3': ('\\subsubsection{', '}\n'),
+        'ul': ('\\begin{itemize}\n', '\\end{itemize}\n'),
+        'ol': ('\\begin{enumerate}\n', '\\end{enumerate}\n'),
+        'li': ('\\item ', '\n')
+    }
+
+    class LaTeXHTMLParser(HTMLParser):
+        """Collects the LaTeX output fragments in latex_code while the HTML is parsed."""
+
+        def __init__(self) -> None:
+            super().__init__()
+            # Output fragments, joined by the caller
+            self.latex_code: list[str] = []
+            # Index of the placeholder for the tabular header; -1 if none is pending
+            self.header_index: int = -1
+            # Column specification collected for the current table row
+            self.column_alignment = ''
+            # True once the midrule of the current table has been written
+            self.midrule_flag = False
+            # True once a th end tag was seen in the current table
+            self.header_flag = False
+
+        def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+            if tag in html_to_latex:
+                prefix, _ = html_to_latex[tag]
+                self.latex_code.append(prefix)
+            elif tag == 'table':
+                # The column specification is only known after the first row,
+                # so reserve a slot that is filled in at the first tr end tag
+                self.header_index = len(self.latex_code)
+                self.latex_code.append('')  # Placeholder for column header
+                self.midrule_flag = False
+                self.header_flag = False
+            elif tag == 'tr':
+                self.column_alignment = ''
+            elif tag in ['th', 'td']:
+                # Right-aligned if the style attribute contains 'right', else left-aligned
+                style = [v for k, v in attrs if k == 'style']
+                if style and style[0] and 'right' in style[0]:
+                    self.column_alignment += 'r'
+                else:
+                    self.column_alignment += 'l'
+            elif tag == 'a':
+                href = [v for k, v in attrs if k == 'href']
+                assert href, 'Link href attribute is missing'
+                # The link text follows as data; the brace is closed at the end tag
+                self.latex_code.append(f"\\href{{{href[0]}}}{{")
+
+        def handle_endtag(self, tag: str) -> None:
+            if tag in html_to_latex:
+                _, postfix = html_to_latex[tag]
+                self.latex_code.append(postfix)
+            elif tag == 'table':
+                self.latex_code.append("\\bottomrule\n\\end{tabular}\n")
+            elif tag == 'tr':
+                # NOTE(review): for a row without any cell this pops an unrelated fragment
+                self.latex_code.pop()  # Remove column separator after last entry
+                if self.header_index >= 0:
+                    # First row of the table: fill the placeholder with the column specification
+                    self.latex_code[self.header_index] = f"\\begin{{tabular}}{{{self.column_alignment}}}\\toprule\n"
+                    self.header_index = -1
+                self.latex_code.append(' \\\\\n')
+                # Write the midrule once, after the first row that contained a th cell
+                if self.header_flag and not self.midrule_flag:
+                    self.latex_code.append("\\midrule\n")
+                    self.midrule_flag = True
+            elif tag == 'th':
+                self.latex_code.append(" & ")
+                self.header_flag = True
+            elif tag == 'td':
+                self.latex_code.append(" & ")
+            elif tag == 'a':
+                self.latex_code.append("}")
+
+        def handle_data(self, data: str) -> None:
+            # Text consisting only of whitespace is dropped
+            if data.strip():
+                self.latex_code.append(escape_text(data))
+
+    parser = LaTeXHTMLParser()
+    parser.feed(html_code)
+    return ''.join(parser.latex_code)
+
+
+def compile(latex_code: str, output_file: str = '', encoding: str = 'utf-8') -> tuple[bool, list[str], list[str]]:
+    """
+    Compiles LaTeX code to a PDF file.
+
+    The code is piped to pdflatex on stdin and built in a temporary
+    directory. pdflatex is run up to three times: another run is started
+    only if the previous one printed a warning line containing 'reference'
+    and no errors were collected.
+
+    Args:
+        latex_code: The LaTeX code to compile.
+        output_file: The output file path. If empty, the PDF file is not
+            copied out of the temporary directory.
+        encoding: The encoding used to send the LaTeX code to pdflatex
+            and to decode its console output.
+
+    Returns:
+        A tuple with three elements:
+        - A boolean indicating whether the compilation was successful
+          (no errors were collected).
+        - A list of errors.
+        - A list of warnings.
+    """
+
+    with tempfile.TemporaryDirectory() as tmp_path:
+        command = ['pdflatex', '-halt-on-error', '--output-directory', tmp_path]
+
+        errors: list[str] = []
+        warnings: list[str] = []
+        source = latex_code.encode(encoding)
+
+        for i in range(1, 4):
+            rerun_flag = False
+            error_flag = False
+            # Only stdout is captured; stderr is passed through unchanged
+            process = subprocess.run(command, input=source, stdout=subprocess.PIPE)
+            # The console output may contain bytes that are not valid in
+            # 'encoding'; replace them instead of failing on the log itself
+            output = process.stdout.decode(encoding, errors='replace')
+
+            for line in output.split('\n'):
+                if 'Warning' in line:
+                    warnings.append(f"Run {i}: " + line)
+                    if 'reference' in line:
+                        rerun_flag = True
+                if line.startswith('!') or line.startswith('*!'):
+                    error_flag = True
+
+                # From the first error marker on, every line of this run is kept
+                if error_flag:
+                    errors.append(line)
+
+            if process.returncode != 0 and not errors:
+                # Do not report success for a failed run without a parsed error line
+                errors.append(f"Run {i}: pdflatex exited with status {process.returncode}")
+
+            if not rerun_flag or errors:
+                break
+
+        # Copy pdf file
+        file_list = [f for f in os.listdir(tmp_path) if f.lower().endswith('.pdf')]
+        if file_list:
+            pdf_file = os.path.join(tmp_path, file_list[0])
+            if output_file:
+                shutil.copyfile(pdf_file, output_file)
+
+    return not errors, errors, warnings
+
+
+def inject_latex_command(text: str, command: str) -> str:
+    """
+    Insert a command, surrounded by blank lines, after the last line that
+    starts with a usepackage command. If there is no such line, the
+    command is appended at the end.
+
+    Args:
+        text: The LaTeX code
+        command: The LaTeX command to insert
+
+    Returns:
+        The LaTeX code including the command
+    """
+    rows = text.splitlines()
+
+    package_rows = [idx for idx, row in enumerate(rows) if row.strip().startswith("\\usepackage")]
+    # Position right behind the last package line, or the end of the text
+    target = package_rows[-1] + 1 if package_rows else len(rows)
+    rows.insert(target, f"\n{command}\n")
+
+    return '\n'.join(rows)
--- a/src/pyladoc/latex_escaping.py
+++ b/src/pyladoc/latex_escaping.py
@ -0,0 +1,89 @@
+# Maps single unicode characters to their LaTeX replacement.
+# The keys are joined with '|' and used as a regular expression in latex.py,
+# so a key must not contain unescaped regular expression special characters.
+unicode_to_latex_dict = {
+    # Unicode numeric subscripts
+    '₀': r'\textsubscript{0}', '₁': r'\textsubscript{1}', '₂': r'\textsubscript{2}', '₃': r'\textsubscript{3}',
+    '₄': r'\textsubscript{4}', '₅': r'\textsubscript{5}', '₆': r'\textsubscript{6}', '₇': r'\textsubscript{7}',
+    '₈': r'\textsubscript{8}', '₉': r'\textsubscript{9}',
+    # Unicode numeric superscripts
+    '⁰': r'\textsuperscript{0}', '¹': r'\textsuperscript{1}', '²': r'\textsuperscript{2}', '³': r'\textsuperscript{3}',
+    '⁴': r'\textsuperscript{4}', '⁵': r'\textsuperscript{5}', '⁶': r'\textsuperscript{6}', '⁷': r'\textsuperscript{7}',
+    '⁸': r'\textsuperscript{8}', '⁹': r'\textsuperscript{9}', '⁺': r'\textsuperscript{+}', '⁻': r'\textsuperscript{-}',
+    # Often used European non-ascii-characters
+    'ä': r'{\"a}',
+    'ö': r'{\"o}',
+    'ü': r'{\"u}',
+    'Ä': r'{\"A}',
+    'Ö': r'{\"O}',
+    'Ü': r'{\"U}',
+    'ß': r'{\ss}',
+    'é': r"{\'e}",
+    'è': r"{\`e}",
+    'ê': r"{\^e}",
+    'à': r"{\`a}",
+    'â': r"{\^a}",
+    'ç': r"{\c{c}}",
+    'É': r"{\'E}",
+    'È': r"{\`E}",
+    'Ê': r"{\^E}",
+    'À': r"{\`A}",
+    'Â': r"{\^A}",
+    'Ç': r"{\c{C}}",
+    'ó': r"{\'o}",
+    'ò': r"{\`o}",
+    'ô': r"{\^o}",
+    'Ó': r"{\'O}",
+    'Ò': r"{\`O}",
+    'Ô': r"{\^O}",
+    'í': r"{\'i}",
+    'ì': r"{\`i}",
+    'î': r"{\^i}",
+    'Í': r"{\'I}",
+    'Ì': r"{\`I}",
+    'Î': r"{\^I}",
+    'ú': r"{\'u}",
+    'ù': r"{\`u}",
+    'û': r"{\^u}",
+    'Ú': r"{\'U}",
+    'Ù': r"{\`U}",
+    'Û': r"{\^U}",
+    'å': r"{\r{a}}",
+    'Å': r"{\r{A}}",
+    'ø': r"{\o}",
+    'Ø': r"{\O}",
+    'æ': r"{\ae}",
+    'Æ': r"{\AE}",
+    'œ': r"{\oe}",
+    'Œ': r"{\OE}",
+    # Other unicode
+    '°': r'{\textdegree}',
+    'µ': r'{\textmu}',
+    'π': r'$\pi$',
+    '≈': r'$\approx$',
+    '±': r'$\pm$',
+    '≠': r'$\neq$',
+    '∆': r'$\Delta$',
+    'Ω': r'$\Omega$',
+    'Λ': r'$\Lambda$',
+    'Σ': r'$\Sigma$',
+    # '€': r'{\euro}',
+    '£': r'{\pounds}',
+    '¥': r'{\yen}',
+    '\u00A0': r'~',  # Non-breaking space
+    '\u2007': ' '  # Figure space
+}
+
+# Maps LaTeX special characters to their escaped form.
+# The keys are regular expression patterns (they are joined with '|' in
+# latex.escape_text), which is why '$', '^' and the backslash are escaped.
+latex_escape_dict = {
+    '&': r'\&',
+    '%': r'\%',
+    r'\$': r'\$',
+    '#': r'\#',
+    '_': r'\_',
+    '{': r'\{',
+    '}': r'\}',
+    '<': r'{\textless}',
+    '>': r'{\textgreater}',
+    '~': r'\textasciitilde{}',
+    r'\^': r'\textasciicircum{}',
+    r'\\': r'\textbackslash{}',
+    # References:
+    # The only key with a capture group; latex.escape_text replaces the
+    # \g<1> placeholder of the value by the normalized capture
+    r'@(\w+:[\w\_\-]+)': r'\ref{\g<1>}'
+}
--- a/src/pyladoc/templates/default_template.tex
+++ b/src/pyladoc/templates/default_template.tex
@ -0,0 +1,38 @@
+\documentclass[a4paper,12pt]{article}
+
+% Packages
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+\usepackage{lmodern}  % Load Latin Modern font
+\usepackage{graphicx} % For including images
+\usepackage{amsmath}  % For mathematical symbols
+\usepackage{amssymb}  % For additional symbols
+\usepackage{hyperref} % For hyperlinks
+\usepackage{caption}  % For customizing captions
+\usepackage{geometry} % To set margins
+\usepackage{natbib}   % For citations
+\usepackage{float}    % For fixing figure positions
+\usepackage{siunitx}  % For scientific units
+\usepackage{booktabs} % For professional-looking tables
+\usepackage{pgf} % For using PGF graphics
+\usepackage{textcomp, gensymb} % provides \degree symbol
+
+\sisetup{
+  table-align-text-post = false
+}
+
+% Geometry Settings
+\geometry{margin=1in} % 1-inch margins
+
+% Title and Author Information
+% \title{Report Title}
+% \author{Your Name \\ Department of XYZ \\ \texttt{email@example.com}}
+% \date{\today}
+
+\begin{document}
+
+% Title Page
+% # \maketitle
+
+% <!--CONTENT-->
+\end{document}
--- a/src/pyladoc/templates/test_template.html
+++ b/src/pyladoc/templates/test_template.html
@ -0,0 +1,114 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>Test template</title>
+  <style>
+    body
+    {
+      background-color: white;
+      font-family: Lucida Grande,Lucida Sans Unicode,Lucida Sans,Geneva,Verdana,sans-serif;
+    }
+
+    div.document
+    {
+        max-width: 820px;
+        top: 20px;
+        overflow: visible;
+        margin: 0 auto;
+        width: 90%;
+        padding-bottom: 50px;
+    }
+
+    div h1
+    {
+        font-size: 32px;
+        font-weight: normal;
+        margin-bottom: 10px;
+        margin-top: 24px;
+        color: black;
+    }
+
+    div h2
+    {
+        font-size: 24px;
+        font-weight: normal;
+        margin-bottom: 10px;
+        margin-top: 24px;
+        color: black;
+    }
+
+    div.figure {
+        text-align: center;
+    }
+
+    div svg {
+        margin-left: -5%;
+        max-width: 110%;
+        height: auto;
+        object-fit: contain;
+    }
+
+    .equation-container {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            width: 100%;
+    }
+
+    .equation {
+        text-align: center;
+        width: 100%;
+    }
+
+    .equation-number {
+        text-align: right;
+    }
+
+    table {
+        border-collapse: collapse;
+        margin: 20px auto;
+    }
+
+
+    table th,
+    table td {
+        padding: 8px;
+        font-variant-numeric: tabular-nums;
+    }
+
+    table th {
+        background-color: #f4f4f4;
+        font-weight: bold;
+    }
+
+    /* Alternating row colors for readability */
+    table tr:nth-child(even) {
+        background-color: #f9f9f9;
+    }
+
+    table tr:nth-child(odd) {
+        background-color: #ffffff;
+    }
+
+    /* Caption styling */
+    table caption {
+        caption-side: top;
+        font-weight: bold;
+        font-size: 16px;
+        margin-bottom: 10px;
+    }
+
+    /* Fine-tuning text in table */
+    table td, table th {
+        vertical-align: middle;
+        line-height: 1.5;
+    }
+  </style>
+</head>
+<body>
+    <div class="document">
+        <!--CONTENT-->
+    </div>
+</body>
--- a/tests/document_validation.py
+++ b/tests/document_validation.py
@ -0,0 +1,64 @@
+from typing import Generator, Any
+from lxml import etree
+from lxml.etree import _Element as EElement  # type: ignore
+import requests
+
+
+# Read the HTML test template once at import time. The path is relative to
+# the current working directory.
+with open('src/pyladoc/templates/test_template.html', mode='rt', encoding='utf-8') as f:
+    html_test_template = f.read()
+
+
+def add_line_numbers(multiline_string: str) -> str:
+    """Prefix every line of the text with its 1-based line number followed by ': '."""
+    return "\n".join(
+        f"{number}: {content}"
+        for number, content in enumerate(multiline_string.splitlines(), start=1)
+    )
+
+
+def validate_html_with_w3c(html_string: str, timeout: float = 30.0) -> dict[str, Any]:
+    """
+    Send an HTML document to the W3C Nu validator and return its JSON answer.
+
+    Args:
+        html_string: The complete HTML document to validate.
+        timeout: Seconds to wait for the validator before giving up.
+
+    Returns:
+        The decoded JSON response on HTTP status 200. Otherwise a dictionary
+        with an 'error' entry (and 'details' for non-200 responses).
+    """
+    validator_url = "https://validator.w3.org/nu/"
+
+    # Parameters for the POST request
+    headers = {
+        "Content-Type": "text/html; charset=utf-8",
+        "User-Agent": "Python HTML Validator"}
+
+    try:
+        # Encode explicitly so the body matches the declared charset, and set a
+        # timeout so an unreachable validator cannot block the test run forever
+        response = requests.post(validator_url, headers=headers, data=html_string.encode('utf-8'),
+                                 params={"out": "json"}, timeout=timeout)
+
+        if response.status_code == 200:
+            return response.json()
+        else:
+            return {
+                "error": f"Failed to validate HTML. Status code: {response.status_code}",
+                "details": response.text
+            }
+
+    except requests.RequestException as e:
+        return {"error": f"An error occurred while connecting to the W3C Validator: {str(e)}"}
+
+
+def validate_html(html_string: str, validate_online: bool = False, check_for: list[str] | None = None) -> None:
+    """
+    Assert that the HTML code contains the expected tags and, optionally,
+    that it passes the W3C validation when embedded in the test template.
+
+    Args:
+        html_string: The HTML fragment to check.
+        validate_online: If True, the fragment is inserted into the test
+            template and sent to the W3C validator.
+        check_for: Tag names that must occur in the fragment. Defaults to
+            ['table', 'svg', 'div'].
+
+    Raises:
+        AssertionError: If a tag is missing, the validation request failed
+            or the validator returned any message.
+    """
+    if check_for is None:
+        # Built per call instead of sharing one mutable default list
+        check_for = ['table', 'svg', 'div']
+
+    root = etree.fromstring(html_string, parser=etree.HTMLParser(recover=True))
+
+    def recursive_search(element: EElement) -> Generator[str, None, None]:
+        # Yields the tag name of the element and of all its descendants;
+        # nodes whose tag is not a string are skipped
+        if isinstance(element.tag, str):
+            yield element.tag
+
+        for child in element:
+            yield from recursive_search(child)
+
+    tags = set(recursive_search(root))
+
+    for tag_type in check_for:
+        assert tag_type in tags, f"Tag {tag_type} not found in the html code"
+
+    if validate_online:
+        test_page = html_test_template.replace('<!--CONTENT-->', html_string)
+        validation_result = validate_html_with_w3c(test_page)
+        assert 'messages' in validation_result, 'Validate request failed'
+        if validation_result['messages']:
+            # Print the page with line numbers so the reported lines can be looked up
+            print(add_line_numbers(test_page))
+        for verr in validation_result['messages']:
+            print(f"- {verr['type']}: {verr['message']} (line: {verr['lastLine']})")
+
+        assert len(validation_result['messages']) == 0, f'{len(validation_result["messages"])} validation error, first error: {validation_result["messages"][0]["message"]}'
--- a/tests/out/test_html_render.html
+++ b/tests/out/test_html_render.html
--- a/tests/out/test_latex_render.pdf
+++ b/tests/out/test_latex_render.pdf
--- a/tests/out/test_markdown_characters.html
+++ b/tests/out/test_markdown_characters.html
@ -0,0 +1,5 @@
+<h1>Special caracters</h1>
+<p>Umlaute: ÖÄÜ öäü</p>
+<p>Other: ß, €, @, $, %, ~, µ</p>
+<p>Units: m³, cm²</p>
+<p>Controll characters: &lt;, &gt;, ", ', &amp;, |, /, \</p>
--- a/tests/out/test_markdown_equations.html
+++ b/tests/out/test_markdown_equations.html
@ -0,0 +1,39 @@
+<h1>Source Equations</h1>
+<ol>
+<li>$4(3x + 2) - 5(x - 1) = 3x + 14$</li>
+<li>$
+rac{2y + 5}{4} +
+rac{3y - 1}{2} = 5$</li>
+<li>$
+rac{5}{x + 2} +
+rac{2}{x - 2} = 3$</li>
+<li>$8(3b - 5) + 4(b + 2) = 60$</li>
+<li>$2c^2 - 3c - 5 = 0$</li>
+<li>$4(2d - 1) + 5(3d + 2) = 7d + 28$</li>
+<li>$q^2 + 6q + 9 = 16$</li>
+</ol>
+<h1>Result Equations</h1>
+<ol>
+<li>$x =
+rac{1}{4}$</li>
+<li>$y =
+rac{17}{8}$</li>
+<li>$z =
+rac{7}{3}$</li>
+<li>$x = 1$ or $x = -6$</li>
+<li>$a =
+rac{1}{3}$ or $a = 2$</li>
+<li>$x = -
+rac{2}{3}$ or $x = 3$</li>
+<li>$b =
+rac{23}{7}$</li>
+</ol>
+<h1>Step by Step</h1>
+<ol>
+<li>Distribute: $12x + 8 - 5x + 5 = 3x + 14$</li>
+<li>Combine like terms: $7x + 13 = 3x + 14$</li>
+<li>Subtract $3x$: $4x + 13 = 14$</li>
+<li>Subtract $13$: $4x = 1$</li>
+<li>Divide by $4$: $x =
+rac{1}{4}$</li>
+</ol>
--- a/tests/out/test_markdown_style.html
+++ b/tests/out/test_markdown_style.html
@ -0,0 +1,44 @@
+<p>Below is an in-depth explanation of the AArch64 (ARM64)
+unconditional branch instruction—often simply called the
+“B” instruction—and how its 26‐bit immediate field (imm26)
+is laid out and later relocated during linking.</p>
+<hr>
+<h2>Instruction Layout</h2>
+<p>The unconditional branch in AArch64 is encoded in a 32‑bit
+instruction. Its layout is as follows:</p>
+<pre><code>Bits:  31         26 25                           0
+        +-------------+------------------------------+
+        |  Opcode     |          imm26               |
+        +-------------+------------------------------+
+</code></pre>
+<ul>
+<li><strong>Opcode (bits 31:26):</strong></li>
+<li>For a plain branch (<code>B</code>), the opcode is <code>000101</code>.</li>
+<li>
+<p>For a branch with link (<code>BL</code>), which saves the return
+address (i.e., a call), the opcode is <code>100101</code>.
+These 6 bits determine the instruction type.</p>
+</li>
+<li>
+<p><strong>Immediate Field (imm26, bits 25:0):</strong></p>
+</li>
+<li>This 26‑bit field holds a signed immediate value.</li>
+<li>
+<p><strong>Offset Calculation:</strong> At runtime, the processor:</p>
+<ol>
+<li><strong>Shifts</strong> the 26‑bit immediate left by 2 bits.
+(Because instructions are 4-byte aligned,
+the two least-significant bits are always zero.)</li>
+<li><strong>Sign-extends</strong> the resulting 28‑bit value to
+the full register width (typically 64 bits).</li>
+<li><strong>Adds</strong> this value to the program counter
+(PC) to obtain the branch target.</li>
+</ol>
+</li>
+<li>
+<p><strong>Reach:</strong></p>
+</li>
+<li>With a 26‑bit signed field that’s effectively 28 bits
+  after the shift, the branch can cover a range
+  of approximately ±128&nbsp;MB from the current instruction.</li>
+</ul>
--- a/tests/out/test_markdown_table.html
+++ b/tests/out/test_markdown_table.html
@ -0,0 +1,77 @@
+<h2>Klemmen</h2>
+<table>
+<thead>
+<tr>
+<th style="text-align: right;">Anz.</th>
+<th>Typ</th>
+<th>Beschreibung</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align: right;">12</td>
+<td>BK9050</td>
+<td>Buskoppler</td>
+</tr>
+<tr>
+<td style="text-align: right;">2</td>
+<td>KL1104</td>
+<td>4 Digitaleingänge</td>
+</tr>
+<tr>
+<td style="text-align: right;">2</td>
+<td>KL2404</td>
+<td>4 Digitalausgänge (0,5 A)</td>
+</tr>
+<tr>
+<td style="text-align: right;">3</td>
+<td>KL2424</td>
+<td>4 Digitalausgänge (2 A)</td>
+</tr>
+<tr>
+<td style="text-align: right;">2</td>
+<td>KL4004</td>
+<td>4 Analogausgänge</td>
+</tr>
+<tr>
+<td style="text-align: right;">1</td>
+<td>KL4002</td>
+<td>2 Analogausgänge</td>
+</tr>
+<tr>
+<td style="text-align: right;">22</td>
+<td>KL9188</td>
+<td>Potenzialverteilungsklemme</td>
+</tr>
+<tr>
+<td style="text-align: right;">1</td>
+<td>KL9100</td>
+<td>Potenzialeinspeiseklemme</td>
+</tr>
+<tr>
+<td style="text-align: right;">3</td>
+<td>KL3054</td>
+<td>4 Analogeingänge</td>
+</tr>
+<tr>
+<td style="text-align: right;">5</td>
+<td>KL3214</td>
+<td>PT100 4 Temperatureingänge (3-Leiter)</td>
+</tr>
+<tr>
+<td style="text-align: right;">3</td>
+<td>KL3202</td>
+<td>PT100 2 Temperatureingänge (3-Leiter)</td>
+</tr>
+<tr>
+<td style="text-align: right;">1</td>
+<td>KL2404</td>
+<td>4 Digitalausgänge</td>
+</tr>
+<tr>
+<td style="text-align: right;">2</td>
+<td>KL9010</td>
+<td>Endklemme</td>
+</tr>
+</tbody>
+</table>
--- a/tests/test_latex_tools.py
+++ b/tests/test_latex_tools.py
@ -0,0 +1,151 @@
+import pyladoc.latex
+
+
+def normalize_latex_code(latex_code: str) -> str:
+    """
+    Drop all empty lines, strip the surrounding whitespace of the remaining
+    lines and join them with newlines. A line consisting only of whitespace
+    is kept as an empty line.
+    """
+    stripped_lines = []
+    for raw_line in latex_code.splitlines():
+        if raw_line:
+            stripped_lines.append(raw_line.strip())
+    return '\n'.join(stripped_lines)
+
+
+def check_only_ascii(latex_code: str) -> bool:
+    """Return True if every character of the text is an ASCII character (also for an empty text)."""
+    return latex_code.isascii()
+
+
+def test_latex_from_html():
+    """
+    Convert an HTML snippet with headings, inline styles, special characters,
+    a list and a table with from_html and compare the result with the
+    reference LaTeX code, ignoring indentation and empty lines.
+    """
+    html_code = """
+    <h1>Test</h1>
+    <p>This is are Umlautes: Ä,Ö and Ü</p>
+    <p>This is a <b>test</b>.</p>
+    <p>And this is another <em>test</em>.</p>
+    <p>And this is a <strong>third</strong> test.</p>
+    <p>And this is a <i>fourth</i> test.</p>
+    <p>This is a LaTeX command: \\textbf{test}</p>
+    <p>This are typical control characters: {, }, <, >, ", ', &, |, /, \\</p>
+    <ul>
+        <li>Item 1</li>
+        <li>Item 2</li>
+    </ul>
+    <table>
+        <tr>
+            <th>Header 1</th>
+            <th>Header 2</th>
+        </tr>
+        <tr>
+            <td>Cell 1</td>
+            <td>Cell 2</td>
+        </tr>
+    </table>
+    """
+
+    latex_code = pyladoc.latex.from_html(html_code)
+
+    # Expected output; LaTeX commands contained in the text must come out escaped
+    ref_latex_code = r"""
+        \section{Test}
+        This is are Umlautes: {\"A},{\"O} and {\"U}
+        This is a \textbf{test}.
+        And this is another \emph{test}.
+        And this is a \textbf{third} test.
+        And this is a \emph{fourth} test.
+        This is a LaTeX command: \textbackslash{}textbf\{test\}
+        This are typical control characters: \{, \}, {\textless}, {\textgreater}, ", ', \&, |, /, \textbackslash{}
+        \begin{itemize}
+        \item Item 1
+        \item Item 2
+        \end{itemize}
+        \begin{tabular}{ll}\toprule
+        Header 1 & Header 2 \\
+        \midrule
+        Cell 1 & Cell 2 \\
+        \bottomrule
+        \end{tabular}"""
+
+    print(latex_code)
+
+    print('--')
+
+    # print(pyladoc.latex.escape_text(html_code))
+
+    assert check_only_ascii(latex_code), 'Some characters are not ASCII'
+    assert normalize_latex_code(ref_latex_code) == normalize_latex_code(latex_code)
+
+
+def test_latex_from_markdown():
+    """
+    Add markdown with two tables to a DocumentWriter, convert its HTML output
+    with from_html and compare the result with the reference LaTeX code,
+    ignoring indentation and empty lines.
+    """
+    markdown_code = """
+        ## Test1
+
+        | Anz.| Typ      | Beschreibung
+        |----:|----------|------------------------------------
+        | 12  | BK9050   | Buskoppler
+        |  2  | KL1104   | 4 Digitaleingänge
+        |  2  | KL2404   | 4 Digitalausgänge (0,5 A)
+        |  3  | KL2424   | 4 Digitalausgänge (2 A)
+        |  2  | KL4004   | 4 Analogausgänge
+        |  1  | KL4002   | 2 Analogausgänge
+        | 22  | KL9188   | Potenzialverteilungsklemme
+        |  1  | KL9100   | Potenzialeinspeiseklemme
+        |  3  | KL3054   | 4 Analogeingänge
+        |  5  | KL3214   | PT100 4 Temperatureingänge (3-Leiter)
+        |  3  | KL3202   | PT100 2 Temperatureingänge (3-Leiter)
+        |  1  | KL2404   | 4 Digitalausgänge
+        |  2  | KL9010   | Endklemme
+
+        This is a **test**.
+
+        ## Test2
+
+        | Anz.| Beschreibung
+        |----:|------------------------------------
+        | 12  | Buskoppler
+        |  2  | 4 Digitaleingänge
+        |  2  | 4 Digitalausgänge (0,5 A)
+        |  3  | 4 Digitalausgänge (2 A)
+        |  2  | 4 Analogausgänge
+        |  1  | 2 Analogausgänge
+    """
+
+    pyla = pyladoc.DocumentWriter()
+    pyla.add_markdown(markdown_code)
+    latex_code = pyladoc.latex.from_html(pyla.to_html())
+
+    # Expected output; the right-aligned first column must result in an 'r' column
+    ref_latex_code = r"""
+        \subsection{Test1}
+        \begin{tabular}{rll}\toprule
+        Anz. & Typ & Beschreibung \\
+        \midrule
+        12 & BK9050 & Buskoppler \\
+        2 & KL1104 & 4 Digitaleing{\"a}nge \\
+        2 & KL2404 & 4 Digitalausg{\"a}nge (0,5 A) \\
+        3 & KL2424 & 4 Digitalausg{\"a}nge (2 A) \\
+        2 & KL4004 & 4 Analogausg{\"a}nge \\
+        1 & KL4002 & 2 Analogausg{\"a}nge \\
+        22 & KL9188 & Potenzialverteilungsklemme \\
+        1 & KL9100 & Potenzialeinspeiseklemme \\
+        3 & KL3054 & 4 Analogeing{\"a}nge \\
+        5 & KL3214 & PT100 4 Temperatureing{\"a}nge (3-Leiter) \\
+        3 & KL3202 & PT100 2 Temperatureing{\"a}nge (3-Leiter) \\
+        1 & KL2404 & 4 Digitalausg{\"a}nge \\
+        2 & KL9010 & Endklemme \\
+        \bottomrule
+        \end{tabular}
+        This is a \textbf{test}.
+
+        \subsection{Test2}
+        \begin{tabular}{rl}\toprule
+        Anz. & Beschreibung \\
+        \midrule
+        12 & Buskoppler \\
+        2 & 4 Digitaleing{\"a}nge \\
+        2 & 4 Digitalausg{\"a}nge (0,5 A) \\
+        3 & 4 Digitalausg{\"a}nge (2 A) \\
+        2 & 4 Analogausg{\"a}nge \\
+        1 & 2 Analogausg{\"a}nge \\
+        \bottomrule
+        \end{tabular}"""
+
+    print(latex_code)
+
+    assert check_only_ascii(latex_code), 'Some characters are not ASCII'
+    assert normalize_latex_code(ref_latex_code) == normalize_latex_code(latex_code)
+
+
+# Allows running the tests of this file directly as a script
+if __name__ == '__main__':
+    test_latex_from_html()
+    test_latex_from_markdown()
--- a/tests/test_rendering_example_doc.py
+++ b/tests/test_rendering_example_doc.py
@ -0,0 +1,108 @@
+import pyladoc
+import matplotlib.pyplot as plt
+import pandas as pd
+import document_validation
+
+# Passed to document_validation.validate_html as the validate_online argument
+VALIDATE_HTML_CODE_ONLINE = False
+# If True, test_html_render writes the rendered page to tests/out
+WRITE_RESULT_FILES = True
+
+
+def make_document():
+    """
+    Build the example document used by the rendering tests: markdown with
+    special characters, a link and a table, an equation, a matplotlib bar
+    chart and a pandas table.
+
+    Returns:
+        The filled pyladoc.DocumentWriter
+    """
+    dw = pyladoc.DocumentWriter()
+
+    dw.add_markdown("""
+    # Special characters
+
+    ö ä ü Ö Ä Ü ß @ ∆
+
+    π ≈ ± ∆ Σ
+
+    £ ¥ $ €
+
+    Œ
+                    
+    # Link
+                    
+    This is a hyperlink: [nonan.net](https://www.nonan.net)
+
+    # Table
+
+    | Anz.| Typ      | Beschreibung
+    |----:|----------|------------------------------------
+    | 12  | BK9050   | Buskoppler
+    |  2  | KL1104   | 4 Digitaleingänge
+    |  2  | KL2404   | 4 Digitalausgänge (0,5 A)
+    |  3  | KL2424   | 4 Digitalausgänge (2 A)
+    |  2  | KL4004   | 4 Analogausgänge
+    |  1  | KL4002   | 2 Analogausgänge
+    | 22  | KL9188   | Potenzialverteilungsklemme
+    |  1  | KL9100   | Potenzialeinspeiseklemme
+    |  3  | KL3054   | 4 Analogeingänge
+    |  5  | KL3214   | PT100 4 Temperatureingänge (3-Leiter)
+    |  3  | KL3202   | PT100 2 Temperatureingänge (3-Leiter)
+    |  1  | KL2404   | 4 Digitalausgänge
+    |  2  | KL9010   | Endklemme
+    
+    ---
+
+    # Equations
+                    
+    This line represents a reference to the equation @eq:test1.
+    """)
+
+    # Presumably the target of the @eq:test1 reference in the markdown above - confirm
+    dw.add_equation(r'y = a + b * \sum_{i=0}^{\infty} a_i x^i', 'test1')
+
+    # Figure
+    fig, ax = plt.subplots()
+
+    fruits = ['apple', 'blueberry', 'cherry', 'orange']
+    counts = [40, 100, 30, 55]
+    # '_red': presumably relies on matplotlib leaving labels that start with
+    # an underscore out of the legend - confirm
+    bar_labels = ['red', 'blue', '_red', 'orange']
+    bar_colors = ['tab:red', 'tab:blue', 'tab:red', 'tab:orange']
+
+    ax.bar(fruits, counts, label=bar_labels, color=bar_colors)
+    ax.set_ylabel('fruit supply')
+    ax.set_title('Fruit supply by kind and color')
+    ax.legend(title='Fruit color')
+
+    dw.add_diagram(fig, 'Bar chart with individual bar colors')
+
+    # Table
+    # Columns mixing plain numbers, numbers with units and text
+    mydataset = {
+        'Row1': ["Line1", "Line2", "Line3", "Line4", "Line5"],
+        'Row2': [120, '95 km/h', 110, '105 km/h', 130],
+        'Row3': ['12 g/km', '> 150 g/km', '110 g/km', '1140 g/km', '13.05 g/km'],
+        'Row4': ['5 stars', '4 stars', '5 stars', '4.5 stars', '5 stars'],
+        'Row5': [3.5, 7.8, 8.5, 6.9, 4.2],
+        'Row6': ['1850 kg', '1500 kg', '1400 kg', '1600 kg', '1700 kg'],
+        'Row7': ['600 Nm', '250 Nm', '280 Nm', '320 Nm', '450 Nm']
+    }
+    df = pd.DataFrame(mydataset)
+
+    # The index column is hidden in the styled table
+    dw.add_table(df.style.hide(axis="index"), 'This is a example table', 'example1')
+
+    return dw
+
+
+def test_html_render():
+    """
+    Render the example document to HTML, check it with validate_html and,
+    if WRITE_RESULT_FILES is set, write it embedded in the test template
+    to tests/out.
+    """
+    doc = make_document()
+    html_code = doc.to_html()
+
+    document_validation.validate_html(html_code, VALIDATE_HTML_CODE_ONLINE)
+
+    if WRITE_RESULT_FILES:
+        # The output path is relative to the current working directory
+        with open('tests/out/test_html_render.html', 'w', encoding='utf-8') as f:
+            f.write(pyladoc.inject_to_template(html_code, internal_template='templates/test_template.html'))
+
+
+def test_latex_render():
+    """Export the example document with to_pdf and assert that it reports success."""
+    doc = make_document()
+
+    # print(doc.to_latex())
+
+    assert doc.to_pdf('tests/out/test_latex_render.pdf', font_family='serif')
+
+
+# Allows running the tests of this file directly as a script
+if __name__ == '__main__':
+    test_html_render()
+    test_latex_render()
--- a/tests/test_rendering_markdown.py
+++ b/tests/test_rendering_markdown.py
@ -0,0 +1,152 @@
+import pyladoc
+import document_validation
+
+# NOTE(review): not referenced by any test visible in this module; presumably
+# intended to be passed to document_validation.validate_html to toggle online
+# validation - confirm and either use or remove.
+VALIDATE_HTML_CODE_ONLINE = False
+# When true, every test in this module writes its rendered HTML to a file
+# under tests/out/ so the result can be inspected manually.
+WRITE_RESULT_FILES = True
+
+
+def test_markdown_styling():
+    """Render markdown with mixed inline/block styling and validate the HTML.
+
+    The sample text contains a horizontal rule, a heading, a fenced code
+    block, bold text, inline code and nested bullet/numbered lists. The
+    rendered HTML is validated with a check for the ``strong``, ``ol``,
+    ``li``, ``code`` and ``hr`` tags and, when ``WRITE_RESULT_FILES`` is set,
+    saved to ``tests/out/test_markdown_style.html``.
+    """
+    markdown_text = """
+        Below is an in-depth explanation of the AArch64 (ARM64)
+        unconditional branch instruction—often simply called the
+        “B” instruction—and how its 26‐bit immediate field (imm26)
+        is laid out and later relocated during linking.
+
+        ---
+
+        ## Instruction Layout
+
+        The unconditional branch in AArch64 is encoded in a 32‑bit
+        instruction. Its layout is as follows:
+
+        ```
+        Bits:  31         26 25                           0
+                +-------------+------------------------------+
+                |  Opcode     |          imm26               |
+                +-------------+------------------------------+
+        ```
+
+        - **Opcode (bits 31:26):**
+        - For a plain branch (`B`), the opcode is `000101`.
+        - For a branch with link (`BL`), which saves the return
+        address (i.e., a call), the opcode is `100101`.
+        These 6 bits determine the instruction type.
+
+        - **Immediate Field (imm26, bits 25:0):**
+        - This 26‑bit field holds a signed immediate value.
+        - **Offset Calculation:** At runtime, the processor:
+            1. **Shifts** the 26‑bit immediate left by 2 bits.
+            (Because instructions are 4-byte aligned,
+            the two least-significant bits are always zero.)
+            2. **Sign-extends** the resulting 28‑bit value to
+            the full register width (typically 64 bits).
+            3. **Adds** this value to the program counter
+            (PC) to obtain the branch target.
+
+        - **Reach:**
+        - With a 26‑bit signed field that’s effectively 28 bits
+          after the shift, the branch can cover a range
+          of approximately ±128 MB from the current instruction.
+        """
+
+    writer = pyladoc.DocumentWriter()
+    writer.add_markdown(markdown_text)
+
+    rendered = writer.to_html()
+    expected_tags = ['strong', 'ol', 'li', 'code', 'hr']
+    document_validation.validate_html(rendered, check_for=expected_tags)
+
+    if not WRITE_RESULT_FILES:
+        return
+
+    with open('tests/out/test_markdown_style.html', 'w', encoding='utf-8') as out_file:
+        out_file.write(rendered)
+
+
+def test_markdown_table():
+    """Render a markdown pipe table and validate the resulting HTML.
+
+    The rendered HTML is validated with a check for the ``table`` tag and,
+    when ``WRITE_RESULT_FILES`` is set, saved to
+    ``tests/out/test_markdown_table.html``.
+    """
+    markdown_text = """
+        ## Klemmen
+
+        | Anz.| Typ      | Beschreibung
+        |----:|----------|------------------------------------
+        | 12  | BK9050   | Buskoppler
+        |  2  | KL1104   | 4 Digitaleingänge
+        |  2  | KL2404   | 4 Digitalausgänge (0,5 A)
+        |  3  | KL2424   | 4 Digitalausgänge (2 A)
+        |  2  | KL4004   | 4 Analogausgänge
+        |  1  | KL4002   | 2 Analogausgänge
+        | 22  | KL9188   | Potenzialverteilungsklemme
+        |  1  | KL9100   | Potenzialeinspeiseklemme
+        |  3  | KL3054   | 4 Analogeingänge
+        |  5  | KL3214   | PT100 4 Temperatureingänge (3-Leiter)
+        |  3  | KL3202   | PT100 2 Temperatureingänge (3-Leiter)
+        |  1  | KL2404   | 4 Digitalausgänge
+        |  2  | KL9010   | Endklemme
+        """
+
+    writer = pyladoc.DocumentWriter()
+    writer.add_markdown(markdown_text)
+
+    rendered = writer.to_html()
+    document_validation.validate_html(rendered, check_for=['table'])
+
+    if not WRITE_RESULT_FILES:
+        return
+
+    with open('tests/out/test_markdown_table.html', 'w', encoding='utf-8') as out_file:
+        out_file.write(rendered)
+
+
+def test_markdown_equations():
+    """Render markdown containing inline LaTeX equations and validate the HTML.
+
+    The rendered HTML is validated with a check for the ``h1`` tag and, when
+    ``WRITE_RESULT_FILES`` is set, saved to
+    ``tests/out/test_markdown_equations.html``.
+    """
+    pyla = pyladoc.DocumentWriter()
+    # The markdown must be a raw string: in a normal string literal the
+    # backslash-f at the start of each LaTeX fraction command is interpreted
+    # as the form-feed escape, so the renderer would receive a control
+    # character followed by "rac{...}" instead of the intended command.
+    pyla.add_markdown(
+        r"""
+        # Source Equations
+        1. $4(3x + 2) - 5(x - 1) = 3x + 14$
+        2. $\frac{2y + 5}{4} + \frac{3y - 1}{2} = 5$
+        3. $\frac{5}{x + 2} + \frac{2}{x - 2} = 3$
+        4. $8(3b - 5) + 4(b + 2) = 60$
+        5. $2c^2 - 3c - 5 = 0$
+        6. $4(2d - 1) + 5(3d + 2) = 7d + 28$
+        7. $q^2 + 6q + 9 = 16$
+
+        # Result Equations
+        1. $x = \frac{1}{4}$
+        2. $y = \frac{17}{8}$
+        3. $z = \frac{7}{3}$
+        4. $x = 1$ or $x = -6$
+        5. $a = \frac{1}{3}$ or $a = 2$
+        6. $x = -\frac{2}{3}$ or $x = 3$
+        7. $b = \frac{23}{7}$
+
+        # Step by Step
+        1. Distribute: $12x + 8 - 5x + 5 = 3x + 14$
+        2. Combine like terms: $7x + 13 = 3x + 14$
+        3. Subtract $3x$: $4x + 13 = 14$
+        4. Subtract $13$: $4x = 1$
+        5. Divide by $4$: $x = \frac{1}{4}$
+        """)
+
+    html_code = pyla.to_html()
+    document_validation.validate_html(html_code, check_for=['h1'])
+
+    if WRITE_RESULT_FILES:
+        with open('tests/out/test_markdown_equations.html', 'w', encoding='utf-8') as f:
+            f.write(html_code)
+
+
+def test_markdown_characters():
+    """Render markdown with umlauts, symbols and markup-sensitive characters.
+
+    The rendered HTML is validated with a check for the ``h1`` tag and, when
+    ``WRITE_RESULT_FILES`` is set, saved to
+    ``tests/out/test_markdown_characters.html``.
+    """
+    # The doubled backslash at the end of the last list is a deliberate
+    # escape for a single literal backslash in the markdown input.
+    markdown_text = """
+        # Special caracters
+
+        Umlaute: ÖÄÜ öäü
+
+        Other: ß, €, @, $, %, ~, µ
+
+        Units: m³, cm²
+
+        Controll characters: <, >, ", ', &, |, /, \\
+
+        """
+
+    writer = pyladoc.DocumentWriter()
+    writer.add_markdown(markdown_text)
+
+    rendered = writer.to_html()
+    document_validation.validate_html(rendered, check_for=['h1'])
+
+    if not WRITE_RESULT_FILES:
+        return
+
+    with open('tests/out/test_markdown_characters.html', 'w', encoding='utf-8') as out_file:
+        out_file.write(rendered)