first commit

This commit is contained in:
Nicolas 2025-03-28 13:30:08 +01:00
commit 67c48776ac
19 changed files with 3646 additions and 0 deletions

21
.flake8 Normal file
View File

@ -0,0 +1,21 @@
[flake8]
# Specify the maximum allowed line length
max-line-length = 88
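# Ignore specific rules:
# E501 (line too long), W503 (line break before binary operator) and
# W504 (line break after binary operator).
# Note: while E501 is ignored, max-line-length above is not enforced.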
ignore = E501, W503, W504
# Exclude specific files or directories
exclude =
.git,
__pycache__,
build,
dist,
.conda
.venv
venv
# Select the error-code families to report
# The D codes (docstring checks) require the flake8-docstrings plugin
select = C,E,F,W,D

134
.gitignore vendored Normal file
View File

@ -0,0 +1,134 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
.venv/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
pyModbusTCP_old/
test.py
test_*.ipynb
settings.json

111
README.md Normal file
View File

@ -0,0 +1,111 @@
# Pyladoc
## Description
Pyladoc is a Python package for programmatically generating HTML and
PDF/LaTeX output. It specifically targets applications where reports
or results containing Pandas tables and Matplotlib figures are generated programmatically
and displayed both as a website and as a PDF document without any manual formatting
steps.
This package focuses on the "Document in Code" approach for cases
where a lot of calculations and data handling is done but not a lot of
document text needs to be displayed.
LaTeX is used as the backend for PDF generation. There are excellent engines for
rendering HTML to PDF available, but even if there is no requirement for
accurate typesetting, programmatically placing content of variable
composition and size on fixed-size pages without manual intervention
is a hard problem that LaTeX handles very well.
### Supported primitives
- Text (can be Markdown or HTML formatted)
- Headings
- Tables (Pandas, Markdown or HTML)
- Matplotlib figures
- LaTeX equations
- Named references for figures, tables and equations
### Key Features
- HTML and PDF/LaTeX rendering of the same document
- Single file output including figures
- Figure and equation embedding in HTML by inline SVG, SVG in Base64 or PNG in Base64
- Figure embedding in LaTeX as PGF/TikZ
### Usage Scenarios
- Webservices
- Report generation for lab equipment
## Installation
It can be installed with pip:
```bash
pip install pyladoc
```
## Usage
It is easy to use as the following example code shows:
```python
import pandas as pd
import pyladoc
doc = pyladoc.DocumentWriter()
doc.add_markdown("""
# Example
This is an example. The @table:pandas_example shows some random data.
""")
some_data = {
    'Row1': ["Line1", "Line2", "Line3"],
    'Row2': [120, 100, 110],
    'Row3': ['12 g/km', '> 150 g/km', '110 g/km']
}
df = pd.DataFrame(some_data)
doc.add_table(df, 'This is a pandas example table', 'pandas_example')
html_code = doc.to_html()
doc.to_pdf('test.pdf')
```
## Example outputs
The following documents are generated by tests/test_rendering_example_doc.py:
- HTML: [test_html_render.html](tests/out/test_html_render.html)
- PDF: [test_latex_render.pdf](tests/out/test_latex_render.pdf)
## Contributing
Contributions are welcome, please open an issue or submit a pull request on GitHub.
## Developer Guide
To get started with developing the `pyladoc` package, follow these steps.
First, clone the repository to your local machine using Git:
```bash
git clone https://github.com/Nonannet/pyladoc.git
cd pyladoc
```
It's recommended to set up a venv:
```bash
python -m venv venv
source venv/bin/activate # On Windows use `venv\Scripts\activate`
```
Install the package and dev-dependencies while keeping files in the
current directory:
```bash
pip install -e .[dev]
```
Ensure that everything is set up correctly by running the tests:
```bash
pytest
```
## License
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

58
pyproject.toml Normal file
View File

@ -0,0 +1,58 @@
[project]
name = "pyladoc"
version = "1.0.0"
authors = [
    { name="Nicolas Kruse", email="nicolas.kruse@nonan.net" },
]
description = "Package for generating HTML and PDF/latex from python code"
readme = "README.md"
# src/pyladoc/__init__.py imports typing.Self (added in Python 3.11) and builds
# `X | Y` type unions at import time, so older interpreters cannot import the package.
requires-python = ">=3.11"
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]
dependencies = [
    "markdown>=3.3.0",
    "beautifulsoup4>=4.9.1"
]
[project.optional-dependencies]
dev = [
"pytest", "flake8", "mypy",
"lxml", "types-lxml",
"requests",
"matplotlib>=3.1.1",
"pandas>=2.0.0", "Jinja2",
]
[project.urls]
Homepage = "https://github.com/Nonannet/pyladoc"
Repository = "https://github.com/Nonannet/pyladoc"
Issues = "https://github.com/Nonannet/pyladoc/issues"
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"
[tool.setuptools.packages.find]
where = ["src"]
[tool.setuptools.package-data]
pyladoc = ["templates/*"]
[tool.mypy]
files = ["src"]
strict = true
warn_return_any = true
warn_unused_configs = true
check_untyped_defs = true
no_implicit_optional = true
show_error_codes = true
[tool.pytest.ini_options]
minversion = "6.0"
addopts = "-ra -q"
testpaths = ["tests"]
pythonpath = ["src"]

570
src/pyladoc/__init__.py Normal file
View File

@ -0,0 +1,570 @@
from typing import Callable, Generator, Self, Literal, TYPE_CHECKING
import html
import markdown
from base64 import b64encode
import re
import io
from . import latex
import pkgutil
# Positions of the HTML and the LaTeX render callback inside each
# [html_renderer, latex_renderer] pair stored by DocumentWriter.
HTML_OUTPUT = 0
LATEX_OUTPUT = 1

if TYPE_CHECKING:
    # Static type checkers always see the real pandas/matplotlib types.
    from pandas import DataFrame
    from pandas.io.formats.style import Styler
    import matplotlib.pyplot as plt
    from matplotlib.figure import Figure
    from matplotlib.legend import Legend as Mpl_Legend
    from matplotlib.text import Text as Mpl_Text
    Table = DataFrame | Styler
else:
    # At runtime pandas and matplotlib are optional: a failed import leaves a
    # None placeholder that is checked by assertions before use.
    try:
        from pandas import DataFrame
    except ImportError:
        DataFrame = None

    try:
        from pandas.io.formats.style import Styler
        Table = DataFrame | Styler
    except ImportError:
        # NOTE(review): Styler stays undefined on this path although
        # DocumentWriter.add_table references it - confirm intended behaviour.
        Table = DataFrame

    try:
        import matplotlib.pyplot as plt
        from matplotlib.figure import Figure
        from matplotlib.legend import Legend as Mpl_Legend
        from matplotlib.text import Text as Mpl_Text
    except ImportError:
        # NOTE(review): plt, Mpl_Legend and Mpl_Text stay undefined on this path.
        Figure = None

# Names of the table renderers accepted for the LaTeX export
TRenderer = Literal['pandas', 'simple']
# Names of the supported figure output formats
FFormat = Literal['svg', 'png', 'pgf']
def _get_pkgutil_string(path: str) -> str:
    """
    Loads a resource file shipped with this package and returns it as text.

    Args:
        path: Path of the resource, relative to the package

    Returns:
        The decoded content of the resource
    """
    raw = pkgutil.get_data(__name__, path)
    assert raw is not None
    return raw.decode()
def _markdown_to_html(text: str) -> str:
    """
    Renders markdown formatted text to HTML.

    Args:
        text: The markdown source

    Returns:
        The HTML code, with '<hr />' written as '<hr>'
    """
    enabled_extensions = ['tables', 'fenced_code', 'def_list', 'abbr', 'sane_lists']
    # non-breaking spaces are turned into regular spaces before rendering
    source = re.sub(r'\u00A0', ' ', text)
    rendered = markdown.markdown(source, extensions=enabled_extensions)
    return rendered.replace('<hr />', '<hr>')
def escape_html(text: str) -> str:
    """
    Escapes special HTML characters in a given string.

    Non-breaking spaces (U+00A0) are written as the ``&nbsp;`` entity, line
    breaks are collapsed into single spaces and surrounding whitespace is
    removed.

    Args:
        text: The text to escape

    Returns:
        Escaped text safe for inserting into HTML code
    """
    # Escape first and insert the entity afterwards: inserting '&nbsp;' before
    # html.escape() turns its ampersand into '&amp;nbsp;', which browsers show
    # literally instead of as a non-breaking space.
    ret = html.escape(text).replace('\u00A0', '&nbsp;')
    return ' '.join(ret.strip().splitlines())
def _clean_svg(svg_text: str) -> str:
# remove all tags not alllowd for inline svg from metadata:
svg_text = re.sub(r'<metadata>.*?</metadata>', '', svg_text, flags=re.DOTALL)
# remove illegal path-tags without d attribute:
return re.sub(r'<path(?![^>]*\sd=)\s.*?/>', '', svg_text, flags=re.DOTALL)
# def _get_templ_vars(template: str) -> list[str]:
# return re.findall("<!---START (.+?)--->.*?<!---END .+?--->", template, re.DOTALL)
def _drop_indent(text: str, amount: int) -> str:
"""
Drops a specific number of indentation spaces from a multiline text.
Args:
text: The text to drop indentation from
amount: The number of indentation space characters to drop
Returns:
The text with the specified amount of indentation removed
"""
return ''.join(' ' * amount + line for line in text.splitlines(True))
def _save_figure(fig: Figure, buff: io.BytesIO, figure_format: FFormat, font_family: str | None, scale: float) -> None:
    """
    Saves a matplotlib figure to a file-like object.

    The font family and the figure size are only changed for the duration of
    the rendering; the original settings are restored afterwards, even if
    rendering fails.

    Args:
        fig: The figure to save
        buff: The file-like object to save the figure to
        figure_format: The format to save the figure in (svg, png or pgf)
        font_family: The font family to use for the figure; if empty or None
            the fonts of the figure are left unchanged
        scale: Factor applied to the figure size while rendering
    """
    def get_all_elements() -> Generator[Mpl_Text, None, None]:
        # All text elements whose font family is adjusted for rendering
        for ax in fig.get_axes():
            yield ax.title
            yield ax.xaxis.label
            yield ax.yaxis.label
            yield from ax.get_xticklabels() + ax.get_yticklabels()
            legend: Mpl_Legend = ax.get_legend()
            if legend:
                yield from legend.get_texts()

    # Validate the format before the figure is touched
    backends = {'png': 'AGG', 'svg': 'SVG', 'pgf': 'PGF'}
    assert figure_format in backends, 'Figure format can be pgf (vector), svg (vector) or png (raster)'

    # Store current figure settings. This has to be a list: a generator would
    # be exhausted by the first loop below and nothing would be restored.
    old_state = [(e, e.get_fontfamily()) for e in get_all_elements()]
    old_size: tuple[float, float] = tuple(fig.get_size_inches())  # type: ignore[unused-ignore]

    try:
        # Adjust figure settings
        if font_family:
            for e, _ in old_state:
                e.set_fontfamily(font_family)
        fig.set_size_inches(old_size[0] * scale, old_size[1] * scale, False)

        # Render figure
        fig.savefig(buff, format=figure_format, backend=backends[figure_format])  # type: ignore[unused-ignore]
    finally:
        # Reset figure settings
        for e, s in old_state:
            e.set_fontfamily(s)
        fig.set_size_inches(old_size, None, False)
def figure_to_string(fig: Figure,
                     figure_format: FFormat = 'svg',
                     font_family: str | None = None,
                     scale: float = 1,
                     alt_text: str = '',
                     base64: bool = False) -> str:
    """
    Renders a matplotlib figure into an ascii string.

    pgf figures are returned as LaTeX code and svg figures as inline svg
    code. png figures, and svg figures when base64 is enabled, are base64
    encoded and wrapped in an img-tag.

    Args:
        fig: The figure to convert
        figure_format: The format to save the figure in (svg, png or pgf)
        font_family: The font family to use for the figure
        scale: Scaling factor for the figure size
        alt_text: The alt text for the figure (used for the img-tag)
        base64: For the svg format this determines if the image is encoded
            in base64

    Returns:
        The figure as ascii string
    """
    assert fig and isinstance(fig, Figure), 'fig parameter must be a matplotlib figure'

    with io.BytesIO() as buff:
        _save_figure(fig, buff, figure_format, font_family, scale)
        buff.seek(0)

        is_pgf = figure_format == 'pgf'
        is_inline_svg = figure_format == 'svg' and not base64

        if is_pgf or is_inline_svg:
            # Skip the file header: comments for pgf, xml and DOCTYPE for svg
            start_marker = b'\\begingroup%' if is_pgf else b'<svg'
            offset = buff.read(2028).find(start_marker)
            buff.seek(max(offset, 0))
            payload = buff.read().decode('utf-8')
            if is_pgf:
                return latex.to_ascii(payload)
            return _clean_svg(payload)

        image_mime = {"png": "image/png", "svg": "image/svg+xml"}
        assert figure_format in image_mime, 'Unknown image format'
        img_values = (
            escape_html(alt_text),
            image_mime[figure_format],
            b64encode(buff.read()).decode('ascii'),  # base64 assures (7-bit) ascii
        )
        return '<img alt="%s" src="data:%s;charset=utf-8;base64,%s">' % img_values
def latex_to_figure(latex_code: str) -> Figure:
    """
    Renders a LaTeX math expression into a matplotlib figure.

    The expression is drawn centered on hidden axes and the figure is sized
    to the extent of the rendered text plus a margin.

    Args:
        latex_code: The LaTeX expression (without the enclosing $ signs)

    Returns:
        The figure showing the expression
    """
    assert Figure, 'Matplotlib is required for rendering LaTex expressions for HTML output.'  # type:ignore[truthy-function]

    fig, ax = plt.subplots()

    # Hide everything except the text itself
    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis('off')

    expression = plt.text(0.5, 0.5, f'${latex_code}$',
                          horizontalalignment='center',
                          verticalalignment='center',
                          transform=ax.transAxes)

    # A draw pass is needed before the text extent can be queried
    fig.draw_without_rendering()
    extent = expression.get_window_extent()

    margin = 1.2
    width_inches = extent.width / fig.dpi * margin
    height_inches = extent.height / fig.dpi * margin
    fig.set_size_inches(width_inches, height_inches)
    return fig
def _fillin_fields(template: str, fields: dict[str, str]) -> str:
    """
    Replaces field placeholders in a template by the given field values.

    A placeholder has the form ``<!---START name--->...<!---END name--->``
    and has to be located on a single line. Only the first placeholder of
    each field is replaced. Everything on the line in front of the
    placeholder is replaced by spaces of the same length, and every line of
    the inserted value is indented by that amount.

    Args:
        template: The text containing the placeholders
        fields: Mapping of field names to the text to insert

    Returns:
        The template with the matching placeholders replaced
    """
    html_out = template
    for variable_name, value in fields.items():
        # Field names are plain text: escape them so that regular expression
        # metacharacters in a name can neither break nor widen the pattern.
        marker_name = re.escape(variable_name)
        # Find indentation depths:
        ret = re.search(f"^(.*?)<!---START {marker_name}--->.*?<!---END {marker_name}--->", html_out, flags=re.MULTILINE)
        if ret:
            indent_depths = len(ret.group(1))
            html_out = html_out[:ret.start(0)] + _drop_indent(value, indent_depths) + html_out[ret.end(0):]
    return html_out
def _fillin_reference_names(input_string: str, item_index: dict[str, int]) -> str:
replacements = [(*m.span(), m.group()) for m in re.finditer(r'(?<=@)\w+:[\w\_\-]+', input_string)]
ret: list[str] = []
current_pos = 0
for start, end, ref in replacements:
assert ref in item_index, f"Reference {ref} does not exist in the document"
ret.append(input_string[current_pos:start - 1])
ret.append(str(item_index[ref]))
current_pos = end
return ''.join(ret) + input_string[current_pos:]
def _check_latex_references(input_string: str, item_index: dict[str, int]) -> str:
    r"""
    Verifies that every ``\ref{type:id}`` in LaTeX code points to a known item.

    Args:
        input_string: The LaTeX code to check
        item_index: Mapping of "type:id" reference names to item numbers

    Returns:
        The unchanged input string

    Raises:
        AssertionError: If a referenced label does not match any normalized
            name of item_index
    """
    known_labels = {latex.normalize_label_text(item) for item in item_index}
    for match in re.finditer(r'(?<=\\ref\{)\w+:[\w\_\\\-]+(?=\})', input_string):
        ref = match.group()
        assert ref in known_labels, f"Reference {ref} does not exist in the document"
    return input_string
def _normalize_text_indent(text: str) -> str:
text_lines = text.splitlines()
if len(text_lines) > 1 and not text_lines[0].strip():
text_lines = text_lines[1:]
if not text_lines:
return ''
if len(text_lines) > 1 and text_lines[0] and text_lines[0][0] != ' ':
indent_amount = len(text_lines[1]) - len(text_lines[1].lstrip())
else:
indent_amount = len(text_lines[0]) - len(text_lines[0].lstrip())
return '\n'.join(
[' ' * max(0, len(line) - len(line.strip()) - indent_amount) + line.strip()
for line in text_lines])
def _create_document_writer() -> 'DocumentWriter':
    """
    Creates a new, empty DocumentWriter.

    Returns:
        The new DocumentWriter instance
    """
    return DocumentWriter()
def inject_to_template(content: str, template_path: str = '', internal_template: str = '') -> str:
    """
    Injects a content string into a template. The placeholder <!--CONTENT-->
    will be replaced by the content. If the placeholder is prefixed with a
    '%' comment character, this character will be replaced as well.

    Args:
        content: The text to insert into the template
        template_path: Path to a template file; takes precedence over
            internal_template
        internal_template: Path to an internal default template

    Returns:
        Template with included content

    Raises:
        ValueError: If neither template_path nor internal_template is provided
        AssertionError: If the template does not contain the placeholder
    """
    if template_path:
        # Read explicitly as UTF-8 instead of the platform default encoding,
        # matching the decoding of the internal templates.
        with open(template_path, 'r', encoding='utf-8') as f:
            template = f.read()
    elif internal_template:
        template = _get_pkgutil_string(internal_template)
    else:
        raise ValueError('No template provided')

    assert '<!--CONTENT-->' in template, 'No <!--CONTENT--> expression in template located'

    # Drop an optional '%' and whitespace in front of the placeholder
    prep_template = re.sub(r"\%?\s*<!--CONTENT-->", '<!--CONTENT-->', template)
    return prep_template.replace('<!--CONTENT-->', content)
class DocumentWriter():
    """
    A class to create a document for exporting to HTML or LaTeX.

    Content is added with the ``add_*`` methods. Every element is stored as a
    pair of render callbacks (HTML and LaTeX) which are evaluated when the
    document is exported.
    """
    def __init__(self) -> None:
        # One [html_renderer, latex_renderer] pair per document element
        self._doc: list[list[Callable[[], str]]] = []
        # Sub-documents created by new_field, keyed by field name
        self._fields: dict[str, DocumentWriter] = dict()
        self._base64_svgs: bool = False
        self._figure_format: FFormat = 'svg'
        self._table_renderer: TRenderer = 'simple'
        self._font_family: str | None = None
        # Number of items added so far per reference type
        self._item_count: dict[str, int] = {}
        # Maps "<ref_type>:<ref_id>" to the number of the item
        self._item_index: dict[str, int] = {}
        self._fig_scale: float = 1

    def _add_item(self, ref_id: str, ref_type: str, caption_prefix: str) -> str:
        """
        Registers a numbered item (figure, table, equation).

        Args:
            ref_id: The reference id; if empty the item number is used
            ref_type: The reference type the item is counted under
            caption_prefix: Format pattern the item number is inserted into

        Returns:
            The caption prefix containing the item number
        """
        current_index = self._item_count.get(ref_type, 0) + 1
        if not ref_id:
            ref_id = str(current_index)
        self._item_index[f"{ref_type}:{ref_id}"] = current_index
        self._item_count[ref_type] = current_index
        return caption_prefix.format(current_index)

    def new_field(self, name: str) -> 'DocumentWriter':
        """
        Creates a sub-document whose content replaces the placeholder
        ``<!---START name--->...<!---END name--->`` in the rendered document.

        Args:
            name: The name of the field

        Returns:
            The DocumentWriter holding the content of the field
        """
        new_dwr = _create_document_writer()
        self._fields[name] = new_dwr
        return new_dwr

    def add_document(self, doc: Self) -> None:
        """
        Appends the elements of another document to this document.

        Args:
            doc: The document to append
        """
        # NOTE(review): the reference index of doc is not merged, so references
        # to items of doc cannot be resolved by this document - confirm intent.
        self._doc += doc._doc

    def add_diagram(self, fig: Figure, caption: str = '', ref_id: str = '',
                    prefix_pattern: str = 'Figure {}: ', ref_type: str = 'fig',
                    centered: bool = True) -> None:
        """
        Adds a matplotlib figure to the document.

        Args:
            fig: The figure to add
            caption: The caption of the figure
            ref_id: If provided, the figure can be referenced by this id
            prefix_pattern: Pattern for the caption prefix in HTML; {} is
                replaced by the figure number
            ref_type: The reference type the figure is counted under
            centered: Whether to center the figure in LaTeX
        """
        caption_prefix = self._add_item(ref_id, ref_type, prefix_pattern)

        def render_to_html() -> str:
            return '<div class="figure">%s%s</div>' % (
                figure_to_string(fig, self._figure_format, base64=self._base64_svgs, scale=self._fig_scale),
                '<br>' + caption_prefix + escape_html(caption) if caption else '')

        def render_to_latex() -> str:
            return '\\begin{figure}%s\n%s\n\\caption{%s}\n%s\\end{figure}' % (
                '\n\\centering' if centered else '',
                figure_to_string(fig, 'pgf', self._font_family, scale=self._fig_scale),
                latex.escape_text(caption),
                '\\label{%s}\n' % latex.normalize_label_text(ref_type + ':' + ref_id) if ref_id else '')

        self._doc.append([render_to_html, render_to_latex])

    def add_table(self, table: Table, caption: str = '', ref_id: str = '',
                  prefix_pattern: str = 'Table {}: ', ref_type: str = 'table', centered: bool = True) -> None:
        """
        Adds a pandas table to the document.

        Args:
            table: A pandas DataFrame or DataFrame Styler
            caption: The caption of the table
            ref_id: If provided, the table can be referenced by this id
            prefix_pattern: Pattern for the caption prefix in HTML; {} is
                replaced by the table number
            ref_type: The reference type the table is counted under
            centered: Whether to center the table in LaTeX
        """
        assert Table and isinstance(table, Table), 'Table has to be a pandas DataFrame oder DataFrame Styler'
        caption_prefix = self._add_item(ref_id, ref_type, prefix_pattern)
        styler = table if isinstance(table, Styler) else getattr(table, 'style', None)
        assert isinstance(styler, Styler), 'Jinja2 package is required for rendering tables'

        def render_to_html() -> str:
            html_string = styler.to_html(table_uuid=ref_id, caption=caption_prefix + escape_html(caption))
            # Drop the style block generated by the Styler
            return re.sub(r'<style.*?>.*?</style>', '', html_string, flags=re.DOTALL)

        def render_to_latex() -> str:
            if self._table_renderer == 'pandas':
                return styler.to_latex(
                    label=latex.normalize_label_text(ref_type + ':' + ref_id),
                    hrules=True,
                    convert_css=True,
                    siunitx=True,
                    caption=latex.escape_text(caption),
                    position_float='centering' if centered else None)
            else:
                return latex.render_pandas_styler_table(styler, caption, ref_type + ':' + ref_id, centered)

        self._doc.append([render_to_html, render_to_latex])

    def add_text(self, text: str, section_class: str = '') -> None:
        """
        Adds a text paragraph to the document.

        Args:
            text: The text to add
            section_class: The class for the paragraph
        """
        norm_text = _normalize_text_indent(text)

        def render_to_html() -> str:
            html = '<p>' + escape_html(norm_text) + '</p>'
            if section_class:
                return '<div class="' + section_class + '">' + html + '</div>'
            else:
                return html

        def render_to_latex() -> str:
            return latex.from_html(render_to_html())

        self._doc.append([render_to_html, render_to_latex])

    def add_html(self, text: str) -> None:
        """
        Adds HTML formatted text to the document. For the LaTeX
        export only basic HTML for text formatting and tables
        is supported.

        Args:
            text: The HTML to add to the document
        """
        def render_to_html() -> str:
            return text

        def render_to_latex() -> str:
            return latex.from_html(text)

        self._doc.append([render_to_html, render_to_latex])

    def add_h1(self, text: str) -> None:
        """
        Adds a h1 heading to the document.

        Args:
            text: The text of the heading
        """
        def render_to_html() -> str:
            return '<h1>' + escape_html(text) + '</h1>'

        def render_to_latex() -> str:
            return '\\section{' + latex.escape_text(text) + '}\n'

        self._doc.append([render_to_html, render_to_latex])

    def add_h2(self, text: str) -> None:
        """
        Adds a h2 heading to the document.

        Args:
            text: The text of the heading
        """
        def render_to_html() -> str:
            return '<h2>' + escape_html(text) + '</h2>'

        def render_to_latex() -> str:
            return '\\subsection{' + latex.escape_text(text) + '}\n'

        self._doc.append([render_to_html, render_to_latex])

    def add_h3(self, text: str) -> None:
        """
        Adds a h3 heading to the document.

        Args:
            text: The text of the heading
        """
        def render_to_html() -> str:
            return '<h3>' + escape_html(text) + '</h3>'

        def render_to_latex() -> str:
            return '\\subsubsection{' + latex.escape_text(text) + '}\n'

        self._doc.append([render_to_html, render_to_latex])

    def add_equation(self, latex_equation: str, ref_id: str = '', ref_type: str = 'eq') -> None:
        """
        Adds a LaTeX equation to the document.

        Args:
            latex_equation: LaTeX formatted equation
            ref_id: If provided, the equation is displayed with
                a number and can be referenced by the ref_id
            ref_type: The reference type the equation is counted under
        """
        caption = self._add_item(ref_id, ref_type, '({})')

        def render_to_html() -> str:
            fig = latex_to_figure(latex_equation)
            try:
                rendered = figure_to_string(fig, self._figure_format, base64=self._base64_svgs)
            finally:
                # The temporary figure is created through pyplot, which keeps
                # it registered (and in memory) until it is closed explicitly.
                plt.close(fig)
            return ('<div class="equation-container"><div class="equation">%s</div>'
                    '<div class="equation-number">%s</div></div>') % (
                        rendered,
                        caption)

        def render_to_latex() -> str:
            if ref_id:
                # Normalize the label like figure and table labels, so that it
                # matches the normalized names the references are checked against.
                return '\\begin{equation}\\label{%s}%s\\end{equation}' % (
                    latex.normalize_label_text(ref_type + ':' + ref_id), latex_equation)
            else:
                return '\\[%s\\]' % latex_equation

        self._doc.append([render_to_html, render_to_latex])

    def add_markdown(self, text: str, section_class: str = '') -> None:
        """
        Adds a markdown formatted text to the document.

        Args:
            text: The markdown text to add
            section_class: The class for the text section
        """
        norm_text = _normalize_text_indent(str(text))

        def render_to_html() -> str:
            html = _markdown_to_html(norm_text)
            if section_class:
                return '<div class="' + section_class + '">' + html + '</div>'
            else:
                return html

        def render_to_latex() -> str:
            return latex.from_html(render_to_html())

        self._doc.append([render_to_html, render_to_latex])

    def _render_doc(self, doc_type: int) -> str:
        """
        Renders all elements of the document and fills in the fields.

        Args:
            doc_type: HTML_OUTPUT or LATEX_OUTPUT

        Returns:
            The rendered document
        """
        # NOTE(review): fields are rendered as HTML for both output types - confirm
        fields = {k: f.to_html() for k, f in self._fields.items()}
        return _fillin_fields(''.join(el[doc_type]() for el in self._doc), fields)

    def to_html(self, figure_format: FFormat = 'svg',
                base64_svgs: bool = False, figure_scale: float = 1) -> str:
        """
        Export the document to HTML. Figures will be embedded in the HTML code.
        The format can be selected between png in base64, inline svg or svg in base64.

        Args:
            figure_format: The format for embedding the figures in the HTML code (svg or png)
            base64_svgs: Whether to encode svg images in base64
            figure_scale: Scaling factor for the figure size

        Returns:
            The HTML code
        """
        self._figure_format = figure_format
        self._base64_svgs = base64_svgs
        self._fig_scale = figure_scale
        return _fillin_reference_names(self._render_doc(HTML_OUTPUT), self._item_index)

    def to_latex(self, font_family: Literal[None, 'serif', 'sans-serif'] = None,
                 table_renderer: TRenderer = 'simple', figure_scale: float = 1) -> str:
        """
        Export the document to LaTeX. Figures will be embedded as pgf graphics.

        Args:
            font_family: Overwrites the font family for figures
            table_renderer: The renderer for tables (simple: renderer with column type
                guessing for text and numbers; pandas: using the internal pandas LaTeX renderer)
            figure_scale: Scaling factor for the figure size

        Returns:
            The LaTeX code
        """
        self._font_family = font_family
        assert table_renderer in ['simple', 'pandas'], "table_renderer must be 'simple' or 'pandas'"
        self._table_renderer = table_renderer
        self._fig_scale = figure_scale
        return _check_latex_references(self._render_doc(LATEX_OUTPUT), self._item_index)

    def to_pdf(self, file_path: str,
               font_family: Literal[None, 'serif', 'sans-serif'] = None,
               table_renderer: TRenderer = 'simple',
               latex_template_path: str = '') -> bool:
        """
        Export the document to a PDF file using LaTeX.

        Args:
            file_path: The path to save the PDF file to
            font_family: Overwrites the font family for figures and the template
            table_renderer: The renderer for tables ('simple' or 'pandas')
            latex_template_path: Path to a LaTeX template file. The
                expression <!--CONTENT--> will be replaced by the generated content.
                If no path is provided a default template is used.

        Returns:
            True if the PDF was successfully created
        """
        latex_code = inject_to_template(self.to_latex(font_family, table_renderer),
                                        latex_template_path,
                                        'templates/default_template.tex')

        if font_family == 'sans-serif':
            latex_code = latex.inject_latex_command(latex_code, '\\renewcommand{\\familydefault}{\\sfdefault}')

        success, errors, warnings = latex.compile(latex_code, file_path)

        if not success:
            print('Errors:')
            print('\n'.join(errors))
            print('Warnings:')
            print('\n'.join(warnings))

        return success

    def _repr_html_(self) -> str:
        return self.to_html()

    def __repr__(self) -> str:
        return self.to_html()

346
src/pyladoc/latex.py Normal file
View File

@ -0,0 +1,346 @@
import bs4
from html.parser import HTMLParser
from typing import Iterator, Generator, Any, TYPE_CHECKING
import re
import os
import shutil
import subprocess
import tempfile
from .latex_escaping import unicode_to_latex_dict, latex_escape_dict

if TYPE_CHECKING:
    from pandas.io.formats.style import Styler
else:
    # pandas is an optional dependency and Styler is only needed for type
    # annotations in this module. An unguarded import would make importing
    # the whole package fail when pandas (or its Styler support) is missing.
    try:
        from pandas.io.formats.style import Styler
    except ImportError:
        Styler = Any
def basic_formatter(value: Any) -> str:
    """
    Formats any value as LaTeX-escaped text.

    Args:
        value: The value to format; it is converted with str()

    Returns:
        The escaped string representation of the value
    """
    as_text = str(value)
    return escape_text(as_text)
def to_ascii(text: str) -> str:
    """
    Substitutes often used unicode characters in LaTeX code or text
    by their LaTeX ascii equivalents.

    Args:
        text: The text to convert.

    Returns:
        The text with all characters listed in unicode_to_latex_dict replaced.
    """
    regex_filter = ('|'.join(unicode_to_latex_dict))

    def substitute(found: 're.Match[str]') -> str:
        # The matched text is always one of the dictionary keys
        return unicode_to_latex_dict[found.group()]

    return re.sub(regex_filter, substitute, text)
def normalize_label_text(text: str) -> str:
    """
    Makes a text usable as label by replacing every character that is not
    an ascii letter, a digit, a dot or a colon with a hyphen.

    Args:
        text: Input text

    Returns:
        Normalized text
    """
    not_allowed = re.compile(r"[^a-zA-Z0-9.:]")
    return not_allowed.sub('-', text)
def escape_text(text: str) -> str:
    """
    Escapes special LaTeX characters and often used unicode characters in a given string.

    Args:
        text: The text to escape

    Returns:
        Escaped text
    """
    # The keys of both dictionaries are used as regular expression patterns,
    # the values as their LaTeX replacements.
    latex_translation = latex_escape_dict | unicode_to_latex_dict

    regex_filter = '|'.join(latex_translation)

    last_s = 0
    ret: list[str] = []
    for m in re.finditer(regex_filter, text):
        s1, s2 = m.span()
        # Copy the unchanged text in front of the match
        ret.append(text[last_s:s1])

        # Find the replacement(s) whose pattern matches the found text; the
        # first one is used.
        matches = [v for k, v in latex_translation.items() if re.match(k, m.group())]
        # NOTE(review): m.group(1) requires at least one capture group in the
        # combined pattern, presumably defined in latex_escape_dict - confirm.
        if m.group(1):
            # Fill the normalized text of the capture group into the
            # \g<1> placeholder of the replacement
            ret.append(matches[0].replace(r'\g<1>', normalize_label_text(m.group(1))))
        else:
            ret.append(matches[0])
        last_s = s2
    # Copy the remaining text behind the last match
    ret.append(text[last_s:])

    return ''.join(ret)
def render_pandas_styler_table(df_style: Styler, caption: str = '', label: str = '', centering: bool = True) -> str:
    """
    Renders a pandas Styler object as LaTeX table.

    The column type is guessed from the displayed values: a column whose body
    cells all look numeric gets the column type ``S``, any other column ``l``.

    Args:
        df_style: The pandas Styler object to convert.
        caption: The caption for the table.
        label: Label for referencing the table.
        centering: Whether to center the table.

    Returns:
        The LaTeX code.
    """
    table = df_style._translate(False, False, blank='')  # type: ignore[attr-defined]
    body_rows = table['body']

    parts: list[str] = ['\\begin{table}\n']
    if centering:
        parts.append('\\centering\n')

    # Guess column type
    numeric = re.compile(r'^[<>]?\s*(?:\d+,?)+(?:\.\d+)?(?:\s\D.*)?$')
    formats: list[str] = []
    for ci in range(len(body_rows[0])):
        if not body_rows[0][ci]['is_visible']:
            continue
        looks_numeric = all(numeric.match(row[ci]['display_value'].strip()) for row in body_rows)
        formats.append('S' if looks_numeric else 'l')

    if caption:
        parts.append(f"\\caption{{{escape_text(caption)}}}\n")
    if label:
        parts.append(f"\\label{{{normalize_label_text(label)}}}\n")

    parts.append(f"\\begin{{tabular}}{{{''.join(formats)}}}\n\\toprule\n")

    # Header rows
    for head in table['head']:
        header_cells = [f"\\text{{{escape_text(c['display_value'].strip())}}}"
                        for c in head if c['is_visible']]
        parts.append(' & '.join(header_cells))
        parts.append(' \\\\\n')

    parts.append('\\midrule\n')

    # Body rows
    for body in body_rows:
        body_cells = [escape_text(c['display_value'].strip())
                      for c in body if c['is_visible']]
        parts.append(' & '.join(body_cells))
        parts.append(' \\\\\n')

    parts.append('\\bottomrule\n\\end{tabular}\n\\end{table}')
    return ''.join(parts)
def from_html_old(html_code: str) -> str:
    """
    Converts HTML code to LaTeX code using BeautifulSoup.

    Tags listed in ``html_to_latex`` are replaced by a LaTeX prefix/postfix
    pair, tables are converted to ``tabular`` environments and all other
    tags contribute only their content.

    Args:
        html_code: The HTML code to convert.

    Returns:
        The LaTeX code.
    """
    root = bs4.BeautifulSoup(html_code, 'html.parser')

    html_to_latex = {
        'strong': ('\\textbf{', '}'),
        'b': ('\\textbf{', '}'),
        'em': ('\\emph{', '}'),
        'i': ('\\emph{', '}'),
        'p': ('', '\n\n'),
        'h1': ('\\section{', '}'),
        'h2': ('\\subsection{', '}'),
        'h3': ('\\subsubsection{', '}'),
        'ul': ('\\begin{itemize}', '\\end{itemize}'),
        'ol': ('\\begin{enumerate}', '\\end{enumerate}'),
        'li': ('\\item ', ''),
        'latex_eq': ('\\[', '\\]'),
    }

    def handle_table(table: bs4.element.Tag) -> str:
        rows = table.find_all('tr')
        latex_table: str = ''
        for row in rows:
            assert isinstance(row, bs4.element.Tag), 'HTML table not valid'
            cells = row.find_all(['th', 'td'])
            if not latex_table:
                # The first row defines the number of columns
                latex_table = "\\begin{tabular}{|" + "|".join(['l'] * len(cells)) + "|}\\toprule\n"
            # Every row is emitted, including the first one: an else-branch
            # here would drop the cells of the first (header) row.
            latex_table += " & ".join(escape_text(cell.get_text(strip=True)) for cell in cells) + " \\\\\n"
        latex_table += "\\bottomrule\n\\end{tabular}"
        return latex_table

    def parse_node(element: bs4.element.Tag) -> Iterator[str]:
        prefix, post = html_to_latex.get(element.name, ('', ''))
        yield prefix
        for c in element.children:
            if isinstance(c, bs4.element.Tag):
                if c.name == 'table':
                    yield handle_table(c)
                else:
                    yield from parse_node(c)
            else:
                # Plain text node
                yield escape_text(c.text)
        yield post

    return ''.join(parse_node(root))
def from_html(html_code: str) -> str:
    """
    Converts HTML code to LaTeX code using HTMLParser.

    Tags listed in ``html_to_latex`` are replaced by a LaTeX prefix/postfix
    pair, tables are converted to ``tabular`` environments and links to
    ``\\href`` commands. Tags not handled below contribute only their text.

    Args:
        html_code: The HTML code to convert.

    Returns:
        The LaTeX code.
    """
    # Maps a tag name to the LaTeX code emitted at its start and end tag
    html_to_latex = {
        'strong': ('\\textbf{', '}'),
        'b': ('\\textbf{', '}'),
        'em': ('\\emph{', '}'),
        'i': ('\\emph{', '}'),
        'p': ('', '\n\n'),
        'h1': ('\\section{', '}\n'),
        'h2': ('\\subsection{', '}\n'),
        'h3': ('\\subsubsection{', '}\n'),
        'ul': ('\\begin{itemize}\n', '\\end{itemize}\n'),
        'ol': ('\\begin{enumerate}\n', '\\end{enumerate}\n'),
        'li': ('\\item ', '\n')
    }

    class LaTeXHTMLParser(HTMLParser):
        def __init__(self) -> None:
            super().__init__()
            # Generated LaTeX fragments, joined at the end
            self.latex_code: list[str] = []
            # Index of the tabular header placeholder in latex_code; -1 if
            # no placeholder is waiting to be filled
            self.header_index: int = -1
            # Column specification collected for the current table row
            self.column_alignment = ''
            # Whether a \midrule was already emitted for the current table
            self.midrule_flag = False
            # Whether a th cell was seen in the current table
            self.header_flag = False

        def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
            if tag in html_to_latex:
                prefix, _ = html_to_latex[tag]
                self.latex_code.append(prefix)
            elif tag == 'table':
                # The column specification is only known after the first row
                self.header_index = len(self.latex_code)
                self.latex_code.append('')  # Placeholder for column header
                self.midrule_flag = False
                self.header_flag = False
            elif tag == 'tr':
                self.column_alignment = ''
            elif tag in ['th', 'td']:
                # A style attribute containing 'right' gives a right aligned column
                style = [v for k, v in attrs if k == 'style']
                if style and style[0] and 'right' in style[0]:
                    self.column_alignment += 'r'
                else:
                    self.column_alignment += 'l'
            elif tag == 'a':
                href = [v for k, v in attrs if k == 'href']
                assert href, 'Link href attribute is missing'
                self.latex_code.append(f"\\href{{{href[0]}}}{{")

        def handle_endtag(self, tag: str) -> None:
            if tag in html_to_latex:
                _, postfix = html_to_latex[tag]
                self.latex_code.append(postfix)
            elif tag == 'table':
                self.latex_code.append("\\bottomrule\n\\end{tabular}\n")
            elif tag == 'tr':
                self.latex_code.pop()  # Remove column separator after last entry
                if self.header_index >= 0:
                    # First row of the table: fill in the tabular header
                    self.latex_code[self.header_index] = f"\\begin{{tabular}}{{{self.column_alignment}}}\\toprule\n"
                    self.header_index = -1
                self.latex_code.append(' \\\\\n')
                # Separate the header from the body once per table
                if self.header_flag and not self.midrule_flag:
                    self.latex_code.append("\\midrule\n")
                    self.midrule_flag = True
            elif tag == 'th':
                self.latex_code.append(" & ")
                self.header_flag = True
            elif tag == 'td':
                self.latex_code.append(" & ")
            elif tag == 'a':
                self.latex_code.append("}")

        def handle_data(self, data: str) -> None:
            # Text consisting only of whitespace is dropped
            if data.strip():
                self.latex_code.append(escape_text(data))

    parser = LaTeXHTMLParser()
    parser.feed(html_code)
    return ''.join(parser.latex_code)
def compile(latex_code: str, output_file: str = '', encoding: str = 'utf-8') -> tuple[bool, list[str], list[str]]:
    """
    Compiles LaTeX code to a PDF file using pdflatex.

    pdflatex is run up to three times; a further run is started while the
    output contains warnings about references and no error occurred.

    Args:
        latex_code: The LaTeX code to compile.
        output_file: The output file path. If empty, the PDF is not copied.
        encoding: The encoding of the LaTeX code.

    Returns:
        A tuple with three elements:
        - A boolean indicating whether the compilation was successful.
        - A list of errors.
        - A list of warnings.
    """
    with tempfile.TemporaryDirectory() as tmp_path:
        command = ['pdflatex', '-halt-on-error', '--output-directory', tmp_path]
        errors: list[str] = []
        warnings: list[str] = []

        for i in range(1, 4):
            rerun_flag = False
            error_flag = False

            process = subprocess.run(command, input=latex_code.encode(encoding), stdout=subprocess.PIPE)
            # The log may contain bytes that are not valid in the given
            # encoding; they must not abort the evaluation of the log.
            log_text = process.stdout.decode(encoding, errors='replace')

            for line in log_text.split('\n'):
                if 'Warning' in line:
                    warnings.append(f"Run {i}: " + line)
                    if 'reference' in line:
                        rerun_flag = True
                if line.startswith('!') or line.startswith('*!'):
                    error_flag = True
                if error_flag:
                    # Everything from the first error line on is error context
                    errors.append(line)

            if process.returncode != 0 and not errors:
                errors.append(f"pdflatex exited with return code {process.returncode}")

            if not rerun_flag or errors:
                break

        # Copy pdf file
        file_list = [f for f in os.listdir(tmp_path) if f.lower().endswith('.pdf')]
        if file_list:
            pdf_file = os.path.join(tmp_path, file_list[0])
            if output_file:
                shutil.copyfile(pdf_file, output_file)
        elif not errors:
            # Without this a run that produced no output would count as success
            errors.append('No PDF file was produced')

    return not errors, errors, warnings
def inject_latex_command(text: str, command: str) -> str:
    """Insert ``command`` into LaTeX source right after the last ``\\usepackage`` line.

    The command is surrounded by a newline on each side. If the text has no
    line starting with ``\\usepackage`` (ignoring surrounding whitespace),
    the command is appended at the end instead.

    Args:
        text: The LaTeX source.
        command: The LaTeX command(s) to insert.

    Returns:
        The source lines joined with ``\\n``, including the inserted command.
    """
    rows = text.splitlines()
    snippet = f"\n{command}\n"

    package_rows = [idx for idx, row in enumerate(rows) if row.strip().startswith("\\usepackage")]

    if package_rows:
        rows.insert(package_rows[-1] + 1, snippet)
    else:
        rows.append(snippet)

    return '\n'.join(rows)

View File

@ -0,0 +1,89 @@
# Maps single non-ASCII characters to a LaTeX representation.
# Every key must be exactly one character: an empty-string key would collapse
# all such entries into one and cannot be used for a character replacement.
unicode_to_latex_dict = {
    # Unicode numeric subscripts
    '₀': r'\textsubscript{0}', '₁': r'\textsubscript{1}', '₂': r'\textsubscript{2}', '₃': r'\textsubscript{3}',
    '₄': r'\textsubscript{4}', '₅': r'\textsubscript{5}', '₆': r'\textsubscript{6}', '₇': r'\textsubscript{7}',
    '₈': r'\textsubscript{8}', '₉': r'\textsubscript{9}',

    # Unicode numeric superscripts
    '⁰': r'\textsuperscript{0}', '¹': r'\textsuperscript{1}', '²': r'\textsuperscript{2}', '³': r'\textsuperscript{3}',
    '⁴': r'\textsuperscript{4}', '⁵': r'\textsuperscript{5}', '⁶': r'\textsuperscript{6}', '⁷': r'\textsuperscript{7}',
    '⁸': r'\textsuperscript{8}', '⁹': r'\textsuperscript{9}', '⁺': r'\textsuperscript{+}', '⁻': r'\textsuperscript{-}',

    # Often used European non-ascii-characters
    'ä': r'{\"a}',
    'ö': r'{\"o}',
    'ü': r'{\"u}',
    'Ä': r'{\"A}',
    'Ö': r'{\"O}',
    'Ü': r'{\"U}',
    'ß': r'{\ss}',
    'é': r"{\'e}",
    'è': r"{\`e}",
    'ê': r"{\^e}",
    'à': r"{\`a}",
    'â': r"{\^a}",
    'ç': r"{\c{c}}",
    'É': r"{\'E}",
    'È': r"{\`E}",
    'Ê': r"{\^E}",
    'À': r"{\`A}",
    'Â': r"{\^A}",
    'Ç': r"{\c{C}}",
    'ó': r"{\'o}",
    'ò': r"{\`o}",
    'ô': r"{\^o}",
    'Ó': r"{\'O}",
    'Ò': r"{\`O}",
    'Ô': r"{\^O}",
    'í': r"{\'i}",
    'ì': r"{\`i}",
    'î': r"{\^i}",
    'Í': r"{\'I}",
    'Ì': r"{\`I}",
    'Î': r"{\^I}",
    'ú': r"{\'u}",
    'ù': r"{\`u}",
    'û': r"{\^u}",
    'Ú': r"{\'U}",
    'Ù': r"{\`U}",
    'Û': r"{\^U}",
    'å': r"{\r{a}}",
    'Å': r"{\r{A}}",
    'ø': r"{\o}",
    'Ø': r"{\O}",
    'æ': r"{\ae}",
    'Æ': r"{\AE}",
    'œ': r"{\oe}",
    'Œ': r"{\OE}",

    # Other unicode
    '°': r'{\textdegree}',
    'µ': r'{\textmu}',
    'π': r'$\pi$',
    '≈': r'$\approx$',
    '±': r'$\pm$',
    '≠': r'$\neq$',
    '\u2206': r'$\Delta$',  # Increment sign
    '\u0394': r'$\Delta$',  # Greek capital letter delta
    'Ω': r'$\Omega$',
    'Λ': r'$\Lambda$',
    'Σ': r'$\Sigma$',
    # '€': r'{\euro}',
    '£': r'{\pounds}',
    '¥': r'{\yen}',
    '\u00A0': r'~',  # Non-breaking space
    '\u2007': ' '  # Figure space
}
# Maps text that has a special meaning in LaTeX to an escaped replacement.
# NOTE(review): some keys (r'\$', r'\^', r'\\' and the '@...' entry with a capture
# group and a \g<1> back-reference) are written as regular-expression patterns, so
# the consumer presumably treats every key as a regex - confirm against the code
# that reads this dict (not visible here). Entry order may matter to that code.
latex_escape_dict = {
'&': r'\&',
'%': r'\%',
r'\$': r'\$',
'#': r'\#',
'_': r'\_',
'{': r'\{',
'}': r'\}',
'<': r'{\textless}',
'>': r'{\textgreater}',
'~': r'\textasciitilde{}',
r'\^': r'\textasciicircum{}',
r'\\': r'\textbackslash{}',
# References: "@prefix:label" is turned into \ref{prefix:label}
r'@(\w+:[\w\_\-]+)': r'\ref{\g<1>}'
}

View File

@ -0,0 +1,38 @@
\documentclass[a4paper,12pt]{article}
% Packages
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{lmodern} % Load Latin Modern font
\usepackage{graphicx} % For including images
\usepackage{amsmath} % For mathematical symbols
\usepackage{amssymb} % For additional symbols
\usepackage{hyperref} % For hyperlinks
\usepackage{caption} % For customizing captions
\usepackage{geometry} % To set margins
\usepackage{natbib} % For citations
\usepackage{float} % For fixing figure positions
\usepackage{siunitx} % For scientific units
\usepackage{booktabs} % For professional-looking tables
\usepackage{pgf} % For using pgf graphics
\usepackage{textcomp, gensymb} % provides \degree symbol
\sisetup{
table-align-text-post = false
}
% Geometry Settings
\geometry{margin=1in} % 1-inch margins
% Title and Author Information
% \title{Report Title}
% \author{Your Name \\ Department of XYZ \\ \texttt{email@example.com}}
% \date{\today}
\begin{document}
% Title Page
% # \maketitle
% <!--CONTENT-->
\end{document}

View File

@ -0,0 +1,114 @@
<!doctype html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Test template</title>
<style>
body
{
background-color: white;
font-family: Lucida Grande,Lucida Sans Unicode,Lucida Sans,Geneva,Verdana,sans-serif;
}
div.document
{
max-width: 820px;
top: 20px;
overflow: visible;
margin: 0 auto;
width: 90%;
padding-bottom: 50px;
}
div h1
{
font-size: 32px;
font-weight: normal;
margin-bottom: 10px;
margin-top: 24px;
color: black;
}
div h2
{
font-size: 24px;
font-weight: normal;
margin-bottom: 10px;
margin-top: 24px;
color: black;
}
div.figure {
text-align: center;
}
div svg {
margin-left: -5%;
max-width: 110%;
height: auto;
object-fit: contain;
}
.equation-container {
display: flex;
justify-content: space-between;
align-items: center;
width: 100%;
}
.equation {
text-align: center;
width: 100%;
}
.equation-number {
text-align: right;
}
table {
border-collapse: collapse;
margin: 20px auto;
}
table th,
table td {
padding: 8px;
font-variant-numeric: tabular-nums;
}
table th {
background-color: #f4f4f4;
font-weight: bold;
}
/* Alternating row colors for readability */
table tr:nth-child(even) {
background-color: #f9f9f9;
}
table tr:nth-child(odd) {
background-color: #ffffff;
}
/* Caption styling */
table caption {
caption-side: top;
font-weight: bold;
font-size: 16px;
margin-bottom: 10px;
}
/* Fine-tuning text in table */
table td, table th {
vertical-align: middle;
line-height: 1.5;
}
</style>
</head>
<body>
<div class="document">
<!--CONTENT-->
</div>
</body>

View File

@ -0,0 +1,64 @@
from typing import Generator, Any
from lxml import etree
from lxml.etree import _Element as EElement # type: ignore
import requests
# Load the HTML page skeleton once at import time; validate_html() inserts the
# HTML under test at its '<!--CONTENT-->' placeholder before online validation.
# NOTE: the path is relative to the current working directory, so importing this
# module only works when the process is started from the repository root.
with open('src/pyladoc/templates/test_template.html', mode='rt', encoding='utf-8') as f:
html_test_template = f.read()
def add_line_numbers(multiline_string: str) -> str:
    """Prefix every line of the given text with its 1-based number and ': '.

    Args:
        multiline_string: Text to number; it is split with ``str.splitlines``.

    Returns:
        The numbered lines joined with ``\\n`` (an empty string for empty input).
    """
    return "\n".join(
        f"{number}: {content}"
        for number, content in enumerate(multiline_string.splitlines(), start=1)
    )
def validate_html_with_w3c(html_string: str, timeout: float = 30.0) -> dict[str, Any]:
    """Send an HTML document to the W3C Nu HTML Checker and return its JSON report.

    Args:
        html_string: The complete HTML document to validate.
        timeout: Seconds to wait for the validator before giving up.

    Returns:
        The decoded JSON response of the validator on HTTP status 200.
        Otherwise a dict with an "error" entry (plus "details" holding the
        response text for a non-200 status). Request failures are reported
        the same way instead of being raised.
    """
    validator_url = "https://validator.w3.org/nu/"

    # Parameters for the POST request
    headers = {
        "Content-Type": "text/html; charset=utf-8",
        "User-Agent": "Python HTML Validator"}

    try:
        # Encode explicitly so the body really is UTF-8 as announced in the
        # Content-Type header, and never wait forever for the remote service.
        response = requests.post(
            validator_url,
            headers=headers,
            data=html_string.encode('utf-8'),
            params={"out": "json"},
            timeout=timeout)

        if response.status_code == 200:
            return response.json()

        return {
            "error": f"Failed to validate HTML. Status code: {response.status_code}",
            "details": response.text
        }
    except requests.RequestException as e:
        return {"error": f"An error occurred while connecting to the W3C Validator: {str(e)}"}
def validate_html(html_string: str, validate_online: bool = False, check_for: list[str] = ['table', 'svg', 'div']) -> None:
    """Assert that an HTML fragment contains the expected tags and, optionally, validates online.

    Args:
        html_string: The HTML code to check.
        validate_online: If True, the fragment is inserted into the test
            template and sent to the W3C validator; any reported message
            fails the check.
        check_for: Tag names that must occur somewhere in the parsed HTML.
            The default list is only read, never modified.

    Raises:
        AssertionError: If a required tag is missing, the validation request
            failed, or the validator reported at least one message.
    """
    root = etree.fromstring(html_string, parser=etree.HTMLParser(recover=True))

    def recursive_search(element: EElement) -> Generator[str, None, None]:
        # Comments and processing instructions have a non-string tag; skip them
        if isinstance(element.tag, str):
            yield element.tag
        for child in element:
            yield from recursive_search(child)

    tags = set(recursive_search(root))
    for tag_type in check_for:
        assert tag_type in tags, f"Tag {tag_type} not found in the html code"

    if validate_online:
        test_page = html_test_template.replace('<!--CONTENT-->', html_string)
        validation_result = validate_html_with_w3c(test_page)
        assert 'messages' in validation_result, 'Validate request failed'

        messages = validation_result['messages']
        if messages:
            print(add_line_numbers(test_page))
            for verr in messages:
                # 'message' and 'lastLine' are optional in the validator's JSON
                # output, so a missing key must not hide the real failure.
                print(f"- {verr.get('type')}: {verr.get('message')} (line: {verr.get('lastLine', 'n/a')})")

        assert len(messages) == 0, f'{len(messages)} validation error, first error: {messages[0].get("message")}'

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -0,0 +1,5 @@
<h1>Special caracters</h1>
<p>Umlaute: ÖÄÜ öäü</p>
<p>Other: ß, €, @, $, %, ~, µ</p>
<p>Units: m³, cm²</p>
<p>Controll characters: &lt;, &gt;, ", ', &amp;, |, /, \</p>

View File

@ -0,0 +1,39 @@
<h1>Source Equations</h1>
<ol>
<li>$4(3x + 2) - 5(x - 1) = 3x + 14$</li>
<li>$
rac{2y + 5}{4} +
rac{3y - 1}{2} = 5$</li>
<li>$
rac{5}{x + 2} +
rac{2}{x - 2} = 3$</li>
<li>$8(3b - 5) + 4(b + 2) = 60$</li>
<li>$2c^2 - 3c - 5 = 0$</li>
<li>$4(2d - 1) + 5(3d + 2) = 7d + 28$</li>
<li>$q^2 + 6q + 9 = 16$</li>
</ol>
<h1>Result Equations</h1>
<ol>
<li>$x =
rac{1}{4}$</li>
<li>$y =
rac{17}{8}$</li>
<li>$z =
rac{7}{3}$</li>
<li>$x = 1$ or $x = -6$</li>
<li>$a =
rac{1}{3}$ or $a = 2$</li>
<li>$x = -
rac{2}{3}$ or $x = 3$</li>
<li>$b =
rac{23}{7}$</li>
</ol>
<h1>Step by Step</h1>
<ol>
<li>Distribute: $12x + 8 - 5x + 5 = 3x + 14$</li>
<li>Combine like terms: $7x + 13 = 3x + 14$</li>
<li>Subtract $3x$: $4x + 13 = 14$</li>
<li>Subtract $13$: $4x = 1$</li>
<li>Divide by $4$: $x =
rac{1}{4}$</li>
</ol>

View File

@ -0,0 +1,44 @@
<p>Below is an in-depth explanation of the AArch64 (ARM64)
unconditional branch instruction—often simply called the
“B” instruction—and how its 26bit immediate field (imm26)
is laid out and later relocated during linking.</p>
<hr>
<h2>Instruction Layout</h2>
<p>The unconditional branch in AArch64 is encoded in a 32bit
instruction. Its layout is as follows:</p>
<pre><code>Bits: 31 26 25 0
+-------------+------------------------------+
| Opcode | imm26 |
+-------------+------------------------------+
</code></pre>
<ul>
<li><strong>Opcode (bits 31:26):</strong></li>
<li>For a plain branch (<code>B</code>), the opcode is <code>000101</code>.</li>
<li>
<p>For a branch with link (<code>BL</code>), which saves the return
address (i.e., a call), the opcode is <code>100101</code>.
These 6 bits determine the instruction type.</p>
</li>
<li>
<p><strong>Immediate Field (imm26, bits 25:0):</strong></p>
</li>
<li>This 26bit field holds a signed immediate value.</li>
<li>
<p><strong>Offset Calculation:</strong> At runtime, the processor:</p>
<ol>
<li><strong>Shifts</strong> the 26bit immediate left by 2 bits.
(Because instructions are 4-byte aligned,
the two least-significant bits are always zero.)</li>
<li><strong>Sign-extends</strong> the resulting 28bit value to
the full register width (typically 64 bits).</li>
<li><strong>Adds</strong> this value to the program counter
(PC) to obtain the branch target.</li>
</ol>
</li>
<li>
<p><strong>Reach:</strong></p>
</li>
<li>With a 26bit signed field thats effectively 28 bits
after the shift, the branch can cover a range
of approximately ±128&nbsp;MB from the current instruction.</li>
</ul>

View File

@ -0,0 +1,77 @@
<h2>Klemmen</h2>
<table>
<thead>
<tr>
<th style="text-align: right;">Anz.</th>
<th>Typ</th>
<th>Beschreibung</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align: right;">12</td>
<td>BK9050</td>
<td>Buskoppler</td>
</tr>
<tr>
<td style="text-align: right;">2</td>
<td>KL1104</td>
<td>4 Digitaleingänge</td>
</tr>
<tr>
<td style="text-align: right;">2</td>
<td>KL2404</td>
<td>4 Digitalausgänge (0,5 A)</td>
</tr>
<tr>
<td style="text-align: right;">3</td>
<td>KL2424</td>
<td>4 Digitalausgänge (2 A)</td>
</tr>
<tr>
<td style="text-align: right;">2</td>
<td>KL4004</td>
<td>4 Analogausgänge</td>
</tr>
<tr>
<td style="text-align: right;">1</td>
<td>KL4002</td>
<td>2 Analogausgänge</td>
</tr>
<tr>
<td style="text-align: right;">22</td>
<td>KL9188</td>
<td>Potenzialverteilungsklemme</td>
</tr>
<tr>
<td style="text-align: right;">1</td>
<td>KL9100</td>
<td>Potenzialeinspeiseklemme</td>
</tr>
<tr>
<td style="text-align: right;">3</td>
<td>KL3054</td>
<td>4 Analogeingänge</td>
</tr>
<tr>
<td style="text-align: right;">5</td>
<td>KL3214</td>
<td>PT100 4 Temperatureingänge (3-Leiter)</td>
</tr>
<tr>
<td style="text-align: right;">3</td>
<td>KL3202</td>
<td>PT100 2 Temperatureingänge (3-Leiter)</td>
</tr>
<tr>
<td style="text-align: right;">1</td>
<td>KL2404</td>
<td>4 Digitalausgänge</td>
</tr>
<tr>
<td style="text-align: right;">2</td>
<td>KL9010</td>
<td>Endklemme</td>
</tr>
</tbody>
</table>

151
tests/test_latex_tools.py Normal file
View File

@ -0,0 +1,151 @@
import pyladoc.latex
def normalize_latex_code(latex_code: str) -> str:
    """Strip every line and drop empty lines so LaTeX snippets can be compared.

    Only lines that are completely empty before stripping are removed; a
    whitespace-only line is kept as an empty line.
    """
    kept_lines: list[str] = []
    for raw_line in latex_code.splitlines():
        if raw_line:
            kept_lines.append(raw_line.strip())
    return '\n'.join(kept_lines)
def check_only_ascii(latex_code: str) -> bool:
    """Return True if every character of the text is an ASCII character (code point below 128)."""
    return latex_code.isascii()
# Converts a small HTML document (heading, umlauts, inline formatting, LaTeX
# control characters, a list and a table with a header row) with
# pyladoc.latex.from_html and compares the result with a hand-written reference.
def test_latex_from_html():
html_code = """
<h1>Test</h1>
<p>This is are Umlautes: Ä,Ö and Ü</p>
<p>This is a <b>test</b>.</p>
<p>And this is another <em>test</em>.</p>
<p>And this is a <strong>third</strong> test.</p>
<p>And this is a <i>fourth</i> test.</p>
<p>This is a LaTeX command: \\textbf{test}</p>
<p>This are typical control characters: {, }, <, >, ", ', &, |, /, \\</p>
<ul>
<li>Item 1</li>
<li>Item 2</li>
</ul>
<table>
<tr>
<th>Header 1</th>
<th>Header 2</th>
</tr>
<tr>
<td>Cell 1</td>
<td>Cell 2</td>
</tr>
</table>
"""
latex_code = pyladoc.latex.from_html(html_code)
# Expected output; a raw string so the LaTeX backslashes are taken literally
ref_latex_code = r"""
\section{Test}
This is are Umlautes: {\"A},{\"O} and {\"U}
This is a \textbf{test}.
And this is another \emph{test}.
And this is a \textbf{third} test.
And this is a \emph{fourth} test.
This is a LaTeX command: \textbackslash{}textbf\{test\}
This are typical control characters: \{, \}, {\textless}, {\textgreater}, ", ', \&, |, /, \textbackslash{}
\begin{itemize}
\item Item 1
\item Item 2
\end{itemize}
\begin{tabular}{ll}\toprule
Header 1 & Header 2 \\
\midrule
Cell 1 & Cell 2 \\
\bottomrule
\end{tabular}"""
print(latex_code)
print('--')
# print(pyladoc.latex.escape_text(html_code))
# The generated LaTeX must not contain any non-ASCII character
assert check_only_ascii(latex_code), 'Some characters are not ASCII'
# Compare line by line, ignoring indentation and empty lines
assert normalize_latex_code(ref_latex_code) == normalize_latex_code(latex_code)
# Renders markdown with two tables (first column right-aligned) to HTML via
# DocumentWriter, converts that HTML with pyladoc.latex.from_html and compares
# the LaTeX with a hand-written reference.
def test_latex_from_markdown():
markdown_code = """
## Test1
| Anz.| Typ | Beschreibung
|----:|----------|------------------------------------
| 12 | BK9050 | Buskoppler
| 2 | KL1104 | 4 Digitaleingänge
| 2 | KL2404 | 4 Digitalausgänge (0,5 A)
| 3 | KL2424 | 4 Digitalausgänge (2 A)
| 2 | KL4004 | 4 Analogausgänge
| 1 | KL4002 | 2 Analogausgänge
| 22 | KL9188 | Potenzialverteilungsklemme
| 1 | KL9100 | Potenzialeinspeiseklemme
| 3 | KL3054 | 4 Analogeingänge
| 5 | KL3214 | PT100 4 Temperatureingänge (3-Leiter)
| 3 | KL3202 | PT100 2 Temperatureingänge (3-Leiter)
| 1 | KL2404 | 4 Digitalausgänge
| 2 | KL9010 | Endklemme
This is a **test**.
## Test2
| Anz.| Beschreibung
|----:|------------------------------------
| 12 | Buskoppler
| 2 | 4 Digitaleingänge
| 2 | 4 Digitalausgänge (0,5 A)
| 3 | 4 Digitalausgänge (2 A)
| 2 | 4 Analogausgänge
| 1 | 2 Analogausgänge
"""
pyla = pyladoc.DocumentWriter()
pyla.add_markdown(markdown_code)
latex_code = pyladoc.latex.from_html(pyla.to_html())
# Expected output; the column specs "rll" / "rl" reflect the right-aligned first column
ref_latex_code = r"""
\subsection{Test1}
\begin{tabular}{rll}\toprule
Anz. & Typ & Beschreibung \\
\midrule
12 & BK9050 & Buskoppler \\
2 & KL1104 & 4 Digitaleing{\"a}nge \\
2 & KL2404 & 4 Digitalausg{\"a}nge (0,5 A) \\
3 & KL2424 & 4 Digitalausg{\"a}nge (2 A) \\
2 & KL4004 & 4 Analogausg{\"a}nge \\
1 & KL4002 & 2 Analogausg{\"a}nge \\
22 & KL9188 & Potenzialverteilungsklemme \\
1 & KL9100 & Potenzialeinspeiseklemme \\
3 & KL3054 & 4 Analogeing{\"a}nge \\
5 & KL3214 & PT100 4 Temperatureing{\"a}nge (3-Leiter) \\
3 & KL3202 & PT100 2 Temperatureing{\"a}nge (3-Leiter) \\
1 & KL2404 & 4 Digitalausg{\"a}nge \\
2 & KL9010 & Endklemme \\
\bottomrule
\end{tabular}
This is a \textbf{test}.
\subsection{Test2}
\begin{tabular}{rl}\toprule
Anz. & Beschreibung \\
\midrule
12 & Buskoppler \\
2 & 4 Digitaleing{\"a}nge \\
2 & 4 Digitalausg{\"a}nge (0,5 A) \\
3 & 4 Digitalausg{\"a}nge (2 A) \\
2 & 4 Analogausg{\"a}nge \\
1 & 2 Analogausg{\"a}nge \\
\bottomrule
\end{tabular}"""
print(latex_code)
# The generated LaTeX must not contain any non-ASCII character
assert check_only_ascii(latex_code), 'Some characters are not ASCII'
# Compare line by line, ignoring indentation and empty lines
assert normalize_latex_code(ref_latex_code) == normalize_latex_code(latex_code)
# Allow running both tests directly as a script, without a test runner
if __name__ == '__main__':
test_latex_from_html()
test_latex_from_markdown()

View File

@ -0,0 +1,108 @@
import pyladoc
import matplotlib.pyplot as plt
import pandas as pd
import document_validation
# Passed to document_validation.validate_html as its validate_online argument
VALIDATE_HTML_CODE_ONLINE = False
# When True, test_html_render writes the rendered page below tests/out/
WRITE_RESULT_FILES = True
# Builds the DocumentWriter shared by the render tests: markdown (special
# characters, a link, a table, an equation reference), an equation, a
# matplotlib bar chart and a pandas table. Returns the DocumentWriter.
def make_document():
dw = pyladoc.DocumentWriter()
dw.add_markdown("""
# Special characters
ö ä ü Ö Ä Ü ß @
π ± Σ
£ ¥ $
Œ
# Link
This is a hyperlink: [nonan.net](https://www.nonan.net)
# Table
| Anz.| Typ | Beschreibung
|----:|----------|------------------------------------
| 12 | BK9050 | Buskoppler
| 2 | KL1104 | 4 Digitaleingänge
| 2 | KL2404 | 4 Digitalausgänge (0,5 A)
| 3 | KL2424 | 4 Digitalausgänge (2 A)
| 2 | KL4004 | 4 Analogausgänge
| 1 | KL4002 | 2 Analogausgänge
| 22 | KL9188 | Potenzialverteilungsklemme
| 1 | KL9100 | Potenzialeinspeiseklemme
| 3 | KL3054 | 4 Analogeingänge
| 5 | KL3214 | PT100 4 Temperatureingänge (3-Leiter)
| 3 | KL3202 | PT100 2 Temperatureingänge (3-Leiter)
| 1 | KL2404 | 4 Digitalausgänge
| 2 | KL9010 | Endklemme
---
# Equations
This line represents a reference to the equation @eq:test1.
""")
# 'test1' is the name the markdown above refers to as @eq:test1
dw.add_equation(r'y = a + b * \sum_{i=0}^{\infty} a_i x^i', 'test1')
# Figure
fig, ax = plt.subplots()
fruits = ['apple', 'blueberry', 'cherry', 'orange']
counts = [40, 100, 30, 55]
bar_labels = ['red', 'blue', '_red', 'orange']
bar_colors = ['tab:red', 'tab:blue', 'tab:red', 'tab:orange']
ax.bar(fruits, counts, label=bar_labels, color=bar_colors)
ax.set_ylabel('fruit supply')
ax.set_title('Fruit supply by kind and color')
ax.legend(title='Fruit color')
dw.add_diagram(fig, 'Bar chart with individual bar colors')
# Table
mydataset = {
'Row1': ["Line1", "Line2", "Line3", "Line4", "Line5"],
'Row2': [120, '95 km/h', 110, '105 km/h', 130],
'Row3': ['12 g/km', '> 150 g/km', '110 g/km', '1140 g/km', '13.05 g/km'],
'Row4': ['5 stars', '4 stars', '5 stars', '4.5 stars', '5 stars'],
'Row5': [3.5, 7.8, 8.5, 6.9, 4.2],
'Row6': ['1850 kg', '1500 kg', '1400 kg', '1600 kg', '1700 kg'],
'Row7': ['600 Nm', '250 Nm', '280 Nm', '320 Nm', '450 Nm']
}
df = pd.DataFrame(mydataset)
# The Styler with hidden index is passed, so the index column is not rendered
dw.add_table(df.style.hide(axis="index"), 'This is a example table', 'example1')
return dw
def test_html_render():
    """Render the sample document to HTML, validate it and optionally save the page."""
    html_code = make_document().to_html()

    document_validation.validate_html(html_code, VALIDATE_HTML_CODE_ONLINE)

    if not WRITE_RESULT_FILES:
        return

    with open('tests/out/test_html_render.html', 'w', encoding='utf-8') as out_file:
        page = pyladoc.inject_to_template(html_code, internal_template='templates/test_template.html')
        out_file.write(page)
def test_latex_render():
    """Render the sample document to a PDF file and require a truthy result from to_pdf."""
    document = make_document()
    # print(document.to_latex())
    pdf_result = document.to_pdf('tests/out/test_latex_render.pdf', font_family='serif')
    assert pdf_result
# Allow running both render tests directly as a script, without a test runner
if __name__ == '__main__':
test_html_render()
test_latex_render()

View File

@ -0,0 +1,152 @@
import pyladoc
import document_validation
# NOTE(review): not referenced by the tests visible in this file - confirm whether it is still needed
VALIDATE_HTML_CODE_ONLINE = False
# When True, each test writes its generated HTML below tests/out/
WRITE_RESULT_FILES = True
# Renders a markdown text with a horizontal rule, a heading, a fenced code
# block, bold text, inline code and bullet/numbered lists, then checks that
# the HTML contains strong, ol, li, code and hr tags.
def test_markdown_styling():
pyla = pyladoc.DocumentWriter()
pyla.add_markdown(
"""
Below is an in-depth explanation of the AArch64 (ARM64)
unconditional branch instructionoften simply called the
B instructionand how its 26bit immediate field (imm26)
is laid out and later relocated during linking.
---
## Instruction Layout
The unconditional branch in AArch64 is encoded in a 32bit
instruction. Its layout is as follows:
```
Bits: 31 26 25 0
+-------------+------------------------------+
| Opcode | imm26 |
+-------------+------------------------------+
```
- **Opcode (bits 31:26):**
- For a plain branch (`B`), the opcode is `000101`.
- For a branch with link (`BL`), which saves the return
address (i.e., a call), the opcode is `100101`.
These 6 bits determine the instruction type.
- **Immediate Field (imm26, bits 25:0):**
- This 26bit field holds a signed immediate value.
- **Offset Calculation:** At runtime, the processor:
1. **Shifts** the 26bit immediate left by 2 bits.
(Because instructions are 4-byte aligned,
the two least-significant bits are always zero.)
2. **Sign-extends** the resulting 28bit value to
the full register width (typically 64 bits).
3. **Adds** this value to the program counter
(PC) to obtain the branch target.
- **Reach:**
- With a 26bit signed field thats effectively 28 bits
after the shift, the branch can cover a range
of approximately ±128 MB from the current instruction.
""")
html_code = pyla.to_html()
# Only the presence of the listed tags is asserted, not the full structure
document_validation.validate_html(html_code, check_for=['strong', 'ol', 'li', 'code', 'hr'])
if WRITE_RESULT_FILES:
with open('tests/out/test_markdown_style.html', 'w', encoding='utf-8') as f:
f.write(html_code)
# Renders a markdown heading plus a pipe table and checks that the HTML
# contains a table tag.
def test_markdown_table():
pyla = pyladoc.DocumentWriter()
pyla.add_markdown(
"""
## Klemmen
| Anz.| Typ | Beschreibung
|----:|----------|------------------------------------
| 12 | BK9050 | Buskoppler
| 2 | KL1104 | 4 Digitaleingänge
| 2 | KL2404 | 4 Digitalausgänge (0,5 A)
| 3 | KL2424 | 4 Digitalausgänge (2 A)
| 2 | KL4004 | 4 Analogausgänge
| 1 | KL4002 | 2 Analogausgänge
| 22 | KL9188 | Potenzialverteilungsklemme
| 1 | KL9100 | Potenzialeinspeiseklemme
| 3 | KL3054 | 4 Analogeingänge
| 5 | KL3214 | PT100 4 Temperatureingänge (3-Leiter)
| 3 | KL3202 | PT100 2 Temperatureingänge (3-Leiter)
| 1 | KL2404 | 4 Digitalausgänge
| 2 | KL9010 | Endklemme
""")
html_code = pyla.to_html()
document_validation.validate_html(html_code, check_for=['table'])
# Keep the generated HTML for manual inspection
if WRITE_RESULT_FILES:
with open('tests/out/test_markdown_table.html', 'w', encoding='utf-8') as f:
f.write(html_code)
def test_markdown_equations():
    """Render markdown lists with inline ``$...$`` equations and check for h1 tags."""
    pyla = pyladoc.DocumentWriter()
    # The markdown must be a raw string: in a normal string literal the "\f"
    # of "\frac" is a form feed character, which turns "\frac" into "<FF>rac".
    pyla.add_markdown(
        r"""
# Source Equations

1. $4(3x + 2) - 5(x - 1) = 3x + 14$
2. $\frac{2y + 5}{4} + \frac{3y - 1}{2} = 5$
3. $\frac{5}{x + 2} + \frac{2}{x - 2} = 3$
4. $8(3b - 5) + 4(b + 2) = 60$
5. $2c^2 - 3c - 5 = 0$
6. $4(2d - 1) + 5(3d + 2) = 7d + 28$
7. $q^2 + 6q + 9 = 16$

# Result Equations

1. $x = \frac{1}{4}$
2. $y = \frac{17}{8}$
3. $z = \frac{7}{3}$
4. $x = 1$ or $x = -6$
5. $a = \frac{1}{3}$ or $a = 2$
6. $x = -\frac{2}{3}$ or $x = 3$
7. $b = \frac{23}{7}$

# Step by Step

1. Distribute: $12x + 8 - 5x + 5 = 3x + 14$
2. Combine like terms: $7x + 13 = 3x + 14$
3. Subtract $3x$: $4x + 13 = 14$
4. Subtract $13$: $4x = 1$
5. Divide by $4$: $x = \frac{1}{4}$
""")

    html_code = pyla.to_html()

    document_validation.validate_html(html_code, check_for=['h1'])

    # Keep the generated HTML for manual inspection
    if WRITE_RESULT_FILES:
        with open('tests/out/test_markdown_equations.html', 'w', encoding='utf-8') as f:
            f.write(html_code)
def test_markdown_characters():
    """Render markdown with umlauts, symbols, unit superscripts and control characters and check for an h1 tag."""
    pyla = pyladoc.DocumentWriter()
    # Each line is a separate paragraph (blank line in between). The string is
    # not raw, so the trailing "\\" is a single backslash in the markdown.
    pyla.add_markdown(
        """
# Special caracters

Umlaute: ÖÄÜ öäü

Other: ß, €, @, $, %, ~, µ

Units: m³, cm²

Controll characters: <, >, ", ', &, |, /, \\
""")

    html_code = pyla.to_html()

    document_validation.validate_html(html_code, check_for=['h1'])

    # Keep the generated HTML for manual inspection
    if WRITE_RESULT_FILES:
        with open('tests/out/test_markdown_characters.html', 'w', encoding='utf-8') as f:
            f.write(html_code)