mirror of https://github.com/Nonannet/pyladoc.git
first commit
This commit is contained in:
commit
67c48776ac
|
@ -0,0 +1,21 @@
|
|||
[flake8]
|
||||
# Specify the maximum allowed line length
|
||||
max-line-length = 88
|
||||
|
||||
# Ignore specific rules
|
||||
# For example, E501: Line too long, W503: Line break before binary operator
|
||||
ignore = E501, W503, W504
|
||||
|
||||
# Exclude specific files or directories
|
||||
exclude =
|
||||
.git,
|
||||
__pycache__,
|
||||
build,
|
||||
dist,
|
||||
.conda
|
||||
.venv
|
||||
venv
|
||||
|
||||
# Enable specific plugins or options
|
||||
# Example: Enabling flake8-docstrings
|
||||
select = C,E,F,W,D
|
|
@ -0,0 +1,134 @@
|
|||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
.venv/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
pyModbusTCP_old/
|
||||
test.py
|
||||
test_*.ipynb
|
||||
settings.json
|
|
@ -0,0 +1,111 @@
|
|||
# Pyladoc
|
||||
|
||||
## Description
|
||||
Pyladoc is a python package for programmatically generating HTML and
|
||||
PDF/LaTeX output. This package targets specifically applications where reports
|
||||
or results with Pandas-tables and Matplotlib-figures are generated programmatically
|
||||
to be displayed as website and as PDF document without any manual formatting
|
||||
steps.
|
||||
|
||||
This package focuses on the "Document in Code" approach for cases
|
||||
where a lot of calculations and data handling are done but not a lot of
|
||||
document text needs to be displayed.
|
||||
|
||||
LaTeX is used as the backend for PDF generation. There are excellent engines for
|
||||
rendering HTML to PDF available, but even if there is no requirement for an
|
||||
accurate typesetting, programmatically placing content of variable
|
||||
composition and sizes on fixed size pages without manual intervention
|
||||
is a hard problem that LaTeX is very capable of.
|
||||
|
||||
### Supported primitives
|
||||
- Text (can be Markdown or HTML formatted)
|
||||
- Headings
|
||||
- Tables (Pandas, Markdown or HTML)
|
||||
- Matplotlib figures
|
||||
- LaTeX equations
|
||||
- Named references for figures, tables and equations
|
||||
|
||||
### Key Features
|
||||
- HTML and PDF/LaTeX rendering of the same document
|
||||
- Single file output including figures
|
||||
- Figure and equation embedding in HTML by inline SVG, SVG in Base64 or PNG in Base64
|
||||
- Figure embedding in LaTeX as PGF/TikZ
|
||||
|
||||
### Usage Scenarios
|
||||
- Webservices
|
||||
- Report generation for lab equipment
|
||||
|
||||
## Installation
|
||||
It can be installed with pip:
|
||||
|
||||
```bash
|
||||
pip install pyladoc
|
||||
```
|
||||
|
||||
## Usage
|
||||
It is easy to use as the following example code shows:
|
||||
|
||||
```python
|
||||
import pyladoc
import pandas as pd
|
||||
|
||||
doc = pyladoc.DocumentWriter()
|
||||
|
||||
doc.add_markdown("""
|
||||
# Example
|
||||
This is an example. The @table:pandas_example shows some random data.
|
||||
""")
|
||||
|
||||
some_data = {
|
||||
'Row1': ["Line1", "Line2", "Line3"],
|
||||
'Row2': [120, 100, 110],
|
||||
'Row3': ['12 g/km', '> 150 g/km', '110 g/km']
|
||||
}
|
||||
df = pd.DataFrame(some_data)
|
||||
doc.add_table(df, 'This is a pandas example table', 'pandas_example')
|
||||
|
||||
html_code = doc.to_html()
|
||||
|
||||
doc.to_pdf('test.pdf')
|
||||
```
|
||||
|
||||
## Example outputs
|
||||
The following documents are generated by tests/test_rendering_example_doc.py:
|
||||
|
||||
- HTML: [test_html_render.html](tests/out/test_html_render.html)
|
||||
- PDF: [test_latex_render.pdf](tests/out/test_latex_render.pdf)
|
||||
|
||||
## Contributing
|
||||
Contributions are welcome, please open an issue or submit a pull request on GitHub.
|
||||
|
||||
## Developer Guide
|
||||
To get started with developing the `pyladoc` package, follow these steps.
|
||||
|
||||
First, clone the repository to your local machine using Git:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/Nonannet/pyladoc.git
|
||||
cd pyladoc
|
||||
```
|
||||
|
||||
It's recommended to set up a venv:
|
||||
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate # On Windows use `venv\Scripts\activate`
|
||||
```
|
||||
|
||||
Install the package and dev-dependencies while keeping files in the
|
||||
current directory:
|
||||
|
||||
```bash
|
||||
pip install -e .[dev]
|
||||
```
|
||||
|
||||
Ensure that everything is set up correctly by running the tests:
|
||||
|
||||
```bash
|
||||
pytest
|
||||
```
|
||||
|
||||
## License
|
||||
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@ -0,0 +1,58 @@
|
|||
[project]
|
||||
name = "pyladoc"
|
||||
version = "1.0.0"
|
||||
authors = [
|
||||
{ name="Nicolas Kruse", email="nicolas.kruse@nonan.net" },
|
||||
]
|
||||
description = "Package for generating HTML and PDF/latex from python code"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
]
|
||||
dependencies = [
|
||||
"markdown>=3.3.0",
|
||||
"beautifulsoup4>=4.9.1"
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pytest", "flake8", "mypy",
|
||||
"lxml", "types-lxml",
|
||||
"requests",
|
||||
"matplotlib>=3.1.1",
|
||||
"pandas>=2.0.0", "Jinja2",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/Nonannet/pyladoc"
|
||||
Repository = "https://github.com/Nonannet/pyladoc"
|
||||
Issues = "https://github.com/Nonannet/pyladoc/issues"
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=61.0", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["src"]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
pyladoc = ["templates/*"]
|
||||
|
||||
|
||||
[tool.mypy]
|
||||
files = ["src"]
|
||||
strict = true
|
||||
warn_return_any = true
|
||||
warn_unused_configs = true
|
||||
check_untyped_defs = true
|
||||
no_implicit_optional = true
|
||||
show_error_codes = true
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
minversion = "6.0"
|
||||
addopts = "-ra -q"
|
||||
testpaths = ["tests"]
|
||||
pythonpath = ["src"]
|
|
@ -0,0 +1,570 @@
|
|||
from typing import Callable, Generator, Self, Literal, TYPE_CHECKING
|
||||
import html
|
||||
import markdown
|
||||
from base64 import b64encode
|
||||
import re
|
||||
import io
|
||||
from . import latex
|
||||
import pkgutil
|
||||
|
||||
|
||||
HTML_OUTPUT = 0
|
||||
LATEX_OUTPUT = 1
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas import DataFrame
|
||||
from pandas.io.formats.style import Styler
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.figure import Figure
|
||||
from matplotlib.legend import Legend as Mpl_Legend
|
||||
from matplotlib.text import Text as Mpl_Text
|
||||
|
||||
Table = DataFrame | Styler
|
||||
else:
|
||||
try:
|
||||
from pandas import DataFrame
|
||||
except ImportError:
|
||||
DataFrame = None
|
||||
|
||||
try:
|
||||
from pandas.io.formats.style import Styler
|
||||
Table = DataFrame | Styler
|
||||
except ImportError:
|
||||
Table = DataFrame
|
||||
|
||||
try:
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.figure import Figure
|
||||
from matplotlib.legend import Legend as Mpl_Legend
|
||||
from matplotlib.text import Text as Mpl_Text
|
||||
except ImportError:
|
||||
Figure = None
|
||||
|
||||
|
||||
TRenderer = Literal['pandas', 'simple']
|
||||
FFormat = Literal['svg', 'png', 'pgf']
|
||||
|
||||
|
||||
def _get_pkgutil_string(path: str) -> str:
    """
    Loads a resource that is shipped with this package and returns it as text.

    Args:
        path: Path of the resource, relative to this package

    Returns:
        The resource content decoded with the default codec of bytes.decode()
    """
    raw_bytes = pkgutil.get_data(__name__, path)
    assert raw_bytes is not None
    text: str = raw_bytes.decode()
    return text
|
||||
|
||||
|
||||
def _markdown_to_html(text: str) -> str:
    """
    Renders markdown formatted text to HTML.

    Args:
        text: The markdown text

    Returns:
        HTML code; '<hr />' tags are rewritten to '<hr>'
    """
    enabled_extensions = ['tables', 'fenced_code', 'def_list', 'abbr', 'sane_lists']
    without_nbsp = re.sub(r'\u00A0', ' ', text)  # non-breaking space
    rendered = markdown.markdown(without_nbsp, extensions=enabled_extensions)
    return rendered.replace('<hr />', '<hr>')
|
||||
|
||||
|
||||
def escape_html(text: str) -> str:
    """
    Escapes special HTML characters in a given string.

    Non-breaking spaces are replaced first, then the text is HTML-escaped,
    stripped, and all of its lines are joined by single spaces.

    Args:
        text: The text to escape

    Returns:
        Escaped single-line text safe for inserting into HTML code
    """
    without_nbsp = text.replace('\u00A0', ' ')  # non-breaking space
    escaped = html.escape(without_nbsp)
    lines = escaped.strip().splitlines()
    return ' '.join(lines)
|
||||
|
||||
|
||||
def _clean_svg(svg_text: str) -> str:
    """
    Removes parts of an SVG document that are not wanted for inline SVG.

    Args:
        svg_text: The SVG code

    Returns:
        The SVG code without metadata blocks and without path tags that
        have no d attribute
    """
    # metadata blocks contain tags that are not allowed for inline svg
    without_metadata = re.sub(r'<metadata>.*?</metadata>', '', svg_text, flags=re.DOTALL)

    # path tags without a d attribute are illegal
    without_empty_paths = re.sub(r'<path(?![^>]*\sd=)\s.*?/>', '', without_metadata, flags=re.DOTALL)
    return without_empty_paths
|
||||
|
||||
# def _get_templ_vars(template: str) -> list[str]:
|
||||
# return re.findall("<!---START (.+?)--->.*?<!---END .+?--->", template, re.DOTALL)
|
||||
|
||||
|
||||
def _drop_indent(text: str, amount: int) -> str:
    """
    Indents every line of a multiline text by a fixed number of spaces.

    Despite its name this function does not remove indentation: it
    prepends ``amount`` space characters to each line. Line endings are
    preserved as they are.

    Args:
        text: The text to indent
        amount: The number of space characters to put in front of each line

    Returns:
        The text with each line prefixed by the specified number of spaces
    """
    prefix = ' ' * amount
    # keepends=True keeps the original line terminators untouched
    return ''.join(prefix + line for line in text.splitlines(True))
|
||||
|
||||
|
||||
def _save_figure(fig: Figure, buff: io.BytesIO, figure_format: FFormat, font_family: str | None, scale: float) -> None:
    """
    Saves a matplotlib figure to a file-like object.

    The font family of the text elements and the figure size are changed
    only for the duration of the rendering and are restored afterwards,
    also when rendering fails.

    Args:
        fig: The figure to save
        buff: The file-like object to save the figure to
        figure_format: The format to save the figure in (svg, png or pgf)
        font_family: The font family to use for the figure; if empty or
            None the fonts of the figure are left unchanged
        scale: Factor applied to the figure size for rendering
    """
    def get_all_elements() -> Generator[Mpl_Text, None, None]:
        # Title, axis labels, tick labels and legend texts of every axes
        for ax in fig.get_axes():
            yield ax.title
            yield ax.xaxis.label
            yield ax.yaxis.label
            yield from ax.get_xticklabels() + ax.get_yticklabels()
            legend: Mpl_Legend = ax.get_legend()
            if legend:
                yield from legend.get_texts()

    backends = {'png': 'AGG', 'svg': 'SVG', 'pgf': 'PGF'}
    assert figure_format in backends, 'Figure format can be pgf (vector), svg (vector) or png (raster)'

    # Store current figure settings. This has to be a list: a generator
    # would be exhausted by the font override loop below, so that the
    # restore loop would never run and the original fonts would be lost.
    old_state = [(e, e.get_fontfamily()) for e in get_all_elements()]
    old_size: tuple[float, float] = tuple(fig.get_size_inches())  # type: ignore[unused-ignore]

    try:
        # Adjust figure settings
        if font_family:
            for e, _ in old_state:
                e.set_fontfamily(font_family)

        fig.set_size_inches(old_size[0] * scale, old_size[1] * scale, False)

        # Render figure
        fig.savefig(buff, format=figure_format, backend=backends[figure_format])  # type: ignore[unused-ignore]
    finally:
        # Reset figure settings
        for e, s in old_state:
            e.set_fontfamily(s)

        fig.set_size_inches(old_size, None, False)
|
||||
|
||||
|
||||
def figure_to_string(fig: Figure,
                     figure_format: FFormat = 'svg',
                     font_family: str | None = None,
                     scale: float = 1,
                     alt_text: str = '',
                     base64: bool = False) -> str:
    """
    Renders a matplotlib figure into an ascii-string.

    pgf output is returned as LaTeX code, svg output is returned as inline
    svg unless base64 is requested. png and base64-svg output is wrapped in
    an img-tag carrying the base64 encoded image data.

    Args:
        fig: The figure to convert
        figure_format: The format to save the figure in (svg, png or pgf)
        font_family: The font family to use for the figure
        scale: Scaling factor for the figure size
        alt_text: The alt text for the figure
        base64: If the format is svg this determines if the image is encoded in base64

    Returns:
        The figure as ascii-string
    """
    assert fig and isinstance(fig, Figure), 'fig parameter must be a matplotlib figure'

    with io.BytesIO() as buff:
        _save_figure(fig, buff, figure_format, font_family, scale)
        buff.seek(0)

        if figure_format == 'pgf':
            # skip comments in front of the first group
            header = buff.read(2028)
            buff.seek(max(header.find(b'\\begingroup%'), 0))
            pgf_code = buff.read().decode('utf-8')
            return latex.to_ascii(pgf_code)

        if figure_format == 'svg' and not base64:
            # skip xml and DOCTYPE header
            header = buff.read(2028)
            buff.seek(max(header.find(b'<svg'), 0))
            svg_code = buff.read().decode('utf-8')
            return _clean_svg(svg_code)

        image_mime = {"png": "image/png", "svg": "image/svg+xml"}
        assert figure_format in image_mime, 'Unknown image format'
        safe_alt_text = escape_html(alt_text)
        mime_type = image_mime[figure_format]
        payload = b64encode(buff.read()).decode('ascii')  # base64 assures (7-bit) ascii
        return '<img alt="%s" src="data:%s;charset=utf-8;base64,%s">' % (safe_alt_text, mime_type, payload)
|
||||
|
||||
|
||||
def latex_to_figure(latex_code: str) -> Figure:
    """
    Renders a LaTeX math expression into a new matplotlib figure.

    The figure is resized to 1.2 times the extent of the rendered text.

    Args:
        latex_code: LaTeX math code, without surrounding '$' characters

    Returns:
        The figure containing the rendered expression
    """
    assert Figure, 'Matplotlib is required for rendering LaTex expressions for HTML output.'  # type:ignore[truthy-function]

    fig, ax = plt.subplots()
    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis('off')

    expression = f'${latex_code}$'
    text = plt.text(0.5, 0.5, expression, horizontalalignment='center',
                    verticalalignment='center', transform=ax.transAxes)

    # A draw pass is needed before the text extent can be queried
    fig.draw_without_rendering()
    extent = text.get_window_extent()
    width_inches = extent.width / fig.dpi * 1.2
    height_inches = extent.height / fig.dpi * 1.2
    fig.set_size_inches(width_inches, height_inches)
    return fig
|
||||
|
||||
|
||||
def _fillin_fields(template: str, fields: dict[str, str]) -> str:
    """
    Replaces field placeholders in a template by the given values.

    A placeholder has the form
    ``<!---START name--->...<!---END name--->`` and has to be located on
    a single line. Everything from the start of that line up to the end
    marker is replaced by the value, where each line of the value is
    indented by the number of characters found in front of the start
    marker. Only the first placeholder per field name is replaced; fields
    without a placeholder in the template are ignored.

    Args:
        template: The text containing the placeholders
        fields: Mapping of field names to the text to insert

    Returns:
        The template with the placeholders filled in
    """
    html_out = template
    for variable_name, value in fields.items():
        # The field name is matched literally; without escaping, regex
        # special characters in a name would change the pattern
        name = re.escape(variable_name)
        # Find indentation depths:
        ret = re.search(f"^(.*?)<!---START {name}--->.*?<!---END {name}--->", html_out, flags=re.MULTILINE)
        if ret:
            indent_depths = len(ret.group(1))
            html_out = html_out[:ret.start(0)] + _drop_indent(value, indent_depths) + html_out[ret.end(0):]
    return html_out
|
||||
|
||||
|
||||
def _fillin_reference_names(input_string: str, item_index: dict[str, int]) -> str:
    """
    Replaces references of the form '@type:id' by the number of the item.

    Args:
        input_string: The text containing the references
        item_index: Mapping of 'type:id' reference names to item numbers

    Returns:
        The text with all references replaced by their numbers

    Raises:
        AssertionError: If a reference is not contained in item_index
    """
    pieces: list[str] = []
    cursor = 0
    for match in re.finditer(r'(?<=@)\w+:[\w\_\-]+', input_string):
        ref = match.group()
        assert ref in item_index, f"Reference {ref} does not exist in the document"
        # The match starts behind the '@', so one more character is dropped
        pieces.append(input_string[cursor:match.start() - 1])
        pieces.append(str(item_index[ref]))
        cursor = match.end()
    pieces.append(input_string[cursor:])
    return ''.join(pieces)
|
||||
|
||||
|
||||
def _check_latex_references(input_string: str, item_index: dict[str, int]) -> str:
    """
    Verifies that every \\ref{type:id} in LaTeX code points to a known item.

    Args:
        input_string: The LaTeX code to check
        item_index: Mapping of 'type:id' reference names to item numbers

    Returns:
        The unchanged input string

    Raises:
        AssertionError: If a referenced label is not part of the document
    """
    used_refs = [m.group() for m in re.finditer(r'(?<=\\ref\{)\w+:[\w\_\\\-]+(?=\})', input_string)]
    known_labels = {latex.normalize_label_text(name) for name in item_index}
    for ref in used_refs:
        assert ref in known_labels, f"Reference {ref} does not exist in the document"
    return input_string
|
||||
|
||||
|
||||
def _normalize_text_indent(text: str) -> str:
    """
    Removes the common base indentation from a multiline text.

    A blank first line is dropped. The base indentation is taken from the
    first line, or from the second line if the first line does not start
    with a space (e.g. text starting directly behind the opening quotes).
    Lines keep their indentation relative to the base, trailing whitespace
    is removed.

    Args:
        text: The text to normalize

    Returns:
        The text without the base indentation, lines joined by '\\n'
    """
    def leading_width(line: str) -> int:
        # Only leading whitespace counts as indentation. Measuring with
        # strip() would add trailing whitespace to the indentation.
        return len(line) - len(line.lstrip())

    text_lines = text.splitlines()
    if len(text_lines) > 1 and not text_lines[0].strip():
        text_lines = text_lines[1:]

    if not text_lines:
        return ''

    if len(text_lines) > 1 and text_lines[0] and text_lines[0][0] != ' ':
        indent_amount = leading_width(text_lines[1])
    else:
        indent_amount = leading_width(text_lines[0])

    return '\n'.join(
        ' ' * max(0, leading_width(line) - indent_amount) + line.strip()
        for line in text_lines)
|
||||
|
||||
|
||||
def _create_document_writer() -> 'DocumentWriter':
    """
    Creates a new, empty DocumentWriter instance.

    Returns:
        The new DocumentWriter
    """
    return DocumentWriter()
|
||||
|
||||
|
||||
def inject_to_template(content: str, template_path: str = '', internal_template: str = '') -> str:
    """
    Injects a content string into a template. The placeholder <!--CONTENT-->
    will be replaced by the content. If the placeholder is prefixed with a
    '%' comment character, this character will be replaced as well; the same
    applies to whitespace directly in front of the placeholder.

    Args:
        content: The text to insert into the template
        template_path: Path to a template file (read as UTF-8); takes
            precedence over internal_template
        internal_template: Path to an internal default template

    Returns:
        Template with included content

    Raises:
        ValueError: If neither template_path nor internal_template is provided
        AssertionError: If the template does not contain <!--CONTENT-->
    """
    if template_path:
        # Internal templates are decoded as UTF-8, so external templates are
        # read the same way instead of depending on the platform default
        with open(template_path, 'r', encoding='utf-8') as f:
            template = f.read()
    elif internal_template:
        template = _get_pkgutil_string(internal_template)
    else:
        raise ValueError('No template provided')

    assert '<!--CONTENT-->' in template, 'No <!--CONTENT--> expression in template located'
    # Strip an optional comment character and whitespace in front of the placeholder
    prep_template = re.sub(r"\%?\s*<!--CONTENT-->", '<!--CONTENT-->', template)
    return prep_template.replace('<!--CONTENT-->', content)
|
||||
|
||||
|
||||
class DocumentWriter():
    """
    A class to create a document for exporting to HTML or LaTeX.

    Content is added with the add_* methods. Each call stores a pair of
    render functions (HTML and LaTeX); the actual rendering is deferred
    until to_html, to_latex or to_pdf is called.
    """
    def __init__(self) -> None:
        # One entry per document element: [html_renderer, latex_renderer],
        # indexed by HTML_OUTPUT and LATEX_OUTPUT
        self._doc: list[list[Callable[[], str]]] = []
        self._fields: dict[str, DocumentWriter] = dict()
        self._base64_svgs: bool = False
        self._figure_format: FFormat = 'svg'
        self._table_renderer: TRenderer = 'simple'
        self._font_family: str | None = None
        self._item_count: dict[str, int] = {}  # last used number per reference type
        self._item_index: dict[str, int] = {}  # 'type:id' -> item number
        self._fig_scale: float = 1

    def _add_item(self, ref_id: str, ref_type: str, caption_prefix: str) -> str:
        """
        Registers a numbered item (figure, table, equation, ...).

        Args:
            ref_id: Reference id of the item; if empty the item number is used
            ref_type: Reference type, items are numbered per type
            caption_prefix: Format pattern that receives the item number

        Returns:
            The caption prefix formatted with the number of the new item
        """
        current_index = self._item_count.get(ref_type, 0) + 1
        if not ref_id:
            ref_id = str(current_index)
        self._item_index[f"{ref_type}:{ref_id}"] = current_index
        self._item_count[ref_type] = current_index
        return caption_prefix.format(current_index)

    def new_field(self, name: str) -> 'DocumentWriter':
        """
        Creates a new document that is registered as a named field.

        Args:
            name: The name of the field

        Returns:
            The DocumentWriter holding the content of the field
        """
        new_dwr = _create_document_writer()
        self._fields[name] = new_dwr
        return new_dwr

    def add_document(self, doc: Self) -> None:
        """
        Appends all elements of another document to this document.

        Only the elements are taken over; the reference index of the
        other document is not merged.

        Args:
            doc: The document to append
        """
        self._doc += doc._doc

    def add_diagram(self, fig: Figure, caption: str = '', ref_id: str = '',
                    prefix_pattern: str = 'Figure {}: ', ref_type: str = 'fig',
                    centered: bool = True) -> None:
        """
        Adds a matplotlib figure to the document.

        Args:
            fig: The figure to add
            caption: The caption of the figure
            ref_id: Id for referencing the figure
            prefix_pattern: Pattern for the caption prefix in HTML output,
                receives the figure number
            ref_type: Reference type used for numbering and referencing
            centered: Whether to center the figure in LaTeX output
        """
        caption_prefix = self._add_item(ref_id, ref_type, prefix_pattern)

        def render_to_html() -> str:
            return '<div class="figure">%s%s</div>' % (
                figure_to_string(fig, self._figure_format, base64=self._base64_svgs, scale=self._fig_scale),
                '<br>' + caption_prefix + escape_html(caption) if caption else '')

        def render_to_latex() -> str:
            return '\\begin{figure}%s\n%s\n\\caption{%s}\n%s\\end{figure}' % (
                '\n\\centering' if centered else '',
                figure_to_string(fig, 'pgf', self._font_family, scale=self._fig_scale),
                latex.escape_text(caption),
                '\\label{%s}\n' % latex.normalize_label_text(ref_type + ':' + ref_id) if ref_id else '')

        self._doc.append([render_to_html, render_to_latex])

    def add_table(self, table: Table, caption: str = '', ref_id: str = '',
                  prefix_pattern: str = 'Table {}: ', ref_type: str = 'table', centered: bool = True) -> None:
        """
        Adds a pandas table to the document.

        Args:
            table: A pandas DataFrame or DataFrame Styler
            caption: The caption of the table
            ref_id: Id for referencing the table
            prefix_pattern: Pattern for the caption prefix in HTML output,
                receives the table number
            ref_type: Reference type used for numbering and referencing
            centered: Whether to center the table in LaTeX output
        """
        assert Table and isinstance(table, Table), 'Table has to be a pandas DataFrame oder DataFrame Styler'
        caption_prefix = self._add_item(ref_id, ref_type, prefix_pattern)
        styler = table if isinstance(table, Styler) else getattr(table, 'style', None)
        assert isinstance(styler, Styler), 'Jinja2 package is required for rendering tables'

        def render_to_html() -> str:
            html_string = styler.to_html(table_uuid=ref_id, caption=caption_prefix + escape_html(caption))
            # Drop the style block emitted in front of the table
            return re.sub(r'<style.*?>.*?</style>', '', html_string, flags=re.DOTALL)

        def render_to_latex() -> str:
            if self._table_renderer == 'pandas':
                return styler.to_latex(
                    label=latex.normalize_label_text(ref_type + ':' + ref_id),
                    hrules=True,
                    convert_css=True,
                    siunitx=True,
                    caption=latex.escape_text(caption),
                    position_float='centering' if centered else None)
            else:
                return latex.render_pandas_styler_table(styler, caption, ref_type + ':' + ref_id, centered)

        self._doc.append([render_to_html, render_to_latex])

    def add_text(self, text: str, section_class: str = '') -> None:
        """
        Adds a text paragraph to the document.

        Args:
            text: The text to add
            section_class: The class for the paragraph
        """
        norm_text = _normalize_text_indent(text)

        def render_to_html() -> str:
            html = '<p>' + escape_html(norm_text) + '</p>'
            if section_class:
                return '<div class="' + section_class + '">' + html + '</div>'
            else:
                return html

        def render_to_latex() -> str:
            return latex.from_html(render_to_html())

        self._doc.append([render_to_html, render_to_latex])

    def add_html(self, text: str) -> None:
        """
        Adds HTML formatted text to the document. For the LaTeX
        export only basic HTML for text formatting and tables
        is supported.

        Args:
            text: The HTML to add to the document
        """
        def render_to_html() -> str:
            return text

        def render_to_latex() -> str:
            return latex.from_html(text)

        self._doc.append([render_to_html, render_to_latex])

    def add_h1(self, text: str) -> None:
        """
        Adds a h1 heading to the document.

        Args:
            text: The text of the heading
        """
        def render_to_html() -> str:
            return '<h1>' + escape_html(text) + '</h1>'

        def render_to_latex() -> str:
            return '\\section{' + latex.escape_text(text) + '}\n'

        self._doc.append([render_to_html, render_to_latex])

    def add_h2(self, text: str) -> None:
        """
        Adds a h2 heading to the document.

        Args:
            text: The text of the heading
        """
        def render_to_html() -> str:
            return '<h2>' + escape_html(text) + '</h2>'

        def render_to_latex() -> str:
            return '\\subsection{' + latex.escape_text(text) + '}\n'

        self._doc.append([render_to_html, render_to_latex])

    def add_h3(self, text: str) -> None:
        """
        Adds a h3 heading to the document.

        Args:
            text: The text of the heading
        """
        def render_to_html() -> str:
            return '<h3>' + escape_html(text) + '</h3>'

        def render_to_latex() -> str:
            return '\\subsubsection{' + latex.escape_text(text) + '}\n'

        self._doc.append([render_to_html, render_to_latex])

    def add_equation(self, latex_equation: str, ref_id: str = '', ref_type: str = 'eq') -> None:
        """
        Adds a LaTeX equation to the document.

        Args:
            latex_equation: LaTeX formatted equation
            ref_id: If provided, the equation is displayed with
                a number in LaTeX output and can be referenced by the ref_id
            ref_type: Reference type used for numbering and referencing
        """
        caption = self._add_item(ref_id, ref_type, '({})')

        def render_to_html() -> str:
            fig = latex_to_figure(latex_equation)
            try:
                equation_markup = figure_to_string(fig, self._figure_format, base64=self._base64_svgs)
            finally:
                # The figure is created through pyplot, which keeps it
                # alive until it is closed explicitly
                plt.close(fig)
            return ('<div class="equation-container"><div class="equation">%s</div>'
                    '<div class="equation-number">%s</div></div>') % (
                        equation_markup,
                        caption)

        def render_to_latex() -> str:
            if ref_id:
                # Labels are normalized like for figures and tables, since
                # references are checked against normalized label names
                return '\\begin{equation}\\label{%s}%s\\end{equation}' % (
                    latex.normalize_label_text(ref_type + ':' + ref_id), latex_equation)
            else:
                return '\\[%s\\]' % latex_equation

        self._doc.append([render_to_html, render_to_latex])

    def add_markdown(self, text: str, section_class: str = '') -> None:
        """
        Adds a markdown formatted text to the document.

        Args:
            text: The markdown text to add
            section_class: The class for the text section
        """
        norm_text = _normalize_text_indent(str(text))

        def render_to_html() -> str:
            html = _markdown_to_html(norm_text)
            if section_class:
                return '<div class="' + section_class + '">' + html + '</div>'
            else:
                return html

        def render_to_latex() -> str:
            return latex.from_html(render_to_html())

        self._doc.append([render_to_html, render_to_latex])

    def _render_doc(self, doc_type: int) -> str:
        """
        Renders all elements of the document and fills in the fields.

        Args:
            doc_type: HTML_OUTPUT or LATEX_OUTPUT

        Returns:
            The rendered document
        """
        # NOTE(review): fields are rendered as HTML for both output
        # types - confirm this is intended for LaTeX output
        fields = {k: f.to_html() for k, f in self._fields.items()}
        return _fillin_fields(''.join(el[doc_type]() for el in self._doc), fields)

    def to_html(self, figure_format: FFormat = 'svg',
                base64_svgs: bool = False, figure_scale: float = 1) -> str:
        """
        Export the document to HTML. Figures will be embedded in the HTML code.
        The format can be selected between png in base64, inline svg or svg in base64.

        Args:
            figure_format: The format for embedding the figures in the HTML code (svg or png)
            base64_svgs: Whether to encode svg images in base64
            figure_scale: Scaling factor for the figure size

        Returns:
            The HTML code
        """
        self._figure_format = figure_format
        self._base64_svgs = base64_svgs
        self._fig_scale = figure_scale

        return _fillin_reference_names(self._render_doc(HTML_OUTPUT), self._item_index)

    def to_latex(self, font_family: Literal[None, 'serif', 'sans-serif'] = None,
                 table_renderer: TRenderer = 'simple', figure_scale: float = 1) -> str:
        """
        Export the document to LaTeX. Figures will be embedded as pgf graphics.

        Args:
            font_family: Overwrites the font family for figures
            table_renderer: The renderer for tables (simple: renderer with column type
                guessing for text and numbers; pandas: using the internal pandas LaTeX renderer)
            figure_scale: Scaling factor for the figure size

        Returns:
            The LaTeX code
        """
        self._font_family = font_family
        assert table_renderer in ['simple', 'pandas'], "table_renderer must be 'simple' or 'pandas'"
        self._table_renderer = table_renderer
        self._fig_scale = figure_scale

        return _check_latex_references(self._render_doc(LATEX_OUTPUT), self._item_index)

    def to_pdf(self, file_path: str,
               font_family: Literal[None, 'serif', 'sans-serif'] = None,
               table_renderer: TRenderer = 'simple',
               latex_template_path: str = '') -> bool:
        """
        Export the document to a PDF file using LaTeX.

        Args:
            file_path: The path to save the PDF file to
            font_family: Overwrites the font family for figures and the template
            table_renderer: The renderer for tables ('simple' or 'pandas')
            latex_template_path: Path to a LaTeX template file. The
                expression <!--CONTENT--> will be replaced by the generated content.
                If no path is provided a default template is used.

        Returns:
            True if the PDF was successfully created
        """
        latex_code = inject_to_template(self.to_latex(font_family, table_renderer),
                                        latex_template_path,
                                        'templates/default_template.tex')

        if font_family == 'sans-serif':
            latex_code = latex.inject_latex_command(latex_code, '\\renewcommand{\\familydefault}{\\sfdefault}')
        success, errors, warnings = latex.compile(latex_code, file_path)

        if not success:
            print('Errors:')
            print('\n'.join(errors))
            print('Warnings:')
            print('\n'.join(warnings))

        return success

    def _repr_html_(self) -> str:
        """Returns the HTML rendering of the document with default settings."""
        return self.to_html()

    def __repr__(self) -> str:
        """Returns the HTML rendering of the document with default settings."""
        return self.to_html()
|
|
@ -0,0 +1,346 @@
|
|||
import bs4
|
||||
from html.parser import HTMLParser
|
||||
from typing import Iterator, Generator, Any
|
||||
from pandas.io.formats.style import Styler
|
||||
import re
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from .latex_escaping import unicode_to_latex_dict, latex_escape_dict
|
||||
|
||||
|
||||
def basic_formatter(value: Any) -> str:
    """
    Converts a value to its string representation and escapes it for LaTeX.

    Args:
        value: The value to format

    Returns:
        The escaped text
    """
    as_text = str(value)
    return escape_text(as_text)
|
||||
|
||||
|
||||
def to_ascii(text: str) -> str:
    """
    Substitutes often used unicode characters in LaTeX code or text
    by their LaTeX ascii equivalents.

    The keys of unicode_to_latex_dict are combined into one regular
    expression; every match is replaced by the value stored for it.

    Args:
        text: The text to convert.

    Returns:
        The escaped text.
    """
    combined_pattern = '|'.join(unicode_to_latex_dict)

    def substitute(match: 're.Match[str]') -> str:
        return unicode_to_latex_dict[match.group()]

    return re.sub(combined_pattern, substitute, text)
|
||||
|
||||
|
||||
def normalize_label_text(text: str) -> str:
    """
    Replaces every character that is not allowed in a label text by '-'.
    Allowed are ascii letters, digits, '.' and ':'.

    Args:
        text: Input text

    Returns:
        Normalized text
    """
    not_allowed = re.compile(r"[^a-zA-Z0-9.:]")
    return not_allowed.sub('-', text)
|
||||
|
||||
|
||||
def escape_text(text: str) -> str:
    """
    Escapes special LaTeX characters and often used unicode characters in a given string.

    The keys of latex_escape_dict and unicode_to_latex_dict are used as
    regular expression patterns, the values as replacement texts.

    Args:
        text: The text to escape

    Returns:
        Escaped text
    """

    # Entries of unicode_to_latex_dict win over latex_escape_dict for equal keys
    latex_translation = latex_escape_dict | unicode_to_latex_dict

    # One alternation of all patterns, so the text is scanned only once
    regex_filter = '|'.join(latex_translation)

    last_s = 0
    ret: list[str] = []
    for m in re.finditer(regex_filter, text):
        s1, s2 = m.span()
        # Copy the unmatched text between the previous and the current match
        ret.append(text[last_s:s1])
        # Look up the replacement: the first pattern that matches the found text
        matches = [v for k, v in latex_translation.items() if re.match(k, m.group())]
        # NOTE(review): m.group(1) requires the combined pattern to contain a
        # capture group - presumably defined by one of the latex_escaping
        # patterns (e.g. for references); confirm against that module
        if m.group(1):
            # Insert the normalized group text for the \g<1> placeholder
            ret.append(matches[0].replace(r'\g<1>', normalize_label_text(m.group(1))))
        else:
            ret.append(matches[0])
        last_s = s2
    # Remaining text behind the last match
    ret.append(text[last_s:])

    return ''.join(ret)
|
||||
|
||||
|
||||
def render_pandas_styler_table(df_style: Styler, caption: str = '', label: str = '', centering: bool = True) -> str:
    """
    Converts a pandas Styler object to LaTeX table.

    The column type is guessed per visible column: a column whose body
    cells all look numeric gets the ``S`` column type, every other
    column gets ``l``. A table without body rows no longer raises an
    IndexError; its columns all fall back to ``l``.

    Args:
        df_style: The pandas Styler object to convert.
        caption: The caption for the table.
        label: Label for referencing the table.
        centering: Whether to center the table.

    Returns:
        The LaTeX code.
    """
    # Heuristic for "numeric" cells: optional leading < or >, digits with
    # optional commas, optional decimal part, optional trailing text that
    # is separated by whitespace (for example a unit).
    numeric = re.compile(r'^[<>]?\s*(?:\d+,?)+(?:\.\d+)?(?:\s\D.*)?$')

    def guess_formats(table: dict[str, Any]) -> list[str]:
        """Return one column specifier per visible column."""
        body = table['body']
        if not body:
            # No data rows to inspect: take the column count from the last
            # header row. NOTE(review): assumes the last header row has the
            # same visible cells as a body row would have - confirm.
            head = table['head']
            template_row = head[-1] if head else []
            return ['l' for cell in template_row if cell['is_visible']]

        first_row = body[0]
        return [
            'S' if all(numeric.match(line[ci]['display_value'].strip()) for line in body) else 'l'
            for ci in range(len(first_row)) if first_row[ci]['is_visible']
        ]

    def iter_table(table: dict[str, Any]) -> Generator[str, None, None]:
        yield '\\begin{table}\n'
        if centering:
            yield '\\centering\n'

        formats = guess_formats(table)

        if caption:
            yield f"\\caption{{{escape_text(caption)}}}\n"
        if label:
            yield f"\\label{{{normalize_label_text(label)}}}\n"
        yield f"\\begin{{tabular}}{{{''.join(formats)}}}\n\\toprule\n"

        # Header cells are wrapped in \text{...}.
        for head in table['head']:
            yield (' & '.join(f"\\text{{{escape_text(c['display_value'].strip())}}}"
                              for c in head if c['is_visible']))
            yield ' \\\\\n'

        yield '\\midrule\n'

        for body in table['body']:
            yield (' & '.join(escape_text(c['display_value'].strip())
                              for c in body if c['is_visible']))
            yield ' \\\\\n'

        yield '\\bottomrule\n\\end{tabular}\n\\end{table}'

    # _translate is a private pandas API returning the render context dict.
    str_list = iter_table(df_style._translate(False, False, blank=''))  # type: ignore[attr-defined]

    return ''.join(str_list)
|
||||
|
||||
|
||||
def from_html_old(html_code: str) -> str:
    """
    Converts HTML code to LaTeX code.

    The HTML is parsed with BeautifulSoup and walked recursively. Tags
    listed in the translation table are wrapped in their LaTeX prefix
    and postfix, tables are converted to a ``tabular`` environment and
    all text is escaped with ``escape_text``.

    Args:
        html_code: The HTML code to convert.

    Returns:
        The LaTeX code.
    """
    root = bs4.BeautifulSoup(html_code, 'html.parser')

    html_to_latex = {
        'strong': ('\\textbf{', '}'),
        'b': ('\\textbf{', '}'),
        'em': ('\\emph{', '}'),
        'i': ('\\emph{', '}'),
        'p': ('', '\n\n'),
        'h1': ('\\section{', '}'),
        'h2': ('\\subsection{', '}'),
        'h3': ('\\subsubsection{', '}'),
        'ul': ('\\begin{itemize}', '\\end{itemize}'),
        'ol': ('\\begin{enumerate}', '\\end{enumerate}'),
        'li': ('\\item ', ''),
        'latex_eq': ('\\[', '\\]'),
    }

    def handle_table(table: bs4.element.Tag) -> str:
        """Convert one <table> element to a tabular environment."""
        preamble = ''
        latex_rows: list[str] = []
        for row in table.find_all('tr'):
            assert isinstance(row, bs4.element.Tag), 'HTML table not valid'
            cells = row.find_all(['th', 'td'])
            if not preamble:
                # The first row determines the number of columns.
                preamble = "\\begin{tabular}{|" + "|".join(['l'] * len(cells)) + "|}\\toprule\n"
            # Every row is emitted, including the first one (it used to be
            # consumed for the preamble only and was lost).
            latex_rows.append(" & ".join(escape_text(cell.get_text(strip=True)) for cell in cells) + " \\\\\n")

        if not preamble:
            # A table without rows would otherwise produce an
            # \end{tabular} without a matching \begin{tabular}.
            return ''
        return preamble + ''.join(latex_rows) + "\\bottomrule\n\\end{tabular}"

    def parse_node(element: bs4.element.Tag) -> Iterator[str]:
        """Yield the LaTeX fragments for an element and its children."""
        prefix, post = html_to_latex.get(element.name, ('', ''))
        yield prefix

        for c in element.children:
            if isinstance(c, bs4.element.Tag):
                if c.name == 'table':
                    yield handle_table(c)
                else:
                    yield from parse_node(c)
            else:
                # Non-tag children are text nodes.
                yield escape_text(c.text)
        yield post

    return ''.join(parse_node(root))
|
||||
|
||||
|
||||
def from_html(html_code: str) -> str:
|
||||
"""
|
||||
Converts HTML code to LaTeX code using HTMLParser.
|
||||
|
||||
Args:
|
||||
html_code: The HTML code to convert.
|
||||
|
||||
Returns:
|
||||
The LaTeX code.
|
||||
"""
|
||||
html_to_latex = {
|
||||
'strong': ('\\textbf{', '}'),
|
||||
'b': ('\\textbf{', '}'),
|
||||
'em': ('\\emph{', '}'),
|
||||
'i': ('\\emph{', '}'),
|
||||
'p': ('', '\n\n'),
|
||||
'h1': ('\\section{', '}\n'),
|
||||
'h2': ('\\subsection{', '}\n'),
|
||||
'h3': ('\\subsubsection{', '}\n'),
|
||||
'ul': ('\\begin{itemize}\n', '\\end{itemize}\n'),
|
||||
'ol': ('\\begin{enumerate}\n', '\\end{enumerate}\n'),
|
||||
'li': ('\\item ', '\n')
|
||||
}
|
||||
|
||||
class LaTeXHTMLParser(HTMLParser):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.latex_code: list[str] = []
|
||||
self.header_index: int = -1
|
||||
self.column_alignment = ''
|
||||
self.midrule_flag = False
|
||||
self.header_flag = False
|
||||
|
||||
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
|
||||
if tag in html_to_latex:
|
||||
prefix, _ = html_to_latex[tag]
|
||||
self.latex_code.append(prefix)
|
||||
elif tag == 'table':
|
||||
self.header_index = len(self.latex_code)
|
||||
self.latex_code.append('') # Placeholder for column header
|
||||
self.midrule_flag = False
|
||||
self.header_flag = False
|
||||
elif tag == 'tr':
|
||||
self.column_alignment = ''
|
||||
elif tag in ['th', 'td']:
|
||||
style = [v for k, v in attrs if k == 'style']
|
||||
if style and style[0] and 'right' in style[0]:
|
||||
self.column_alignment += 'r'
|
||||
else:
|
||||
self.column_alignment += 'l'
|
||||
elif tag == 'a':
|
||||
href = [v for k, v in attrs if k == 'href']
|
||||
assert href, 'Link href attribute is missing'
|
||||
self.latex_code.append(f"\\href{{{href[0]}}}{{")
|
||||
|
||||
def handle_endtag(self, tag: str) -> None:
|
||||
if tag in html_to_latex:
|
||||
_, postfix = html_to_latex[tag]
|
||||
self.latex_code.append(postfix)
|
||||
elif tag == 'table':
|
||||
self.latex_code.append("\\bottomrule\n\\end{tabular}\n")
|
||||
elif tag == 'tr':
|
||||
self.latex_code.pop() # Remove column separator after last entry
|
||||
if self.header_index >= 0:
|
||||
self.latex_code[self.header_index] = f"\\begin{{tabular}}{{{self.column_alignment}}}\\toprule\n"
|
||||
self.header_index = -1
|
||||
self.latex_code.append(' \\\\\n')
|
||||
if self.header_flag and not self.midrule_flag:
|
||||
self.latex_code.append("\\midrule\n")
|
||||
self.midrule_flag = True
|
||||
elif tag == 'th':
|
||||
self.latex_code.append(" & ")
|
||||
self.header_flag = True
|
||||
elif tag == 'td':
|
||||
self.latex_code.append(" & ")
|
||||
elif tag == 'a':
|
||||
self.latex_code.append("}")
|
||||
|
||||
def handle_data(self, data: str) -> None:
|
||||
if data.strip():
|
||||
self.latex_code.append(escape_text(data))
|
||||
|
||||
parser = LaTeXHTMLParser()
|
||||
parser.feed(html_code)
|
||||
return ''.join(parser.latex_code)
|
||||
|
||||
|
||||
def compile(latex_code: str, output_file: str = '', encoding: str = 'utf-8') -> tuple[bool, list[str], list[str]]:
    """
    Compiles LaTeX code to a PDF file.

    pdflatex is run in a temporary directory with the LaTeX code fed on
    stdin, up to three times. A further run is started only while the
    output of the last run mentions references and no error was found.

    Args:
        latex_code: The LaTeX code to compile.
        output_file: The output file path. If empty, the PDF is not copied
            out of the temporary directory.
        encoding: The encoding of the LaTeX code. It is also used to
            decode the pdflatex output; undecodable bytes are replaced
            instead of raising UnicodeDecodeError.

    Returns:
        A tuple with three elements:
        - A boolean indicating whether the compilation was successful.
        - A list of errors.
        - A list of warnings.
    """

    with tempfile.TemporaryDirectory() as tmp_path:
        command = ['pdflatex', '-halt-on-error', '--output-directory', tmp_path]

        errors: list[str] = []
        warnings: list[str] = []

        for i in range(1, 4):
            rerun_flag = False
            error_flag = False
            # Only stdout is captured; stderr is inherited from this process.
            process = subprocess.run(command, input=latex_code.encode(encoding), stdout=subprocess.PIPE)

            # The log may contain bytes that are not valid in `encoding`,
            # so decode leniently and only once.
            log_text = process.stdout.decode(encoding, errors='replace')

            for line in log_text.split('\n'):
                if 'Warning' in line:
                    warnings.append(f"Run {i}: " + line)
                # NOTE(review): the original nesting of this check was not
                # recoverable; it is treated as independent of the
                # 'Warning' check - confirm.
                if 'reference' in line:
                    rerun_flag = True
                if line.startswith('!') or line.startswith('*!'):
                    error_flag = True

                # From the first error marker on, every remaining line of
                # the run is recorded as error context.
                if error_flag:
                    errors.append(line)

            if not rerun_flag or errors:
                break

        # Copy pdf file
        file_list = [f for f in os.listdir(tmp_path) if f.lower().endswith('.pdf')]
        if file_list:
            pdf_file = os.path.join(tmp_path, file_list[0])
            if output_file:
                shutil.copyfile(pdf_file, output_file)

    return not errors, errors, warnings
|
||||
|
||||
|
||||
def inject_latex_command(text: str, command: str) -> str:
    """
    Inserts a LaTeX command after the last ``\\usepackage`` line.

    Args:
        text: The LaTeX code to extend.
        command: The command to insert; it is surrounded by blank lines.

    Returns:
        The text, re-joined with newlines, with the command placed directly
        after the last line that starts with ``\\usepackage`` (ignoring
        leading whitespace), or at the end if there is no such line.
    """
    rows = text.splitlines()
    snippet = f"\n{command}\n"

    package_rows = [idx for idx, row in enumerate(rows) if row.strip().startswith("\\usepackage")]

    if package_rows:
        rows.insert(package_rows[-1] + 1, snippet)
    else:
        rows.append(snippet)

    return '\n'.join(rows)
|
|
@ -0,0 +1,89 @@
|
|||
# Maps single unicode characters to their LaTeX replacement text.
# The keys are joined with '|' and used as a regular expression by
# to_ascii() and escape_text(), so every key must be safe to use as a
# regex alternative.
unicode_to_latex_dict = {
    # Unicode numeric subscripts
    '₀': r'\textsubscript{0}', '₁': r'\textsubscript{1}', '₂': r'\textsubscript{2}', '₃': r'\textsubscript{3}',
    '₄': r'\textsubscript{4}', '₅': r'\textsubscript{5}', '₆': r'\textsubscript{6}', '₇': r'\textsubscript{7}',
    '₈': r'\textsubscript{8}', '₉': r'\textsubscript{9}',
    # Unicode numeric superscripts
    '⁰': r'\textsuperscript{0}', '¹': r'\textsuperscript{1}', '²': r'\textsuperscript{2}', '³': r'\textsuperscript{3}',
    '⁴': r'\textsuperscript{4}', '⁵': r'\textsuperscript{5}', '⁶': r'\textsuperscript{6}', '⁷': r'\textsuperscript{7}',
    '⁸': r'\textsuperscript{8}', '⁹': r'\textsuperscript{9}', '⁺': r'\textsuperscript{+}', '⁻': r'\textsuperscript{-}',
    # Often used European non-ascii-characters
    'ä': r'{\"a}',
    'ö': r'{\"o}',
    'ü': r'{\"u}',
    'Ä': r'{\"A}',
    'Ö': r'{\"O}',
    'Ü': r'{\"U}',
    'ß': r'{\ss}',
    'é': r"{\'e}",
    'è': r"{\`e}",
    'ê': r"{\^e}",
    'à': r"{\`a}",
    'â': r"{\^a}",
    'ç': r"{\c{c}}",
    'É': r"{\'E}",
    'È': r"{\`E}",
    'Ê': r"{\^E}",
    'À': r"{\`A}",
    'Â': r"{\^A}",
    'Ç': r"{\c{C}}",
    'ó': r"{\'o}",
    'ò': r"{\`o}",
    'ô': r"{\^o}",
    'Ó': r"{\'O}",
    'Ò': r"{\`O}",
    'Ô': r"{\^O}",
    'í': r"{\'i}",
    'ì': r"{\`i}",
    'î': r"{\^i}",
    'Í': r"{\'I}",
    'Ì': r"{\`I}",
    'Î': r"{\^I}",
    'ú': r"{\'u}",
    'ù': r"{\`u}",
    'û': r"{\^u}",
    'Ú': r"{\'U}",
    'Ù': r"{\`U}",
    'Û': r"{\^U}",
    'å': r"{\r{a}}",
    'Å': r"{\r{A}}",
    'ø': r"{\o}",
    'Ø': r"{\O}",
    'æ': r"{\ae}",
    'Æ': r"{\AE}",
    'œ': r"{\oe}",
    'Œ': r"{\OE}",
    # Other unicode
    '°': r'{\textdegree}',
    'µ': r'{\textmu}',
    'π': r'$\pi$',
    '≈': r'$\approx$',
    '±': r'$\pm$',
    '≠': r'$\neq$',
    '∆': r'$\Delta$',
    'Ω': r'$\Omega$',
    'Λ': r'$\Lambda$',
    'Σ': r'$\Sigma$',
    # '€': r'{\euro}',
    '£': r'{\pounds}',
    '¥': r'{\yen}',
    '\u00A0': r'~',  # Non-breaking space
    '\u2007': ' '  # Figure space
}
|
||||
|
||||
# Maps LaTeX special characters to their escaped LaTeX form.
# The keys are regular-expression fragments, not plain strings:
# escape_text() joins them with '|' into one pattern and looks up the
# replacement with re.match(key, matched_text). Characters with a special
# meaning in regular expressions ($, ^, \) are therefore written escaped.
latex_escape_dict = {
    '&': r'\&',
    '%': r'\%',
    r'\$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
    '<': r'{\textless}',
    '>': r'{\textgreater}',
    '~': r'\textasciitilde{}',
    r'\^': r'\textasciicircum{}',
    r'\\': r'\textbackslash{}',
    # References: "@prefix:name" is turned into \ref{...}. escape_text()
    # replaces the \g<1> placeholder with the normalized text of capture
    # group 1, so this should stay the only key with a capture group.
    r'@(\w+:[\w\_\-]+)': r'\ref{\g<1>}'
}
|
|
@ -0,0 +1,38 @@
|
|||
\documentclass[a4paper,12pt]{article}
|
||||
|
||||
% Packages
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage{lmodern} % Load Latin Modern font
|
||||
\usepackage{graphicx} % For including images
|
||||
\usepackage{amsmath} % For mathematical symbols
|
||||
\usepackage{amssymb} % For additional symbols
|
||||
\usepackage{hyperref} % For hyperlinks
|
||||
\usepackage{caption} % For customizing captions
|
||||
\usepackage{geometry} % To set margins
|
||||
\usepackage{natbib} % For citations
|
||||
\usepackage{float} % For fixing figure positions
|
||||
\usepackage{siunitx} % For scientific units
|
||||
\usepackage{booktabs} % For professional-looking tables
|
||||
\usepackage{pgf} % For using pgf graphics
|
||||
\usepackage{textcomp, gensymb} % provides \degree symbol
|
||||
|
||||
\sisetup{
|
||||
table-align-text-post = false
|
||||
}
|
||||
|
||||
% Geometry Settings
|
||||
\geometry{margin=1in} % 1-inch margins
|
||||
|
||||
% Title and Author Information
|
||||
% \title{Report Title}
|
||||
% \author{Your Name \\ Department of XYZ \\ \texttt{email@example.com}}
|
||||
% \date{\today}
|
||||
|
||||
\begin{document}
|
||||
|
||||
% Title Page
|
||||
% # \maketitle
|
||||
|
||||
% <!--CONTENT-->
|
||||
\end{document}
|
|
@ -0,0 +1,114 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Test template</title>
|
||||
<style>
|
||||
body
|
||||
{
|
||||
background-color: white;
|
||||
font-family: Lucida Grande,Lucida Sans Unicode,Lucida Sans,Geneva,Verdana,sans-serif;
|
||||
}
|
||||
|
||||
div.document
|
||||
{
|
||||
max-width: 820px;
|
||||
top: 20px;
|
||||
overflow: visible;
|
||||
margin: 0 auto;
|
||||
width: 90%;
|
||||
padding-bottom: 50px;
|
||||
}
|
||||
|
||||
div h1
|
||||
{
|
||||
font-size: 32px;
|
||||
font-weight: normal;
|
||||
margin-bottom: 10px;
|
||||
margin-top: 24px;
|
||||
color: black;
|
||||
}
|
||||
|
||||
div h2
|
||||
{
|
||||
font-size: 24px;
|
||||
font-weight: normal;
|
||||
margin-bottom: 10px;
|
||||
margin-top: 24px;
|
||||
color: black;
|
||||
}
|
||||
|
||||
div.figure {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
div svg {
|
||||
margin-left: -5%;
|
||||
max-width: 110%;
|
||||
height: auto;
|
||||
object-fit: contain;
|
||||
}
|
||||
|
||||
.equation-container {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.equation {
|
||||
text-align: center;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.equation-number {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
margin: 20px auto;
|
||||
}
|
||||
|
||||
|
||||
table th,
|
||||
table td {
|
||||
padding: 8px;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
|
||||
table th {
|
||||
background-color: #f4f4f4;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
/* Alternating row colors for readability */
|
||||
table tr:nth-child(even) {
|
||||
background-color: #f9f9f9;
|
||||
}
|
||||
|
||||
table tr:nth-child(odd) {
|
||||
background-color: #ffffff;
|
||||
}
|
||||
|
||||
/* Caption styling */
|
||||
table caption {
|
||||
caption-side: top;
|
||||
font-weight: bold;
|
||||
font-size: 16px;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
/* Fine-tuning text in table */
|
||||
table td, table th {
|
||||
vertical-align: middle;
|
||||
line-height: 1.5;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="document">
|
||||
<!--CONTENT-->
|
||||
</div>
|
||||
</body>
|
|
@ -0,0 +1,64 @@
|
|||
from typing import Generator, Any
|
||||
from lxml import etree
|
||||
from lxml.etree import _Element as EElement # type: ignore
|
||||
import requests
|
||||
|
||||
|
||||
with open('src/pyladoc/templates/test_template.html', mode='rt', encoding='utf-8') as f:
|
||||
html_test_template = f.read()
|
||||
|
||||
|
||||
def add_line_numbers(multiline_string: str) -> str:
    """
    Prefixes every line of a string with its 1-based line number.

    Args:
        multiline_string: The text to number.

    Returns:
        The lines joined with newlines, each formatted as ``<number>: <line>``.
    """
    return "\n".join(
        f"{number}: {content}"
        for number, content in enumerate(multiline_string.splitlines(), start=1)
    )
|
||||
|
||||
|
||||
def validate_html_with_w3c(html_string: str) -> dict[str, Any]:
    """
    Sends an HTML document to the W3C Nu validator and returns its answer.

    Args:
        html_string: The complete HTML document to validate.

    Returns:
        The decoded JSON response of the validator on HTTP status 200.
        Otherwise a dictionary with an ``error`` entry (and ``details``
        with the response text when the server answered with another
        status code).
    """
    validator_url = "https://validator.w3.org/nu/"

    # Parameters for the POST request
    headers = {
        "Content-Type": "text/html; charset=utf-8",
        "User-Agent": "Python HTML Validator"}

    try:
        response = requests.post(
            validator_url,
            headers=headers,
            # Send bytes that match the charset declared in the header; a
            # str body fails for characters outside Latin-1.
            data=html_string.encode('utf-8'),
            params={"out": "json"},
            # Without a timeout the request can block forever; a timeout
            # is reported through the RequestException handler below.
            timeout=30,
        )

        if response.status_code == 200:
            return response.json()
        else:
            return {
                "error": f"Failed to validate HTML. Status code: {response.status_code}",
                "details": response.text
            }

    except requests.RequestException as e:
        return {"error": f"An error occurred while connecting to the W3C Validator: {str(e)}"}
|
||||
|
||||
|
||||
def validate_html(html_string: str, validate_online: bool = False, check_for: 'list[str] | None' = None) -> None:
    """
    Checks generated HTML code for expected tags and optionally validates it online.

    Args:
        html_string: The HTML fragment to check.
        validate_online: If True, the fragment is placed into the test
            template and sent to the W3C validator.
        check_for: Tag names that must occur in the HTML code. Defaults to
            ``['table', 'svg', 'div']``.

    Raises:
        AssertionError: If an expected tag is missing, the validation
            request failed or the validator reported any message.
    """
    # A fresh list per call instead of a shared mutable default argument.
    if check_for is None:
        check_for = ['table', 'svg', 'div']

    root = etree.fromstring(html_string, parser=etree.HTMLParser(recover=True))

    def recursive_search(element: EElement) -> Generator[str, None, None]:
        # Only elements with a string tag are reported.
        if isinstance(element.tag, str):
            yield element.tag

        for child in element:
            yield from recursive_search(child)

    tags = set(recursive_search(root))

    for tag_type in check_for:
        assert tag_type in tags, f"Tag {tag_type} not found in the html code"

    if validate_online:
        test_page = html_test_template.replace('<!--CONTENT-->', html_string)
        validation_result = validate_html_with_w3c(test_page)
        assert 'messages' in validation_result, 'Validate request failed'
        if validation_result['messages']:
            # Print the numbered page so the reported line numbers can be found.
            print(add_line_numbers(test_page))
            for verr in validation_result['messages']:
                print(f"- {verr['type']}: {verr['message']} (line: {verr['lastLine']})")

        assert len(validation_result['messages']) == 0, f'{len(validation_result["messages"])} validation error, first error: {validation_result["messages"][0]["message"]}'
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
@ -0,0 +1,5 @@
|
|||
<h1>Special caracters</h1>
|
||||
<p>Umlaute: ÖÄÜ öäü</p>
|
||||
<p>Other: ß, €, @, $, %, ~, µ</p>
|
||||
<p>Units: m³, cm²</p>
|
||||
<p>Controll characters: <, >, ", ', &, |, /, \</p>
|
|
@ -0,0 +1,39 @@
|
|||
<h1>Source Equations</h1>
|
||||
<ol>
|
||||
<li>$4(3x + 2) - 5(x - 1) = 3x + 14$</li>
|
||||
<li>$
|
||||
rac{2y + 5}{4} +
|
||||
rac{3y - 1}{2} = 5$</li>
|
||||
<li>$
|
||||
rac{5}{x + 2} +
|
||||
rac{2}{x - 2} = 3$</li>
|
||||
<li>$8(3b - 5) + 4(b + 2) = 60$</li>
|
||||
<li>$2c^2 - 3c - 5 = 0$</li>
|
||||
<li>$4(2d - 1) + 5(3d + 2) = 7d + 28$</li>
|
||||
<li>$q^2 + 6q + 9 = 16$</li>
|
||||
</ol>
|
||||
<h1>Result Equations</h1>
|
||||
<ol>
|
||||
<li>$x =
|
||||
rac{1}{4}$</li>
|
||||
<li>$y =
|
||||
rac{17}{8}$</li>
|
||||
<li>$z =
|
||||
rac{7}{3}$</li>
|
||||
<li>$x = 1$ or $x = -6$</li>
|
||||
<li>$a =
|
||||
rac{1}{3}$ or $a = 2$</li>
|
||||
<li>$x = -
|
||||
rac{2}{3}$ or $x = 3$</li>
|
||||
<li>$b =
|
||||
rac{23}{7}$</li>
|
||||
</ol>
|
||||
<h1>Step by Step</h1>
|
||||
<ol>
|
||||
<li>Distribute: $12x + 8 - 5x + 5 = 3x + 14$</li>
|
||||
<li>Combine like terms: $7x + 13 = 3x + 14$</li>
|
||||
<li>Subtract $3x$: $4x + 13 = 14$</li>
|
||||
<li>Subtract $13$: $4x = 1$</li>
|
||||
<li>Divide by $4$: $x =
|
||||
rac{1}{4}$</li>
|
||||
</ol>
|
|
@ -0,0 +1,44 @@
|
|||
<p>Below is an in-depth explanation of the AArch64 (ARM64)
|
||||
unconditional branch instruction—often simply called the
|
||||
“B” instruction—and how its 26‐bit immediate field (imm26)
|
||||
is laid out and later relocated during linking.</p>
|
||||
<hr>
|
||||
<h2>Instruction Layout</h2>
|
||||
<p>The unconditional branch in AArch64 is encoded in a 32‑bit
|
||||
instruction. Its layout is as follows:</p>
|
||||
<pre><code>Bits: 31 26 25 0
|
||||
+-------------+------------------------------+
|
||||
| Opcode | imm26 |
|
||||
+-------------+------------------------------+
|
||||
</code></pre>
|
||||
<ul>
|
||||
<li><strong>Opcode (bits 31:26):</strong></li>
|
||||
<li>For a plain branch (<code>B</code>), the opcode is <code>000101</code>.</li>
|
||||
<li>
|
||||
<p>For a branch with link (<code>BL</code>), which saves the return
|
||||
address (i.e., a call), the opcode is <code>100101</code>.
|
||||
These 6 bits determine the instruction type.</p>
|
||||
</li>
|
||||
<li>
|
||||
<p><strong>Immediate Field (imm26, bits 25:0):</strong></p>
|
||||
</li>
|
||||
<li>This 26‑bit field holds a signed immediate value.</li>
|
||||
<li>
|
||||
<p><strong>Offset Calculation:</strong> At runtime, the processor:</p>
|
||||
<ol>
|
||||
<li><strong>Shifts</strong> the 26‑bit immediate left by 2 bits.
|
||||
(Because instructions are 4-byte aligned,
|
||||
the two least-significant bits are always zero.)</li>
|
||||
<li><strong>Sign-extends</strong> the resulting 28‑bit value to
|
||||
the full register width (typically 64 bits).</li>
|
||||
<li><strong>Adds</strong> this value to the program counter
|
||||
(PC) to obtain the branch target.</li>
|
||||
</ol>
|
||||
</li>
|
||||
<li>
|
||||
<p><strong>Reach:</strong></p>
|
||||
</li>
|
||||
<li>With a 26‑bit signed field that’s effectively 28 bits
|
||||
after the shift, the branch can cover a range
|
||||
of approximately ±128 MB from the current instruction.</li>
|
||||
</ul>
|
|
@ -0,0 +1,77 @@
|
|||
<h2>Klemmen</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th style="text-align: right;">Anz.</th>
|
||||
<th>Typ</th>
|
||||
<th>Beschreibung</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td style="text-align: right;">12</td>
|
||||
<td>BK9050</td>
|
||||
<td>Buskoppler</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: right;">2</td>
|
||||
<td>KL1104</td>
|
||||
<td>4 Digitaleingänge</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: right;">2</td>
|
||||
<td>KL2404</td>
|
||||
<td>4 Digitalausgänge (0,5 A)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: right;">3</td>
|
||||
<td>KL2424</td>
|
||||
<td>4 Digitalausgänge (2 A)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: right;">2</td>
|
||||
<td>KL4004</td>
|
||||
<td>4 Analogausgänge</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: right;">1</td>
|
||||
<td>KL4002</td>
|
||||
<td>2 Analogausgänge</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: right;">22</td>
|
||||
<td>KL9188</td>
|
||||
<td>Potenzialverteilungsklemme</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: right;">1</td>
|
||||
<td>KL9100</td>
|
||||
<td>Potenzialeinspeiseklemme</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: right;">3</td>
|
||||
<td>KL3054</td>
|
||||
<td>4 Analogeingänge</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: right;">5</td>
|
||||
<td>KL3214</td>
|
||||
<td>PT100 4 Temperatureingänge (3-Leiter)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: right;">3</td>
|
||||
<td>KL3202</td>
|
||||
<td>PT100 2 Temperatureingänge (3-Leiter)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: right;">1</td>
|
||||
<td>KL2404</td>
|
||||
<td>4 Digitalausgänge</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: right;">2</td>
|
||||
<td>KL9010</td>
|
||||
<td>Endklemme</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
|
@ -0,0 +1,151 @@
|
|||
import pyladoc.latex
|
||||
|
||||
|
||||
def normalize_latex_code(latex_code: str) -> str:
    """
    Strips every line and drops empty lines so LaTeX code can be compared.

    Lines that are completely empty are removed; lines that contain only
    whitespace are kept as empty lines.
    """
    kept_lines = []
    for raw_line in latex_code.splitlines():
        if raw_line:
            kept_lines.append(raw_line.strip())
    return '\n'.join(kept_lines)
|
||||
|
||||
|
||||
def check_only_ascii(latex_code: str) -> bool:
    """Returns True if every character of the text has a code point below 128."""
    return latex_code.isascii()
|
||||
|
||||
|
||||
def test_latex_from_html():
|
||||
html_code = """
|
||||
<h1>Test</h1>
|
||||
<p>This is are Umlautes: Ä,Ö and Ü</p>
|
||||
<p>This is a <b>test</b>.</p>
|
||||
<p>And this is another <em>test</em>.</p>
|
||||
<p>And this is a <strong>third</strong> test.</p>
|
||||
<p>And this is a <i>fourth</i> test.</p>
|
||||
<p>This is a LaTeX command: \\textbf{test}</p>
|
||||
<p>This are typical control characters: {, }, <, >, ", ', &, |, /, \\</p>
|
||||
<ul>
|
||||
<li>Item 1</li>
|
||||
<li>Item 2</li>
|
||||
</ul>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Header 1</th>
|
||||
<th>Header 2</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cell 1</td>
|
||||
<td>Cell 2</td>
|
||||
</tr>
|
||||
</table>
|
||||
"""
|
||||
|
||||
latex_code = pyladoc.latex.from_html(html_code)
|
||||
|
||||
ref_latex_code = r"""
|
||||
\section{Test}
|
||||
This is are Umlautes: {\"A},{\"O} and {\"U}
|
||||
This is a \textbf{test}.
|
||||
And this is another \emph{test}.
|
||||
And this is a \textbf{third} test.
|
||||
And this is a \emph{fourth} test.
|
||||
This is a LaTeX command: \textbackslash{}textbf\{test\}
|
||||
This are typical control characters: \{, \}, {\textless}, {\textgreater}, ", ', \&, |, /, \textbackslash{}
|
||||
\begin{itemize}
|
||||
\item Item 1
|
||||
\item Item 2
|
||||
\end{itemize}
|
||||
\begin{tabular}{ll}\toprule
|
||||
Header 1 & Header 2 \\
|
||||
\midrule
|
||||
Cell 1 & Cell 2 \\
|
||||
\bottomrule
|
||||
\end{tabular}"""
|
||||
|
||||
print(latex_code)
|
||||
|
||||
print('--')
|
||||
|
||||
# print(pyladoc.latex.escape_text(html_code))
|
||||
|
||||
assert check_only_ascii(latex_code), 'Some characters are not ASCII'
|
||||
assert normalize_latex_code(ref_latex_code) == normalize_latex_code(latex_code)
|
||||
|
||||
|
||||
def test_latex_from_markdown():
|
||||
markdown_code = """
|
||||
## Test1
|
||||
|
||||
| Anz.| Typ | Beschreibung
|
||||
|----:|----------|------------------------------------
|
||||
| 12 | BK9050 | Buskoppler
|
||||
| 2 | KL1104 | 4 Digitaleingänge
|
||||
| 2 | KL2404 | 4 Digitalausgänge (0,5 A)
|
||||
| 3 | KL2424 | 4 Digitalausgänge (2 A)
|
||||
| 2 | KL4004 | 4 Analogausgänge
|
||||
| 1 | KL4002 | 2 Analogausgänge
|
||||
| 22 | KL9188 | Potenzialverteilungsklemme
|
||||
| 1 | KL9100 | Potenzialeinspeiseklemme
|
||||
| 3 | KL3054 | 4 Analogeingänge
|
||||
| 5 | KL3214 | PT100 4 Temperatureingänge (3-Leiter)
|
||||
| 3 | KL3202 | PT100 2 Temperatureingänge (3-Leiter)
|
||||
| 1 | KL2404 | 4 Digitalausgänge
|
||||
| 2 | KL9010 | Endklemme
|
||||
|
||||
This is a **test**.
|
||||
|
||||
## Test2
|
||||
|
||||
| Anz.| Beschreibung
|
||||
|----:|------------------------------------
|
||||
| 12 | Buskoppler
|
||||
| 2 | 4 Digitaleingänge
|
||||
| 2 | 4 Digitalausgänge (0,5 A)
|
||||
| 3 | 4 Digitalausgänge (2 A)
|
||||
| 2 | 4 Analogausgänge
|
||||
| 1 | 2 Analogausgänge
|
||||
"""
|
||||
|
||||
pyla = pyladoc.DocumentWriter()
|
||||
pyla.add_markdown(markdown_code)
|
||||
latex_code = pyladoc.latex.from_html(pyla.to_html())
|
||||
|
||||
ref_latex_code = r"""
|
||||
\subsection{Test1}
|
||||
\begin{tabular}{rll}\toprule
|
||||
Anz. & Typ & Beschreibung \\
|
||||
\midrule
|
||||
12 & BK9050 & Buskoppler \\
|
||||
2 & KL1104 & 4 Digitaleing{\"a}nge \\
|
||||
2 & KL2404 & 4 Digitalausg{\"a}nge (0,5 A) \\
|
||||
3 & KL2424 & 4 Digitalausg{\"a}nge (2 A) \\
|
||||
2 & KL4004 & 4 Analogausg{\"a}nge \\
|
||||
1 & KL4002 & 2 Analogausg{\"a}nge \\
|
||||
22 & KL9188 & Potenzialverteilungsklemme \\
|
||||
1 & KL9100 & Potenzialeinspeiseklemme \\
|
||||
3 & KL3054 & 4 Analogeing{\"a}nge \\
|
||||
5 & KL3214 & PT100 4 Temperatureing{\"a}nge (3-Leiter) \\
|
||||
3 & KL3202 & PT100 2 Temperatureing{\"a}nge (3-Leiter) \\
|
||||
1 & KL2404 & 4 Digitalausg{\"a}nge \\
|
||||
2 & KL9010 & Endklemme \\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
This is a \textbf{test}.
|
||||
|
||||
\subsection{Test2}
|
||||
\begin{tabular}{rl}\toprule
|
||||
Anz. & Beschreibung \\
|
||||
\midrule
|
||||
12 & Buskoppler \\
|
||||
2 & 4 Digitaleing{\"a}nge \\
|
||||
2 & 4 Digitalausg{\"a}nge (0,5 A) \\
|
||||
3 & 4 Digitalausg{\"a}nge (2 A) \\
|
||||
2 & 4 Analogausg{\"a}nge \\
|
||||
1 & 2 Analogausg{\"a}nge \\
|
||||
\bottomrule
|
||||
\end{tabular}"""
|
||||
|
||||
print(latex_code)
|
||||
|
||||
assert check_only_ascii(latex_code), 'Some characters are not ASCII'
|
||||
assert normalize_latex_code(ref_latex_code) == normalize_latex_code(latex_code)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_latex_from_html()
|
||||
test_latex_from_markdown()
|
|
@ -0,0 +1,108 @@
|
|||
import pyladoc
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import document_validation
|
||||
|
||||
VALIDATE_HTML_CODE_ONLINE = False
|
||||
WRITE_RESULT_FILES = True
|
||||
|
||||
|
||||
def make_document():
|
||||
dw = pyladoc.DocumentWriter()
|
||||
|
||||
dw.add_markdown("""
|
||||
# Special characters
|
||||
|
||||
ö ä ü Ö Ä Ü ß @ ∆
|
||||
|
||||
π ≈ ± ∆ Σ
|
||||
|
||||
£ ¥ $ €
|
||||
|
||||
Œ
|
||||
|
||||
# Link
|
||||
|
||||
This is a hyperlink: [nonan.net](https://www.nonan.net)
|
||||
|
||||
# Table
|
||||
|
||||
| Anz.| Typ | Beschreibung
|
||||
|----:|----------|------------------------------------
|
||||
| 12 | BK9050 | Buskoppler
|
||||
| 2 | KL1104 | 4 Digitaleingänge
|
||||
| 2 | KL2404 | 4 Digitalausgänge (0,5 A)
|
||||
| 3 | KL2424 | 4 Digitalausgänge (2 A)
|
||||
| 2 | KL4004 | 4 Analogausgänge
|
||||
| 1 | KL4002 | 2 Analogausgänge
|
||||
| 22 | KL9188 | Potenzialverteilungsklemme
|
||||
| 1 | KL9100 | Potenzialeinspeiseklemme
|
||||
| 3 | KL3054 | 4 Analogeingänge
|
||||
| 5 | KL3214 | PT100 4 Temperatureingänge (3-Leiter)
|
||||
| 3 | KL3202 | PT100 2 Temperatureingänge (3-Leiter)
|
||||
| 1 | KL2404 | 4 Digitalausgänge
|
||||
| 2 | KL9010 | Endklemme
|
||||
|
||||
---
|
||||
|
||||
# Equations
|
||||
|
||||
This line represents a reference to the equation @eq:test1.
|
||||
""")
|
||||
|
||||
dw.add_equation(r'y = a + b * \sum_{i=0}^{\infty} a_i x^i', 'test1')
|
||||
|
||||
# Figure
|
||||
fig, ax = plt.subplots()
|
||||
|
||||
fruits = ['apple', 'blueberry', 'cherry', 'orange']
|
||||
counts = [40, 100, 30, 55]
|
||||
bar_labels = ['red', 'blue', '_red', 'orange']
|
||||
bar_colors = ['tab:red', 'tab:blue', 'tab:red', 'tab:orange']
|
||||
|
||||
ax.bar(fruits, counts, label=bar_labels, color=bar_colors)
|
||||
ax.set_ylabel('fruit supply')
|
||||
ax.set_title('Fruit supply by kind and color')
|
||||
ax.legend(title='Fruit color')
|
||||
|
||||
dw.add_diagram(fig, 'Bar chart with individual bar colors')
|
||||
|
||||
# Table
|
||||
mydataset = {
|
||||
'Row1': ["Line1", "Line2", "Line3", "Line4", "Line5"],
|
||||
'Row2': [120, '95 km/h', 110, '105 km/h', 130],
|
||||
'Row3': ['12 g/km', '> 150 g/km', '110 g/km', '1140 g/km', '13.05 g/km'],
|
||||
'Row4': ['5 stars', '4 stars', '5 stars', '4.5 stars', '5 stars'],
|
||||
'Row5': [3.5, 7.8, 8.5, 6.9, 4.2],
|
||||
'Row6': ['1850 kg', '1500 kg', '1400 kg', '1600 kg', '1700 kg'],
|
||||
'Row7': ['600 Nm', '250 Nm', '280 Nm', '320 Nm', '450 Nm']
|
||||
}
|
||||
df = pd.DataFrame(mydataset)
|
||||
|
||||
dw.add_table(df.style.hide(axis="index"), 'This is a example table', 'example1')
|
||||
|
||||
return dw
|
||||
|
||||
|
||||
def test_html_render():
    """Renders the sample document to HTML, validates it and optionally stores the page."""
    html_code = make_document().to_html()

    document_validation.validate_html(html_code, VALIDATE_HTML_CODE_ONLINE)

    if not WRITE_RESULT_FILES:
        return

    with open('tests/out/test_html_render.html', 'w', encoding='utf-8') as f:
        f.write(pyladoc.inject_to_template(html_code, internal_template='templates/test_template.html'))
|
||||
|
||||
|
||||
def test_latex_render():
    """Renders the sample document to a PDF file and expects a truthy result."""
    document = make_document()

    pdf_created = document.to_pdf('tests/out/test_latex_render.pdf', font_family='serif')
    assert pdf_created
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_html_render()
|
||||
test_latex_render()
|
|
@ -0,0 +1,152 @@
|
|||
import pyladoc
|
||||
import document_validation
|
||||
|
||||
VALIDATE_HTML_CODE_ONLINE = False
|
||||
WRITE_RESULT_FILES = True
|
||||
|
||||
|
||||
def test_markdown_styling():
    """Render markdown with emphasis, lists, inline code, a fence and a rule.

    The resulting HTML is passed to ``document_validation.validate_html``
    with ``check_for=['strong', 'ol', 'li', 'code', 'hr']``. When
    ``WRITE_RESULT_FILES`` is set, the HTML is also written to
    ``tests/out/test_markdown_style.html``.
    """
    markdown_source = """
Below is an in-depth explanation of the AArch64 (ARM64)
unconditional branch instruction—often simply called the
“B” instruction—and how its 26‐bit immediate field (imm26)
is laid out and later relocated during linking.

---

## Instruction Layout

The unconditional branch in AArch64 is encoded in a 32‑bit
instruction. Its layout is as follows:

```
Bits: 31 26 25 0
+-------------+------------------------------+
| Opcode | imm26 |
+-------------+------------------------------+
```

- **Opcode (bits 31:26):**
- For a plain branch (`B`), the opcode is `000101`.
- For a branch with link (`BL`), which saves the return
address (i.e., a call), the opcode is `100101`.
These 6 bits determine the instruction type.

- **Immediate Field (imm26, bits 25:0):**
- This 26‑bit field holds a signed immediate value.
- **Offset Calculation:** At runtime, the processor:
1. **Shifts** the 26‑bit immediate left by 2 bits.
(Because instructions are 4-byte aligned,
the two least-significant bits are always zero.)
2. **Sign-extends** the resulting 28‑bit value to
the full register width (typically 64 bits).
3. **Adds** this value to the program counter
(PC) to obtain the branch target.

- **Reach:**
- With a 26‑bit signed field that’s effectively 28 bits
after the shift, the branch can cover a range
of approximately ±128 MB from the current instruction.
"""

    writer = pyladoc.DocumentWriter()
    writer.add_markdown(markdown_source)

    html_code = writer.to_html()
    expected_tags = ['strong', 'ol', 'li', 'code', 'hr']
    document_validation.validate_html(html_code, check_for=expected_tags)

    if not WRITE_RESULT_FILES:
        return

    with open('tests/out/test_markdown_style.html', 'w', encoding='utf-8') as out_file:
        out_file.write(html_code)
|
||||
|
||||
|
||||
def test_markdown_table():
    """Render a markdown pipe table to HTML.

    The resulting HTML is passed to ``document_validation.validate_html``
    with ``check_for=['table']``. When ``WRITE_RESULT_FILES`` is set, the
    HTML is also written to ``tests/out/test_markdown_table.html``.
    """
    markdown_source = """
## Klemmen

| Anz.| Typ | Beschreibung
|----:|----------|------------------------------------
| 12 | BK9050 | Buskoppler
| 2 | KL1104 | 4 Digitaleingänge
| 2 | KL2404 | 4 Digitalausgänge (0,5 A)
| 3 | KL2424 | 4 Digitalausgänge (2 A)
| 2 | KL4004 | 4 Analogausgänge
| 1 | KL4002 | 2 Analogausgänge
| 22 | KL9188 | Potenzialverteilungsklemme
| 1 | KL9100 | Potenzialeinspeiseklemme
| 3 | KL3054 | 4 Analogeingänge
| 5 | KL3214 | PT100 4 Temperatureingänge (3-Leiter)
| 3 | KL3202 | PT100 2 Temperatureingänge (3-Leiter)
| 1 | KL2404 | 4 Digitalausgänge
| 2 | KL9010 | Endklemme
"""

    writer = pyladoc.DocumentWriter()
    writer.add_markdown(markdown_source)

    html_code = writer.to_html()
    document_validation.validate_html(html_code, check_for=['table'])

    if not WRITE_RESULT_FILES:
        return

    with open('tests/out/test_markdown_table.html', 'w', encoding='utf-8') as out_file:
        out_file.write(html_code)
|
||||
|
||||
|
||||
def test_markdown_equations():
    """Render markdown containing inline ``$...$`` LaTeX equations to HTML.

    The resulting HTML is passed to ``document_validation.validate_html``
    with ``check_for=['h1']``. When ``WRITE_RESULT_FILES`` is set, the HTML
    is also written to ``tests/out/test_markdown_equations.html``.
    """
    pyla = pyladoc.DocumentWriter()
    # The markdown must be a raw string: in a normal string literal Python
    # interprets the "\f" of every "\frac" as a form-feed character, which
    # silently corrupts the LaTeX commands before they reach the renderer.
    pyla.add_markdown(
        r"""
# Source Equations
1. $4(3x + 2) - 5(x - 1) = 3x + 14$
2. $\frac{2y + 5}{4} + \frac{3y - 1}{2} = 5$
3. $\frac{5}{x + 2} + \frac{2}{x - 2} = 3$
4. $8(3b - 5) + 4(b + 2) = 60$
5. $2c^2 - 3c - 5 = 0$
6. $4(2d - 1) + 5(3d + 2) = 7d + 28$
7. $q^2 + 6q + 9 = 16$

# Result Equations
1. $x = \frac{1}{4}$
2. $y = \frac{17}{8}$
3. $z = \frac{7}{3}$
4. $x = 1$ or $x = -6$
5. $a = \frac{1}{3}$ or $a = 2$
6. $x = -\frac{2}{3}$ or $x = 3$
7. $b = \frac{23}{7}$

# Step by Step
1. Distribute: $12x + 8 - 5x + 5 = 3x + 14$
2. Combine like terms: $7x + 13 = 3x + 14$
3. Subtract $3x$: $4x + 13 = 14$
4. Subtract $13$: $4x = 1$
5. Divide by $4$: $x = \frac{1}{4}$
""")

    html_code = pyla.to_html()
    document_validation.validate_html(html_code, check_for=['h1'])

    if WRITE_RESULT_FILES:
        with open('tests/out/test_markdown_equations.html', 'w', encoding='utf-8') as f:
            f.write(html_code)
|
||||
|
||||
|
||||
def test_markdown_characters():
    """Render markdown containing umlauts, symbols and markup-sensitive characters.

    The resulting HTML is passed to ``document_validation.validate_html``
    with ``check_for=['h1']``. When ``WRITE_RESULT_FILES`` is set, the HTML
    is also written to ``tests/out/test_markdown_characters.html``.
    """
    # Deliberately a normal (non-raw) string: the trailing "\\" is an escaped
    # single backslash.
    markdown_source = """
# Special caracters

Umlaute: ÖÄÜ öäü

Other: ß, €, @, $, %, ~, µ

Units: m³, cm²

Controll characters: <, >, ", ', &, |, /, \\

"""

    writer = pyladoc.DocumentWriter()
    writer.add_markdown(markdown_source)

    html_code = writer.to_html()
    document_validation.validate_html(html_code, check_for=['h1'])

    if not WRITE_RESULT_FILES:
        return

    with open('tests/out/test_markdown_characters.html', 'w', encoding='utf-8') as out_file:
        out_file.write(html_code)
|
Loading…
Reference in New Issue