pyladoc/tests/document_validation.py

65 lines
2.4 KiB
Python
Raw Normal View History

2025-03-28 12:30:08 +00:00
from typing import Generator, Any
from lxml import etree
from lxml.etree import _Element as EElement # type: ignore
import requests
# Load the HTML page skeleton once, at import time. validate_html() substitutes
# the fragment under test for the '<!--CONTENT-->' placeholder in this text
# before sending the page to the online validator.
# NOTE(review): the path is relative, so it is resolved against the current
# working directory -- presumably the repository root; confirm for your runner.
with open('src/pyladoc/templates/test_template.html', mode='rt', encoding='utf-8') as f:
    html_test_template = f.read()
def add_line_numbers(multiline_string: str) -> str:
    """Return *multiline_string* with each line prefixed by its 1-based number.

    Lines are split with ``str.splitlines`` (so any line-ending style is
    accepted and a trailing newline does not produce an extra line), each one
    is rendered as ``"<number>: <line>"``, and the results are joined with
    ``"\\n"``. An empty input yields an empty string.
    """
    return "\n".join(
        f"{number}: {text}"
        for number, text in enumerate(multiline_string.splitlines(), start=1)
    )
def validate_html_with_w3c(html_string: str, timeout: float = 30.0) -> dict[str, Any]:
    """Submit an HTML document to the W3C Nu validator and return its report.

    Args:
        html_string: The complete HTML document to validate.
        timeout: Seconds to wait for the validator before giving up. Without
            a timeout ``requests`` waits indefinitely, which would hang the
            calling test run if the service stalls.

    Returns:
        The validator's parsed JSON response when the request succeeds with
        status 200. Otherwise a dict with an ``"error"`` description (and
        ``"details"`` holding the raw response text when a response was
        received). This function does not raise for request failures.
    """
    validator_url = "https://validator.w3.org/nu/"
    # Parameters for the POST request
    headers = {
        "Content-Type": "text/html; charset=utf-8",
        "User-Agent": "Python HTML Validator",
    }

    try:
        response = requests.post(
            validator_url,
            headers=headers,
            # Encode explicitly so the bytes on the wire always match the
            # charset declared in the Content-Type header, instead of relying
            # on the transport's default handling of str bodies.
            data=html_string.encode("utf-8"),
            params={"out": "json"},
            timeout=timeout,
        )
    except requests.RequestException as e:
        return {"error": f"An error occurred while connecting to the W3C Validator: {str(e)}"}

    if response.status_code != 200:
        return {
            "error": f"Failed to validate HTML. Status code: {response.status_code}",
            "details": response.text,
        }

    try:
        return response.json()
    except ValueError as e:
        # JSON decode errors derive from ValueError in every requests version;
        # report them like any other failure instead of letting them escape.
        return {
            "error": f"The W3C Validator returned a response that is not valid JSON: {str(e)}",
            "details": response.text,
        }
def validate_html(html_string: str, validate_online: bool = False, check_for: list['str'] = ['table', 'svg', 'div']):
    """Assert that an HTML fragment contains the expected tags and, optionally, validates.

    Args:
        html_string: The HTML fragment to check.
        validate_online: When true, the fragment is inserted into the test
            page template (replacing ``<!--CONTENT-->``) and the resulting
            page is sent to the W3C validator.
        check_for: Tag names that must each occur at least once in the parsed
            fragment. The default list is only read here, never mutated.

    Raises:
        AssertionError: If a required tag is missing, the validation request
            did not return a ``messages`` list, or the validator reported any
            message. In the last case the numbered page source and all
            messages are printed first.
    """
    root = etree.fromstring(html_string, parser=etree.HTMLParser(recover=True))

    if root is None:
        # Defensive: if the parser produced no root element, report the
        # missing tags below rather than failing with an AttributeError.
        tags = set()
    else:
        # iter() walks the root and all descendants without Python-level
        # recursion; comments and processing instructions have a non-string
        # tag and are skipped.
        tags = {element.tag for element in root.iter() if isinstance(element.tag, str)}

    for tag_type in check_for:
        assert tag_type in tags, f"Tag {tag_type} not found in the html code"

    if not validate_online:
        return

    test_page = html_test_template.replace('<!--CONTENT-->', html_string)
    validation_result = validate_html_with_w3c(test_page)
    assert 'messages' in validation_result, 'Validate request failed'

    messages = validation_result['messages']
    if messages:
        print(add_line_numbers(test_page))
        for verr in messages:
            # Use .get() so a message lacking one of these fields cannot
            # raise KeyError and hide the actual validation report.
            print(f"- {verr.get('type')}: {verr.get('message')} (line: {verr.get('lastLine')})")

    assert len(messages) == 0, f'{len(messages)} validation error, first error: {messages[0].get("message")}'