copapy/docs/source/extract_section.py

35 lines
1.2 KiB
Python
Raw Normal View History

2025-12-06 22:25:15 +00:00
import re
# Heading line: 1-6 '#' marks, whitespace, then the heading text (trailing
# whitespace excluded from the captured text).
_HEADING_RE = re.compile(r'^(#{1,6})\s+(.*?)\s*$', re.MULTILINE | re.DOTALL)

# Candidate fence line: up to three spaces of indent, then a run of at least
# three backticks or tildes, then the rest of the line (info string).
_FENCE_RE = re.compile(r' {0,3}(`{3,}|~{3,})(.*)')


def _fenced_spans(md_text: str) -> list[tuple[int, int]]:
    """Return (start, end) character offsets of fenced code blocks in md_text."""
    spans: list[tuple[int, int]] = []
    open_marker = ''  # fence run that opened the current block; '' when outside
    open_start = 0
    pos = 0
    # Split on '\n' only so offsets agree with the regex '^' anchor.
    for line in md_text.split('\n'):
        found = _FENCE_RE.match(line)
        if found:
            marker, rest = found.group(1), found.group(2)
            if not open_marker:
                # A backtick fence may not have backticks in its info string;
                # such a line is inline code, not an opening fence.
                if not (marker[0] == '`' and '`' in rest):
                    open_marker = marker
                    open_start = pos
            elif (marker[0] == open_marker[0]
                  and len(marker) >= len(open_marker)
                  and not rest.strip()):
                spans.append((open_start, pos + len(line)))
                open_marker = ''
        pos += len(line) + 1
    if open_marker:
        # An unclosed fence runs to the end of the document.
        spans.append((open_start, len(md_text)))
    return spans


def extract_sections(md_text: str) -> dict[str, str]:
    """
    Split Markdown text into sections keyed by heading text.

    Every ATX heading (``#`` to ``######``) starts a new section that runs
    up to the next heading of any level, or to the end of the text. Lines
    that merely look like headings inside fenced code blocks (for example
    ``# comment`` in a Python snippet) do not start a section. Text before
    the first heading is not returned.

    Returns {heading_text: section_content}. The content is stripped of
    surrounding whitespace and image/link targets beginning with
    ``docs/source/media/`` are rewritten to ``media/``. If a heading text
    occurs more than once, the last occurrence wins.

    Works for simple Markdown, not fully strict.
    """
    fenced = _fenced_spans(md_text)
    headings = [
        m for m in _HEADING_RE.finditer(md_text)
        if not any(lo <= m.start() < hi for lo, hi in fenced)
    ]

    sections: dict[str, str] = {}
    for index, heading in enumerate(headings):
        # The section body ends where the next real heading begins.
        if index + 1 < len(headings):
            stop = headings[index + 1].start()
        else:
            stop = len(md_text)
        content = md_text[heading.end():stop]
        sections[heading.group(2)] = content.strip().replace('](docs/source/media/', '](media/')
    return sections
if __name__ == '__main__':
    # Regenerate documentation pages from sections of README.md.
    # All paths are relative to the current working directory.
    # The encoding is given explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open('README.md', 'rt', encoding='utf-8') as f:
        readme = extract_sections(f.read())

    # start.md: the selected README sections, each under a top-level heading.
    # A missing section raises KeyError.
    with open('docs/source/start.md', 'wt', encoding='utf-8') as f:
        f.write('\n'.join(f"# {s}\n" + readme[s] for s in ['Copapy', 'Current state', 'Install', 'License']))

    # compiler.md: the "How it works" section under its own heading.
    with open('docs/source/compiler.md', 'wt', encoding='utf-8') as f:
        f.write('# How it works\n')
        f.write('\n'.join(readme[s] for s in ['How it works']))