gaspype/tests/md_to_code.py

130 lines
4.7 KiB
Python
Raw Normal View History

import re
from typing import Generator, Iterable
from dataclasses import dataclass
import sys
@dataclass
class markdown_segment:
    """One contiguous piece of a markdown document.

    Instances are produced by ``segment_markdown`` and consumed by the
    ``segments_to_*`` generators in this module.
    """

    # True when the text was found between two ``` fence lines.
    code_block: bool
    # Language tag captured from the most recent fence line ('' if none).
    language: str
    # The segment's text, without the fence lines themselves.
    text: str
def convert_to(target_format: str, md_filename: str, out_filename: str, language: str = 'python'):
    """Convert a markdown file into another representation and write it out.

    Args:
        target_format: One of ``'test'``, ``'script'`` or
            ``'striped_markdown'``; selects which ``segments_to_*``
            generator renders the output.
        md_filename: Path of the markdown file to read.
        out_filename: Path of the file to write; it is overwritten.
        language: Fence language tag passed on to the selected generator.

    Raises:
        ValueError: If ``target_format`` is not one of the supported values.
            The check happens before any file is opened, so an existing
            output file is left untouched in that case.
    """
    # Resolve the converter first: opening the output file with "w"
    # truncates it, which must not happen for an invalid request.
    if target_format == 'test':
        converter = segments_to_test
    elif target_format == 'script':
        converter = segments_to_script
    elif target_format == 'striped_markdown':
        converter = segments_to_striped_markdown
    else:
        raise ValueError('Unknown target format')

    with open(md_filename, "r") as f_in, open(out_filename, "w") as f_out:
        segments = segment_markdown(f_in)
        f_out.write('\n'.join(converter(segments, language)))
def segment_markdown(markdown_file: Iterable[str]) -> Generator[markdown_segment, None, None]:
    """Split markdown lines into alternating prose and fenced-code segments.

    A line consisting of three backticks, optional whitespace and an optional
    language tag (word characters and ``-``) is treated as a fence: it ends
    the current segment and toggles between prose and code mode. Fence lines
    are not part of any segment, and segments without any lines are skipped.

    Args:
        markdown_file: Iterable of lines. The lines are concatenated without
            a separator, so they are expected to keep their line terminators
            (as they do when iterating over an open text file).

    Yields:
        ``markdown_segment`` objects in document order. ``language`` holds
        the tag captured from the fence line that opened the segment (the
        empty string when that fence carried no tag).
    """
    regex = re.compile(r"(?:^```\s*(?P<language>(?:\w|-)*)$)", re.MULTILINE)

    block_language: str = ''
    code_block = False
    line_buffer: list[str] = []

    for line in markdown_file:
        match = regex.match(line)
        if match:
            # Flush what was collected before the fence, then switch mode.
            if line_buffer:
                yield markdown_segment(code_block, block_language, ''.join(line_buffer))
                line_buffer.clear()
            block_language = match.group('language')
            code_block = not code_block
        else:
            line_buffer.append(line)

    # Trailing segment: joined exactly like the intermediate ones. Joining
    # with '\n' here would double every newline, because the buffered lines
    # already end with their own terminator.
    if line_buffer:
        yield markdown_segment(code_block, block_language, ''.join(line_buffer))
def segments_to_script(segments: Iterable[markdown_segment], test_language: str = "python") -> Generator[str, None, None]:
    """Render markdown segments as the lines of a commented script.

    Code blocks tagged with ``test_language`` are emitted verbatim, code
    blocks in any other language are emitted line by line behind ``'# | '``,
    and prose (stripped of surrounding spaces and newlines) is emitted line
    by line behind ``'# '``. Every segment is followed by an empty string.

    Args:
        segments: Segments to render, in document order.
        test_language: Language tag of the code blocks to keep as code.

    Yields:
        The output pieces; the caller joins them with newlines.
    """
    for seg in segments:
        if not seg.code_block:
            prose = seg.text.strip(' \n')
            yield from ('# ' + row for row in prose.splitlines())
        elif seg.language == test_language:
            yield seg.text
        else:
            yield from ('# | ' + row for row in seg.text.splitlines())
        # Blank separator after every segment, whatever its kind.
        yield ''
def segments_to_striped_markdown(segments: Iterable[markdown_segment], test_language: str = "python") -> Generator[str, None, None]:
    """Render markdown segments with only ``test_language`` blocks kept fenced.

    Code blocks tagged with ``test_language`` are re-emitted between
    ``"``` <test_language>"`` and ``"```"`` fence lines. Code blocks with a
    different, non-empty tag are emitted line by line behind ``'# | '``;
    code blocks without a tag contribute no text. Prose (stripped of
    surrounding spaces and newlines) is emitted line by line behind ``'# '``.
    Every segment is followed by an empty string.

    Args:
        segments: Segments to render, in document order.
        test_language: Language tag of the code blocks to keep fenced.

    Yields:
        The output pieces; the caller joins them with newlines.
    """
    for seg in segments:
        if not seg.code_block:
            prose = seg.text.strip(' \n')
            yield from ('# ' + row for row in prose.splitlines())
        elif seg.language == test_language:
            yield "``` " + test_language
            yield seg.text
            yield "```"
        elif seg.language:
            yield from ('# | ' + row for row in seg.text.splitlines())
        # Blank separator after every segment, whatever its kind.
        yield ''
def segments_to_test(segments: Iterable[markdown_segment], script_language: str = "python") -> Generator[str, None, None]:
    """Render markdown segments as the lines of a runnable test module.

    The generated module defines ``run_test()`` and calls it when executed
    as a script. Each code block tagged with ``script_language`` is copied
    into the function body. When the last non-blank line of such a block
    looks like a bare expression, that line is replaced by statements that
    print its ``repr``; if the next code block in another language follows,
    its text is taken as the expected ``repr`` and an ``assert`` comparing
    the two (both stripped) is generated. Prose segments are ignored.

    A last line is NOT treated as a result expression when it contains ``=``
    before any ``(``, or starts with ``import ``, ``from ``, ``print(``, a
    space (continuation/indented code) or ``#`` (comment).

    Args:
        segments: Segments to render, in document order.
        script_language: Language tag of the code blocks to execute.

    Yields:
        The lines of the generated module; the caller joins them with
        newlines.
    """
    indent = ' '  # prefix placed before every statement of run_test()
    non_expression_prefixes = ('import ', 'from ', 'print(', ' ', '#')

    # Expression of the most recent script block that still awaits its
    # reference block; None when there is nothing to compare against.
    result_expr = None
    body_emitted = False

    yield 'def run_test():'

    for segment in segments:
        if not segment.code_block:
            continue

        if segment.language == script_language:
            code_lines = segment.text.splitlines()
            # Index of the last non-blank line, or None for an empty block.
            # Working with the index (instead of a blank-filtered copy)
            # keeps "which line is the result" and "which lines are copied"
            # in sync even when the block ends with blank lines.
            last_index = max(
                (index for index, line in enumerate(code_lines) if line.strip()),
                default=None,
            )

            result_expr = None
            if last_index is not None:
                candidate = code_lines[last_index]
                if not (re.match(r'^[^(]*=', candidate)
                        or candidate.startswith(non_expression_prefixes)):
                    result_expr = candidate

            yield ''
            yield indent + 'print("---------------------------------------------------------")'
            yield ''
            body_emitted = True

            if result_expr is None:
                yield from (indent + line for line in code_lines)
            else:
                yield from (indent + line for line in code_lines[:last_index])
                # repr() produces a valid string literal even when the
                # expression itself contains quotes or backslashes.
                label = f'-- Result (({result_expr})):'
                yield f'{indent}print({label!r})'
                yield f'{indent}print(({result_expr}).__repr__().strip())'

        elif result_expr is not None:
            # First foreign-language block after a result expression: its
            # text is the expected output.
            yield indent + 'ref_str = r"""'
            yield from segment.text.splitlines()
            yield '"""'
            label = f'-- Reference (({result_expr})):'
            yield f'{indent}print({label!r})'
            yield indent + 'print(ref_str.strip())'
            yield f'{indent}assert ({result_expr}).__repr__().strip() == ref_str.strip()'
            result_expr = None

    if not body_emitted:
        # Without any script block the function would have no body, which
        # is a syntax error in the generated module.
        yield indent + 'pass'

    yield '\nif __name__ == "__main__":'
    yield indent + 'run_test()'
if __name__ == "__main__":
    # Command line entry point:
    #   <script> <test|script> <markdown file> <output file>
    # Validation uses explicit checks instead of assert, because assert
    # statements are removed when Python runs with -O.
    if len(sys.argv) < 4:
        sys.exit(f'Usage: {sys.argv[0]} <test|script> <md_filename> <out_filename>')

    target_format = sys.argv[1]
    # NOTE(review): convert_to also handles 'striped_markdown', but this
    # entry point has only ever accepted the two formats below - confirm
    # whether the third one should be exposed here as well.
    if target_format not in ('test', 'script'):
        sys.exit(f"Unknown target format {target_format!r}; expected 'test' or 'script'")

    convert_to(target_format, sys.argv[2], sys.argv[3])