From 354af7cf00ce29197e02b6b59846125009297bae Mon Sep 17 00:00:00 2001
From: Nicolas Kruse
Date: Wed, 22 Oct 2025 23:17:53 +0200
Subject: [PATCH] stencil generation script moved and refactored

---
Reviewer notes (placed in the notes area after the three-dash line, so they
are not part of the commit message):

 * Recovered from a copy whose newlines and indentation had been collapsed.
   Line breaks follow the line counts in the hunk headers. Indentation, the
   position of blank context lines and possible wraps of long context lines
   (e.g. the op_signs line) are reconstructed by convention - TODO confirm
   against the tree before applying, or try `git apply --ignore-whitespace`.
 * Text in angle brackets was stripped from the copy. Both `#include` lines
   are restored as <stdint.h>, inferred from the use of uint32_t/int32_t.
   The author's e-mail address was lost the same way and is NOT restored.
 * Changed versus the recovered text: normalize_indent() measured a line's
   indent with strip(), which also counts trailing whitespace; it now uses
   lstrip(), matching the two indent_amount computations above it. The blob
   ids in the "index" line therefore no longer match this file.
 * NOTE(review): fast_pow_float() uses 1072632447 (0x3FEF127F), which sits
   near the high word of the double 1.0 (0x3FF00000). On float bits
   (1.0f == 0x3F800000) this looks like the double-precision constant -
   confirm. Left unchanged because this commit only moves the code.
 * NOTE(review): the last hunk of tools/nativecompile.sh declares 7 old
   lines but only 6 were present in the copy, and the patch trailer is
   missing - the copy appears truncated; nothing was guessed.

 stencils/aux_functions.c                 | 26 +++++++
 {tools => stencils}/generate_stencils.py | 88 +++++++++++++++---------
 tools/build.sh                           |  2 +-
 tools/crosscompile.sh                    |  4 +-
 tools/nativecompile.sh                   |  4 +-
 5 files changed, 85 insertions(+), 39 deletions(-)
 create mode 100644 stencils/aux_functions.c
 rename {tools => stencils}/generate_stencils.py (77%)

diff --git a/stencils/aux_functions.c b/stencils/aux_functions.c
new file mode 100644
index 0000000..de2c562
--- /dev/null
+++ b/stencils/aux_functions.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+
+//double (*math_pow)(double, double);
+
+volatile int dummy_int = 1337;
+volatile float dummy_float = 1337;
+
+__attribute__((noinline)) int floor_div(float arg1, float arg2) {
+    float x = arg1 / arg2;
+    int i = (int)x;
+    if (x < 0 && x != (float)i) i -= 1;
+    return i;
+}
+
+float fast_pow_float(float base, float exponent) {
+    union {
+        float f;
+        uint32_t i;
+    } u;
+
+    u.f = base;
+    int32_t x = u.i;
+    int32_t y = (int32_t)(exponent * (x - 1072632447) + 1072632447);
+    u.i = (uint32_t)y;
+    return u.f;
+}
\ No newline at end of file
diff --git a/tools/generate_stencils.py b/stencils/generate_stencils.py
similarity index 77%
rename from tools/generate_stencils.py
rename to stencils/generate_stencils.py
index f81eb32..146c81e 100644
--- a/tools/generate_stencils.py
+++ b/stencils/generate_stencils.py
@@ -1,5 +1,7 @@
-from typing import Generator
+from typing import Generator, Callable
 import argparse
+from pathlib import Path
+import os
 
 op_signs = {'add': '+', 'sub': '-', 'mul': '*', 'div': '/', 'pow': '**', 'gt': '>', 'eq': '==', 'ne': '!=', 'mod': '%'}
 
@@ -9,8 +11,44 @@ stencil_func_prefix = '__attribute__((naked)) '  # Remove callee prolog
 stack_size = 64
 
 
+includes = ['aux_functions.c']
 
-def get_aux_funcs() -> str:
+def read_files(files: list[str]) -> str:
+    ret = ''
+    script_dir = Path(__file__).parent
+    for file_name in files:
+        file_path = script_dir / file_name
+        if not os.path.exists(file_path):
+            file_path = Path(file_name)
+        with open(file_path) as f:
+            ret += f.read().strip(' \n') + '\n\n'
+    return ret
+
+
+def normalize_indent(text: str) -> str:
+    text_lines = text.splitlines()
+    if len(text_lines) > 1 and not text_lines[0].strip():
+        text_lines = text_lines[1:]
+
+    if not text_lines:
+        return ''
+
+    if len(text_lines) > 1 and text_lines[0] and text_lines[0][0] != ' ':
+        indent_amount = len(text_lines[1]) - len(text_lines[1].lstrip())
+    else:
+        indent_amount = len(text_lines[0]) - len(text_lines[0].lstrip())
+
+    return '\n' + '\n'.join(
+        [' ' * max(0, len(line) - len(line.lstrip()) - indent_amount) + line.strip()
+         for line in text_lines])
+
+
+def norm_indent(f: Callable[..., str]) -> Callable[..., str]:
+    return lambda *x: normalize_indent(f(*x))
+
+
+@norm_indent
+def get_entry_function_shell() -> str:
     return f"""
     {entry_func_prefix}int entry_function_shell(){{
         volatile char stack_place_holder[{stack_size}];
@@ -18,30 +56,10 @@ def get_aux_funcs() -> str:
         result_int(0);
         return 1;
     }}
-
-    """ + """
-    __attribute__((noinline)) int floor_div(float arg1, float arg2) {
-        float x = arg1 / arg2;
-        int i = (int)x;
-        if (x < 0 && x != (float)i) i -= 1;
-        return i;
-    }
-
-    float fast_pow_float(float base, float exponent) {
-        union {
-            float f;
-            uint32_t i;
-        } u;
-
-        u.f = base;
-        int32_t x = u.i;
-        int32_t y = (int32_t)(exponent * (x - 1072632447) + 1072632447);
-        u.i = (uint32_t)y;
-        return u.f;
-    }
     """
 
 
+@norm_indent
 def get_op_code(op: str, type1: str, type2: str, type_out: str) -> str:
     return f"""
     {stencil_func_prefix}void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
@@ -50,6 +68,7 @@ def get_op_code(op: str, type1: str, type2: str, type_out: str) -> str:
     """
 
 
+@norm_indent
 def get_cast(type1: str, type2: str, type_out: str) -> str:
     return f"""
     {stencil_func_prefix}void cast_{type_out}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
@@ -58,6 +77,7 @@ def get_cast(type1: str, type2: str, type_out: str) -> str:
     """
 
 
+@norm_indent
 def get_conv_code(type1: str, type2: str, type_out: str) -> str:
     return f"""
     {stencil_func_prefix}void conv_{type1}_{type2}({type1} arg1, {type2} arg2) {{
@@ -66,6 +86,7 @@ def get_conv_code(type1: str, type2: str, type_out: str) -> str:
     """
 
 
+@norm_indent
 def get_op_code_float(op: str, type1: str, type2: str) -> str:
     return f"""
     {stencil_func_prefix}void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
@@ -74,6 +95,7 @@ def get_op_code_float(op: str, type1: str, type2: str) -> str:
     """
 
 
+@norm_indent
 def get_pow(type1: str, type2: str) -> str:
     return f"""
     {stencil_func_prefix}void pow_{type1}_{type2}({type1} arg1, {type2} arg2) {{
@@ -83,6 +105,7 @@ def get_pow(type1: str, type2: str) -> str:
     """
 
 
+@norm_indent
 def get_floordiv(op: str, type1: str, type2: str) -> str:
     if type1 == 'int' and type2 == 'int':
         return f"""
@@ -98,18 +121,21 @@ def get_floordiv(op: str, type1: str, type2: str) -> str:
     """
 
 
+@norm_indent
 def get_result_stubs1(type1: str) -> str:
     return f"""
     void result_{type1}({type1} arg1);
     """
 
 
+@norm_indent
 def get_result_stubs2(type1: str, type2: str) -> str:
     return f"""
     void result_{type1}_{type2}({type1} arg1, {type2} arg2);
     """
 
 
+@norm_indent
 def get_read_reg0_code(type1: str, type2: str, type_out: str) -> str:
     return f"""
     {stencil_func_prefix}void read_{type_out}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{
@@ -118,6 +144,7 @@ def get_read_reg0_code(type1: str, type2: str, type_out: str) -> str:
     """
 
 
+@norm_indent
 def get_read_reg1_code(type1: str, type2: str, type_out: str) -> str:
     return f"""
     {stencil_func_prefix}void read_{type_out}_reg1_{type1}_{type2}({type1} arg1, {type2} arg2) {{
@@ -126,6 +153,7 @@ def get_read_reg1_code(type1: str, type2: str, type_out: str) -> str:
     """
 
 
+@norm_indent
 def get_write_code(type1: str) -> str:
     return f"""
     {stencil_func_prefix}void write_{type1}({type1} arg1) {{
@@ -154,17 +182,9 @@ if __name__ == "__main__":
     if args.abi:
         entry_func_prefix = f"__attribute__(({args.abi}_abi)) "
 
-    code = """
-    // Auto-generated stencils for copapy
-    // Do not edit manually
+    code = "// Auto-generated stencils for copapy - Do not edit manually\n\n"
 
-    #include <stdint.h>
-
-    double (*math_pow)(double, double);
-
-    volatile int dummy_int = 1337;
-    volatile float dummy_float = 1337;
-    """
+    code += read_files(includes)
 
     # Scalar arithmetic:
     types = ['int', 'float']
@@ -176,7 +196,7 @@ if __name__ == "__main__":
     for t1, t2 in permutate(types, types):
         code += get_result_stubs2(t1, t2)
 
-    code += get_aux_funcs()
+    code += get_entry_function_shell()
 
     for t1, t2 in permutate(types, types):
         t_out = 'int' if t1 == 'float' else 'float'
diff --git a/tools/build.sh b/tools/build.sh
index c344e30..4513ed1 100644
--- a/tools/build.sh
+++ b/tools/build.sh
@@ -4,7 +4,7 @@ set -v
 mkdir -p bin
 SRC=bin/stencils.c
 DEST=src/copapy/obj
-python3 tools/generate_stencils.py $SRC
+python3 stencils/generate_stencils.py $SRC
 mkdir -p $DEST
 gcc --version
 #gcc -c $SRC -O0 -o $DEST/stencils_x86_64_O0.o
diff --git a/tools/crosscompile.sh b/tools/crosscompile.sh
index d81b3e8..57d4a2f 100644
--- a/tools/crosscompile.sh
+++ b/tools/crosscompile.sh
@@ -11,7 +11,7 @@ OPT=O3
 mkdir -p $DEST
 
 # Windows x86_64 (ARM64)
-python3 tools/generate_stencils.py --abi ms $SRC
+python3 stencils/generate_stencils.py --abi ms $SRC
 gcc-13 -$OPT -c $SRC -o $DEST/stencils_AMD64_$OPT.o
 
 # Windows x86
@@ -19,7 +19,7 @@ gcc-13 -m32 -$OPT -c $SRC -o $DEST/stencils_x86_$OPT.o
 
 
 # Native x86_64
-python3 tools/generate_stencils.py $SRC
+python3 stencils/generate_stencils.py $SRC
 gcc-13 -$OPT -c $SRC -o $DEST/stencils_x86_64_$OPT.o
 
 # Native i686
diff --git a/tools/nativecompile.sh b/tools/nativecompile.sh
index a3da168..078e415 100644
--- a/tools/nativecompile.sh
+++ b/tools/nativecompile.sh
@@ -6,7 +6,7 @@ mkdir -p bin
 SRC=bin/stencils.c
 DEST=src/copapy/obj
 
-python tools/generate_stencils.py $SRC
+python stencils/generate_stencils.py $SRC
 mkdir -p $DEST
 
 gcc-12 -c $SRC -O0 -o $DEST/stencils_x86_64_O0.o
@@ -14,7 +14,7 @@ gcc-12 -c $SRC -O1 -o $DEST/stencils_x86_64_O1.o
 gcc-12 -c $SRC -O2 -o $DEST/stencils_x86_64_O2.o
 gcc-12 -c $SRC -O3 -o $DEST/stencils_x86_64_O3.o
 
-python tools/generate_stencils.py --abi ms $SRC
+python stencils/generate_stencils.py --abi ms $SRC
 gcc-12 -c $SRC -O0 -o $DEST/stencils_AMD64_O0.o
 gcc-12 -c $SRC -O1 -o $DEST/stencils_AMD64_O1.o