From 9d8f5d39306324966e1966d640650d11bc8475ea Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 12 Nov 2025 21:53:10 +0100 Subject: [PATCH] stencil generation and helper code updated to use musl math --- stencils/aux_functions.c | 16 -------- stencils/generate_stencils.py | 68 ++++++++++++++++++------------- stencils/stencil_helper.h | 10 ++--- stencils/trigonometry.c | 76 ----------------------------------- 4 files changed, 43 insertions(+), 127 deletions(-) delete mode 100644 stencils/trigonometry.c diff --git a/stencils/aux_functions.c b/stencils/aux_functions.c index a68d43f..f82a668 100644 --- a/stencils/aux_functions.c +++ b/stencils/aux_functions.c @@ -1,8 +1,6 @@ #include #include "stencil_helper.h" -//double (*math_pow)(double, double); - volatile extern int dummy_int; volatile extern float dummy_float; @@ -13,20 +11,6 @@ int floor_div(float arg1, float arg2) { return i; } -float aux_sqrt(float x) { - return sqrtf(x); -} - float aux_get_42(float n) { return n + 42.0; } - -float aux_log(float x) -{ - return logf(x); -} - -float aux_exp(float x) -{ - return expf(x); -} diff --git a/stencils/generate_stencils.py b/stencils/generate_stencils.py index fac5128..e5c5c66 100644 --- a/stencils/generate_stencils.py +++ b/stencils/generate_stencils.py @@ -9,11 +9,10 @@ op_signs = {'add': '+', 'sub': '-', 'mul': '*', 'div': '/', 'pow': '**', 'bwand': '&', 'bwor': '|', 'bwxor': '^'} entry_func_prefix = '' -stencil_func_prefix = '__attribute__((aligned(1))) ' # Remove function alignment for stencils stack_size = 64 -includes = ['stencil_helper.h', 'aux_functions.c', 'trigonometry.c'] +includes = ['stencil_helper.h', 'aux_functions.c'] def read_files(files: list[str]) -> str: @@ -69,8 +68,7 @@ def get_entry_function_shell() -> str: @norm_indent def get_op_code(op: str, type1: str, type2: str, type_out: str) -> str: return f""" - {stencil_func_prefix}void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ - STENCIL_START({op}_{type1}_{type2}); + STENCIL void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ result_{type_out}_{type2}(arg1 {op_signs[op]} arg2, arg2); }} """ @@ -79,8 +77,7 @@ def get_op_code(op: str, type1: str, type2: str, type_out: str) -> str: @norm_indent def get_cast(type1: str, type2: str, type_out: str) -> str: return f""" - {stencil_func_prefix}void cast_{type_out}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ - STENCIL_START(cast_{type_out}_{type1}_{type2}); + STENCIL void cast_{type_out}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ result_{type_out}_{type2}(({type1})arg1, arg2); }} """ @@ -89,8 +86,7 @@ def get_cast(type1: str, type2: str, type_out: str) -> str: @norm_indent def get_func1(func_name: str, type1: str, type2: str) -> str: return f""" - {stencil_func_prefix}void {func_name}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ - STENCIL_START({func_name}_{type1}_{type2}); + STENCIL void {func_name}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ result_float_{type2}(aux_{func_name}((float)arg1), arg2); }} """ @@ -99,18 +95,34 @@ def get_func1(func_name: str, type1: str, type2: str) -> str: @norm_indent def get_func2(func_name: str, type1: str, type2: str) -> str: return f""" - {stencil_func_prefix}void {func_name}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ - STENCIL_START({func_name}_{type1}_{type2}); + STENCIL void {func_name}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ result_float_{type2}(aux_{func_name}((float)arg1, (float)arg2), arg2); }} """ +@norm_indent +def get_math_func1(func_name: str, type1: str, type2: str) -> str: + return f""" + STENCIL void {func_name}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ + result_float_{type2}({func_name}f((float)arg1), arg2); + }} + """ + + +@norm_indent +def get_math_func2(func_name: str, type1: str, type2: str) -> str: + return f""" + STENCIL void {func_name}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ + result_float_{type2}({func_name}f((float)arg1, (float)arg2), arg2); + }} + """ + + @norm_indent def get_conv_code(type1: str, type2: str, type_out: str) -> str: return f""" - {stencil_func_prefix}void conv_{type1}_{type2}({type1} arg1, {type2} arg2) {{ - STENCIL_START(conv_{type1}_{type2}); + STENCIL void conv_{type1}_{type2}({type1} arg1, {type2} arg2) {{ result_{type_out}_{type2}(({type_out})arg1, arg2); }} """ @@ -119,8 +131,7 @@ def get_conv_code(type1: str, type2: str, type_out: str) -> str: @norm_indent def get_op_code_float(op: str, type1: str, type2: str) -> str: return f""" - {stencil_func_prefix}void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ - STENCIL_START({op}_{type1}_{type2}); + STENCIL void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ result_float_{type2}((float)arg1 {op_signs[op]} (float)arg2, arg2); }} """ @@ -130,16 +141,14 @@ def get_op_code_float(op: str, type1: str, type2: str) -> str: def get_floordiv(op: str, type1: str, type2: str) -> str: if type1 == 'int' and type2 == 'int': return f""" - {stencil_func_prefix}void {op}_{type1}_{type2}({type1} a, {type2} b) {{ - STENCIL_START({op}_{type1}_{type2}); + STENCIL void {op}_{type1}_{type2}({type1} a, {type2} b) {{ int result = a / b - ((a % b != 0) && ((a < 0) != (b < 0))); result_int_{type2}(result, b); }} """ else: return f""" - {stencil_func_prefix}void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ - STENCIL_START({op}_{type1}_{type2}); + STENCIL void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ result_float_{type2}((float)floor_div((float)arg1, (float)arg2), arg2); }} """ @@ -162,8 +171,7 @@ def get_result_stubs2(type1: str, type2: str) -> str: @norm_indent def get_read_reg0_code(type1: str, type2: str, type_out: str) -> str: return f""" - {stencil_func_prefix}void read_{type_out}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{ - STENCIL_START(read_{type_out}_reg0_{type1}_{type2}); + STENCIL void read_{type_out}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{ result_{type_out}_{type2}(dummy_{type_out}, arg2); }} """ @@ -172,8 +180,7 @@ def get_read_reg0_code(type1: str, type2: str, type_out: str) -> str: @norm_indent def get_read_reg1_code(type1: str, type2: str, type_out: str) -> str: return f""" - {stencil_func_prefix}void read_{type_out}_reg1_{type1}_{type2}({type1} arg1, {type2} arg2) {{ - STENCIL_START(read_{type_out}_reg1_{type1}_{type2}); + STENCIL void read_{type_out}_reg1_{type1}_{type2}({type1} arg1, {type2} arg2) {{ result_{type1}_{type_out}(arg1, dummy_{type_out}); }} """ @@ -182,8 +189,7 @@ def get_read_reg1_code(type1: str, type2: str, type_out: str) -> str: @norm_indent def get_write_code(type1: str, type2: str) -> str: return f""" - {stencil_func_prefix}void write_{type1}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{ - STENCIL_START(write_{type1}); + STENCIL void write_{type1}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{ dummy_{type1} = arg1; result_{type1}_{type2}(arg1, arg2); }} @@ -215,7 +221,7 @@ if __name__ == "__main__": # Scalar arithmetic: types = ['int', 'float'] - ops = ['add', 'sub', 'mul', 'div', 'floordiv', 'gt', 'ge', 'eq', 'ne', 'atan2'] + ops = ['add', 'sub', 'mul', 'div', 'floordiv', 'gt', 'ge', 'eq', 'ne'] int_ops = ['bwand', 'bwor', 'bwxor', 'lshift', 'rshift'] for t1 in types: @@ -230,18 +236,24 @@ if __name__ == "__main__": t_out = 'int' if t1 == 'float' else 'float' code += get_cast(t1, t2, t_out) - fnames = ['sqrt', 'exp', 'log', 'sin', 'cos', 'tan', 'asin', 'atan', 'get_42'] + fnames = ['get_42'] for fn, t1 in permutate(fnames, types): code += get_func1(fn, t1, t1) + fnames = ['sqrt', 'exp', 'log', 'sin', 'cos', 'tan', 'asin', 'atan'] + for fn, t1 in permutate(fnames, types): + code += get_math_func1(fn, t1, t1) + + fnames = ['atan2', 'pow'] + for fn, t1, t2 in permutate(fnames, types, types): + code += get_math_func2(fn, t1, t2) + for op, t1, t2 in permutate(ops, types, types): t_out = t1 if t1 == t2 else 'float' if op == 'floordiv': code += get_floordiv('floordiv', t1, t2) elif op == 'div': code += get_op_code_float(op, t1, t2) - elif op in {'atan2'}: - code += get_func2(op, t1, t2) elif op in {'gt', 'eq', 'ge', 'ne'}: code += get_op_code(op, t1, t2, 'int') else: diff --git a/stencils/stencil_helper.h b/stencils/stencil_helper.h index 992330c..e753047 100644 --- a/stencils/stencil_helper.h +++ b/stencils/stencil_helper.h @@ -1,14 +1,10 @@ #include +// Remove function alignment for stencils #if defined(__GNUC__) #define NOINLINE __attribute__((noinline)) -#define STENCIL_START_EX(funcname) \ - __asm__ __volatile__( \ - ".global stencil_start_" #funcname "\n" \ - "stencil_start_" #funcname ":\n" \ - ) -#define STENCIL_START(funcname) +#define STENCIL __attribute__((aligned(1))) #else #define NOINLINE -#define STENCIL_START(funcname) +#define STENCIL #endif \ No newline at end of file diff --git a/stencils/trigonometry.c b/stencils/trigonometry.c deleted file mode 100644 index 79110fe..0000000 --- a/stencils/trigonometry.c +++ /dev/null @@ -1,76 +0,0 @@ -#include "stencil_helper.h" - -const float PI = 3.14159265358979323846f; -const float PI_2 = 1.57079632679489661923f; // pi/2 -const double TWO_OVER_PI = 0.63661977236758134308; // 2/pi -const double PIO2_HI = 1.57079625129699707031; // pi/2 high part -const double PIO2_LO = 7.54978941586159635335e-08; // pi/2 low part - -float aux_sin(float x) { - return sinf(x); -} - -float aux_cos(float x) { - return cosf(x); -} - -float aux_tan(float x) { - return tanf(x); -} - -float aux_atan(float x) { - const float absx = x < 0 ? -x : x; - - // Coefficients for a rational minimax fit on [0,1] - const float a0 = 0.9998660f; - const float a1 = -0.3302995f; - const float b1 = 0.1801410f; - const float b2 = -0.0126492f; - - float y; - if (absx <= 1.0f) { - float x2 = x * x; - y = x * (a0 + a1 * x2) / (1.0f + b1 * x2 + b2 * x2 * x2); - } else { - float inv = 1.0f / absx; - float x2 = inv * inv; - float core = inv * (a0 + a1 * x2) / (1.0f + b1 * x2 + b2 * x2 * x2); - y = PI_2 - core; - } - - return x < 0 ? -y : y; -} - -float aux_atan2(float y, float x) { - if (x == 0.0f) { - if (y > 0.0f) return PI_2; - if (y < 0.0f) return -PI_2; - return 0.0f; // TODO: undefined - } - - float abs_y = y < 0 ? -y : y; - float abs_x = x < 0 ? -x : x; - float angle; - - if (abs_x > abs_y) - angle = aux_atan(y / x); - else - angle = PI_2 - aux_atan(x / y); - - // Quadrant correction - if (x < 0) angle = (y >= 0) ? angle + PI : angle - PI; - return angle; -} - -float aux_asin(float x) { - if (x > 1.0f) x = 1.0f; - if (x < -1.0f) x = -1.0f; - - const float c3 = 0.16666667f; // ≈ 1/6 - const float c5 = 0.07500000f; // ≈ 3/40 - const float c7 = 0.04464286f; // ≈ 5/112 - - float x2 = x * x; - float p = x + x * x2 * (c3 + x2 * (c5 + c7 * x2)); - return p; -} \ No newline at end of file