From 069f79094c454cdfc4edea44d6a60c89b2817be1 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 26 Dec 2025 14:43:22 +0100 Subject: [PATCH] fast "abs" function for int and float added including test --- src/copapy/_math.py | 7 +++++-- stencils/aux_functions.c | 7 ------- stencils/generate_stencils.py | 30 +++++++++++++++++++-------- stencils/test.c | 1 - tests/test_math.py | 13 ++++++++---- tools/cross_compiler_unix/packobjs.sh | 4 ++-- 6 files changed, 37 insertions(+), 25 deletions(-) diff --git a/src/copapy/_math.py b/src/copapy/_math.py index cea3824..facd90c 100644 --- a/src/copapy/_math.py +++ b/src/copapy/_math.py @@ -295,8 +295,11 @@ def abs(x: U | value[U] | vector[U]) -> Any: Returns: Absolute value of x """ - ret = (x < 0) * -x + (x >= 0) * x - return ret + if isinstance(x, value): + return add_op('abs', [x]) + if isinstance(x, vector): + return x.map(abs) + return (x < 0) * -x + (x >= 0) * x @overload diff --git a/stencils/aux_functions.c b/stencils/aux_functions.c index a7ab664..dd92c72 100644 --- a/stencils/aux_functions.c +++ b/stencils/aux_functions.c @@ -4,13 +4,6 @@ volatile extern int dummy_int; volatile extern float dummy_float; -int floor_div(float arg1, float arg2) { - float x = arg1 / arg2; - int i = (int)x; - if (x < 0 && x != (float)i) i -= 1; - return i; -} - NOINLINE float auxsub_get_42(int n) { return n * 5.0f + 21.0f; } diff --git a/stencils/generate_stencils.py b/stencils/generate_stencils.py index 0a039d4..80bd15a 100644 --- a/stencils/generate_stencils.py +++ b/stencils/generate_stencils.py @@ -84,10 +84,19 @@ def get_cast(type1: str, type2: str, type_out: str) -> str: @norm_indent -def get_func1(func_name: str, type1: str, type2: str) -> str: +def get_func1(func_name: str, type1: str) -> str: return f""" - STENCIL void {func_name}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ - result_float_{type2}(aux_{func_name}((float)arg1), arg2); + STENCIL void {func_name}_{type1}({type1} arg1) {{ + result_float(aux_{func_name}((float)arg1)); 
+ }} + """ + + +@norm_indent +def get_custom_stencil(stencil_signature: str, stencil_body: str) -> str: + return f""" + STENCIL void {stencil_signature} {{ + {stencil_body} }} """ @@ -102,10 +111,10 @@ def get_func2(func_name: str, type1: str, type2: str) -> str: @norm_indent -def get_math_func1(func_name: str, type1: str) -> str: +def get_math_func1(func_name: str, type1: str, stencil_name: str) -> str: return f""" - STENCIL void {func_name}_{type1}({type1} arg1) {{ - result_float({func_name}f((float)arg1)); + STENCIL void {stencil_name}_{type1}({type1} arg1) {{ + result_float({func_name}((float)arg1)); }} """ @@ -149,7 +158,7 @@ def get_floordiv(op: str, type1: str, type2: str) -> str: else: return f""" STENCIL void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{ - result_float_{type2}((float)floor_div((float)arg1, (float)arg2), arg2); + result_float_{type2}(floorf((float)arg1 / (float)arg2), arg2); }} """ @@ -238,11 +247,14 @@ if __name__ == "__main__": fnames = ['get_42'] for fn, t1 in permutate(fnames, types): - code += get_func1(fn, t1, t1) + code += get_func1(fn, t1) fnames = ['sqrt', 'exp', 'log', 'sin', 'cos', 'tan', 'asin', 'acos', 'atan'] for fn, t1 in permutate(fnames, types): - code += get_math_func1(fn, t1) + code += get_math_func1(fn + 'f', t1, fn) + + code += get_math_func1('fabsf', 'float', 'abs') + code += get_custom_stencil('abs_int(int arg1)', 'result_int(__builtin_abs(arg1));') fnames = ['atan2', 'pow'] for fn, t1, t2 in permutate(fnames, types, types): diff --git a/stencils/test.c b/stencils/test.c index 43284cf..976d3bd 100644 --- a/stencils/test.c +++ b/stencils/test.c @@ -3,7 +3,6 @@ int main() { // Test aux functions float a = 16.0f; - float div_result = (float)floor_div(-7.0f, 3.0f); float g42 = aux_get_42(0.0f); return 0; } diff --git a/tests/test_math.py b/tests/test_math.py index 534d309..17efda1 100644 --- a/tests/test_math.py +++ b/tests/test_math.py @@ -9,7 +9,6 @@ def test_fine(): a_f = 2.5 c_i = value(a_i) c_f = value(a_f) - # 
c_b = variable(True) ret_test = (c_f ** 2, c_i ** -1, @@ -19,7 +18,9 @@ def test_fine(): cp.sqrt(c_f), cp.sin(c_f), cp.cos(c_f), - cp.tan(c_f)) # , c_i & 3) + cp.tan(c_f), + cp.abs(-c_i), + cp.abs(-c_f)) re2_test = (a_f ** 2, a_i ** -1, @@ -29,7 +30,9 @@ def test_fine(): cp.sqrt(a_f), cp.sin(a_f), cp.cos(a_f), - cp.tan(a_f)) # , a_i & 3) + cp.tan(a_f), + cp.abs(-a_i), + cp.abs(-a_f)) ret_refe = (a_f ** 2, a_i ** -1, @@ -39,7 +42,9 @@ def test_fine(): ma.sqrt(a_f), ma.sin(a_f), ma.cos(a_f), - ma.tan(a_f)) # , a_i & 3) + ma.tan(a_f), + cp.abs(-a_i), + cp.abs(-a_f)) tg = Target() print('* compile and copy ...') diff --git a/tools/cross_compiler_unix/packobjs.sh b/tools/cross_compiler_unix/packobjs.sh index af79b81..43bd0e9 100644 --- a/tools/cross_compiler_unix/packobjs.sh +++ b/tools/cross_compiler_unix/packobjs.sh @@ -24,14 +24,14 @@ cd ../build/stencil_objs ar x ../../musl/lib/libc.a sinf.o cosf.o tanf.o asinf.o acosf.o atanf.o atan2f.o ar x ../../musl/lib/libc.a sqrtf.o logf.o expf.o sqrt.o ar x ../../musl/lib/libc.a logf_data.o __tandf.o __cosdf.o __sindf.o -ar x ../../musl/lib/libc.a fabsf.o scalbn.o floor.o exp2f_data.o powf.o powf_data.o +ar x ../../musl/lib/libc.a fabsf.o scalbn.o floor.o floorf.o exp2f_data.o powf.o powf_data.o ar x ../../musl/lib/libc.a __rem_pio2f.o __math_invalidf.o __stack_chk_fail.o __math_divzerof.o __math_oflowf.o __rem_pio2_large.o __math_uflowf.o __math_xflowf.o # Check out .lo (PIC) ar x ../../musl/lib/libc.a sinf.lo cosf.lo tanf.lo asinf.lo acosf.lo atanf.lo atan2f.lo ar x ../../musl/lib/libc.a sqrtf.lo logf.lo expf.lo sqrt.lo ar x ../../musl/lib/libc.a logf_data.lo __tandf.lo __cosdf.lo __sindf.lo -ar x ../../musl/lib/libc.a fabsf.lo scalbn.lo floor.lo exp2f_data.lo powf.lo powf_data.lo +ar x ../../musl/lib/libc.a fabsf.lo scalbn.lo floor.lo floorf.lo exp2f_data.lo powf.lo powf_data.lo ar x ../../musl/lib/libc.a __rem_pio2f.lo __math_invalidf.lo __stack_chk_fail.lo __math_divzerof.lo __math_oflowf.lo __rem_pio2_large.lo 
__math_uflowf.lo __math_xflowf.lo cd ../../musl