mirror of https://github.com/Nonannet/copapy.git
stencil generation and helper code updated to use musl math
This commit is contained in:
parent
ee4d22265c
commit
bfc3c1e399
|
|
@ -1,8 +1,6 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "stencil_helper.h"
|
#include "stencil_helper.h"
|
||||||
|
|
||||||
//double (*math_pow)(double, double);
|
|
||||||
|
|
||||||
volatile extern int dummy_int;
|
volatile extern int dummy_int;
|
||||||
volatile extern float dummy_float;
|
volatile extern float dummy_float;
|
||||||
|
|
||||||
|
|
@ -13,20 +11,6 @@ int floor_div(float arg1, float arg2) {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
float aux_sqrt(float x) {
|
|
||||||
return sqrtf(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
float aux_get_42(float n) {
|
float aux_get_42(float n) {
|
||||||
return n + 42.0;
|
return n + 42.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
float aux_log(float x)
|
|
||||||
{
|
|
||||||
return logf(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
float aux_exp(float x)
|
|
||||||
{
|
|
||||||
return expf(x);
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -9,11 +9,10 @@ op_signs = {'add': '+', 'sub': '-', 'mul': '*', 'div': '/', 'pow': '**',
|
||||||
'bwand': '&', 'bwor': '|', 'bwxor': '^'}
|
'bwand': '&', 'bwor': '|', 'bwxor': '^'}
|
||||||
|
|
||||||
entry_func_prefix = ''
|
entry_func_prefix = ''
|
||||||
stencil_func_prefix = '__attribute__((aligned(1))) ' # Remove function alignment for stencils
|
|
||||||
|
|
||||||
stack_size = 64
|
stack_size = 64
|
||||||
|
|
||||||
includes = ['stencil_helper.h', 'aux_functions.c', 'trigonometry.c']
|
includes = ['stencil_helper.h', 'aux_functions.c']
|
||||||
|
|
||||||
|
|
||||||
def read_files(files: list[str]) -> str:
|
def read_files(files: list[str]) -> str:
|
||||||
|
|
@ -69,8 +68,7 @@ def get_entry_function_shell() -> str:
|
||||||
@norm_indent
|
@norm_indent
|
||||||
def get_op_code(op: str, type1: str, type2: str, type_out: str) -> str:
|
def get_op_code(op: str, type1: str, type2: str, type_out: str) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{stencil_func_prefix}void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
STENCIL void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
||||||
STENCIL_START({op}_{type1}_{type2});
|
|
||||||
result_{type_out}_{type2}(arg1 {op_signs[op]} arg2, arg2);
|
result_{type_out}_{type2}(arg1 {op_signs[op]} arg2, arg2);
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
|
|
@ -79,8 +77,7 @@ def get_op_code(op: str, type1: str, type2: str, type_out: str) -> str:
|
||||||
@norm_indent
|
@norm_indent
|
||||||
def get_cast(type1: str, type2: str, type_out: str) -> str:
|
def get_cast(type1: str, type2: str, type_out: str) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{stencil_func_prefix}void cast_{type_out}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
STENCIL void cast_{type_out}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
||||||
STENCIL_START(cast_{type_out}_{type1}_{type2});
|
|
||||||
result_{type_out}_{type2}(({type1})arg1, arg2);
|
result_{type_out}_{type2}(({type1})arg1, arg2);
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
|
|
@ -89,8 +86,7 @@ def get_cast(type1: str, type2: str, type_out: str) -> str:
|
||||||
@norm_indent
|
@norm_indent
|
||||||
def get_func1(func_name: str, type1: str, type2: str) -> str:
|
def get_func1(func_name: str, type1: str, type2: str) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{stencil_func_prefix}void {func_name}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
STENCIL void {func_name}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
||||||
STENCIL_START({func_name}_{type1}_{type2});
|
|
||||||
result_float_{type2}(aux_{func_name}((float)arg1), arg2);
|
result_float_{type2}(aux_{func_name}((float)arg1), arg2);
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
|
|
@ -99,18 +95,34 @@ def get_func1(func_name: str, type1: str, type2: str) -> str:
|
||||||
@norm_indent
|
@norm_indent
|
||||||
def get_func2(func_name: str, type1: str, type2: str) -> str:
|
def get_func2(func_name: str, type1: str, type2: str) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{stencil_func_prefix}void {func_name}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
STENCIL void {func_name}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
||||||
STENCIL_START({func_name}_{type1}_{type2});
|
|
||||||
result_float_{type2}(aux_{func_name}((float)arg1, (float)arg2), arg2);
|
result_float_{type2}(aux_{func_name}((float)arg1, (float)arg2), arg2);
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
@norm_indent
|
||||||
|
def get_math_func1(func_name: str, type1: str, type2: str) -> str:
|
||||||
|
return f"""
|
||||||
|
STENCIL void {func_name}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
||||||
|
result_float_{type2}({func_name}f((float)arg1), arg2);
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
@norm_indent
|
||||||
|
def get_math_func2(func_name: str, type1: str, type2: str) -> str:
|
||||||
|
return f"""
|
||||||
|
STENCIL void {func_name}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
||||||
|
result_float_{type2}({func_name}f((float)arg1, (float)arg2), arg2);
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
@norm_indent
|
@norm_indent
|
||||||
def get_conv_code(type1: str, type2: str, type_out: str) -> str:
|
def get_conv_code(type1: str, type2: str, type_out: str) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{stencil_func_prefix}void conv_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
STENCIL void conv_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
||||||
STENCIL_START(conv_{type1}_{type2});
|
|
||||||
result_{type_out}_{type2}(({type_out})arg1, arg2);
|
result_{type_out}_{type2}(({type_out})arg1, arg2);
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
|
|
@ -119,8 +131,7 @@ def get_conv_code(type1: str, type2: str, type_out: str) -> str:
|
||||||
@norm_indent
|
@norm_indent
|
||||||
def get_op_code_float(op: str, type1: str, type2: str) -> str:
|
def get_op_code_float(op: str, type1: str, type2: str) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{stencil_func_prefix}void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
STENCIL void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
||||||
STENCIL_START({op}_{type1}_{type2});
|
|
||||||
result_float_{type2}((float)arg1 {op_signs[op]} (float)arg2, arg2);
|
result_float_{type2}((float)arg1 {op_signs[op]} (float)arg2, arg2);
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
|
|
@ -130,16 +141,14 @@ def get_op_code_float(op: str, type1: str, type2: str) -> str:
|
||||||
def get_floordiv(op: str, type1: str, type2: str) -> str:
|
def get_floordiv(op: str, type1: str, type2: str) -> str:
|
||||||
if type1 == 'int' and type2 == 'int':
|
if type1 == 'int' and type2 == 'int':
|
||||||
return f"""
|
return f"""
|
||||||
{stencil_func_prefix}void {op}_{type1}_{type2}({type1} a, {type2} b) {{
|
STENCIL void {op}_{type1}_{type2}({type1} a, {type2} b) {{
|
||||||
STENCIL_START({op}_{type1}_{type2});
|
|
||||||
int result = a / b - ((a % b != 0) && ((a < 0) != (b < 0)));
|
int result = a / b - ((a % b != 0) && ((a < 0) != (b < 0)));
|
||||||
result_int_{type2}(result, b);
|
result_int_{type2}(result, b);
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
else:
|
else:
|
||||||
return f"""
|
return f"""
|
||||||
{stencil_func_prefix}void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
STENCIL void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
||||||
STENCIL_START({op}_{type1}_{type2});
|
|
||||||
result_float_{type2}((float)floor_div((float)arg1, (float)arg2), arg2);
|
result_float_{type2}((float)floor_div((float)arg1, (float)arg2), arg2);
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
|
|
@ -162,8 +171,7 @@ def get_result_stubs2(type1: str, type2: str) -> str:
|
||||||
@norm_indent
|
@norm_indent
|
||||||
def get_read_reg0_code(type1: str, type2: str, type_out: str) -> str:
|
def get_read_reg0_code(type1: str, type2: str, type_out: str) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{stencil_func_prefix}void read_{type_out}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
STENCIL void read_{type_out}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
||||||
STENCIL_START(read_{type_out}_reg0_{type1}_{type2});
|
|
||||||
result_{type_out}_{type2}(dummy_{type_out}, arg2);
|
result_{type_out}_{type2}(dummy_{type_out}, arg2);
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
|
|
@ -172,8 +180,7 @@ def get_read_reg0_code(type1: str, type2: str, type_out: str) -> str:
|
||||||
@norm_indent
|
@norm_indent
|
||||||
def get_read_reg1_code(type1: str, type2: str, type_out: str) -> str:
|
def get_read_reg1_code(type1: str, type2: str, type_out: str) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{stencil_func_prefix}void read_{type_out}_reg1_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
STENCIL void read_{type_out}_reg1_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
||||||
STENCIL_START(read_{type_out}_reg1_{type1}_{type2});
|
|
||||||
result_{type1}_{type_out}(arg1, dummy_{type_out});
|
result_{type1}_{type_out}(arg1, dummy_{type_out});
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
|
|
@ -182,8 +189,7 @@ def get_read_reg1_code(type1: str, type2: str, type_out: str) -> str:
|
||||||
@norm_indent
|
@norm_indent
|
||||||
def get_write_code(type1: str, type2: str) -> str:
|
def get_write_code(type1: str, type2: str) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{stencil_func_prefix}void write_{type1}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
STENCIL void write_{type1}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{
|
||||||
STENCIL_START(write_{type1});
|
|
||||||
dummy_{type1} = arg1;
|
dummy_{type1} = arg1;
|
||||||
result_{type1}_{type2}(arg1, arg2);
|
result_{type1}_{type2}(arg1, arg2);
|
||||||
}}
|
}}
|
||||||
|
|
@ -215,7 +221,7 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
# Scalar arithmetic:
|
# Scalar arithmetic:
|
||||||
types = ['int', 'float']
|
types = ['int', 'float']
|
||||||
ops = ['add', 'sub', 'mul', 'div', 'floordiv', 'gt', 'ge', 'eq', 'ne', 'atan2']
|
ops = ['add', 'sub', 'mul', 'div', 'floordiv', 'gt', 'ge', 'eq', 'ne']
|
||||||
int_ops = ['bwand', 'bwor', 'bwxor', 'lshift', 'rshift']
|
int_ops = ['bwand', 'bwor', 'bwxor', 'lshift', 'rshift']
|
||||||
|
|
||||||
for t1 in types:
|
for t1 in types:
|
||||||
|
|
@ -230,18 +236,24 @@ if __name__ == "__main__":
|
||||||
t_out = 'int' if t1 == 'float' else 'float'
|
t_out = 'int' if t1 == 'float' else 'float'
|
||||||
code += get_cast(t1, t2, t_out)
|
code += get_cast(t1, t2, t_out)
|
||||||
|
|
||||||
fnames = ['sqrt', 'exp', 'log', 'sin', 'cos', 'tan', 'asin', 'atan', 'get_42']
|
fnames = ['get_42']
|
||||||
for fn, t1 in permutate(fnames, types):
|
for fn, t1 in permutate(fnames, types):
|
||||||
code += get_func1(fn, t1, t1)
|
code += get_func1(fn, t1, t1)
|
||||||
|
|
||||||
|
fnames = ['sqrt', 'exp', 'log', 'sin', 'cos', 'tan', 'asin', 'atan']
|
||||||
|
for fn, t1 in permutate(fnames, types):
|
||||||
|
code += get_math_func1(fn, t1, t1)
|
||||||
|
|
||||||
|
fnames = ['atan2', 'pow']
|
||||||
|
for fn, t1, t2 in permutate(fnames, types, types):
|
||||||
|
code += get_math_func2(fn, t1, t2)
|
||||||
|
|
||||||
for op, t1, t2 in permutate(ops, types, types):
|
for op, t1, t2 in permutate(ops, types, types):
|
||||||
t_out = t1 if t1 == t2 else 'float'
|
t_out = t1 if t1 == t2 else 'float'
|
||||||
if op == 'floordiv':
|
if op == 'floordiv':
|
||||||
code += get_floordiv('floordiv', t1, t2)
|
code += get_floordiv('floordiv', t1, t2)
|
||||||
elif op == 'div':
|
elif op == 'div':
|
||||||
code += get_op_code_float(op, t1, t2)
|
code += get_op_code_float(op, t1, t2)
|
||||||
elif op in {'atan2'}:
|
|
||||||
code += get_func2(op, t1, t2)
|
|
||||||
elif op in {'gt', 'eq', 'ge', 'ne'}:
|
elif op in {'gt', 'eq', 'ge', 'ne'}:
|
||||||
code += get_op_code(op, t1, t2, 'int')
|
code += get_op_code(op, t1, t2, 'int')
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,10 @@
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
|
// Remove function alignment for stencils
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
#define NOINLINE __attribute__((noinline))
|
#define NOINLINE __attribute__((noinline))
|
||||||
#define STENCIL_START_EX(funcname) \
|
#define STENCIL __attribute__((aligned(1)))
|
||||||
__asm__ __volatile__( \
|
|
||||||
".global stencil_start_" #funcname "\n" \
|
|
||||||
"stencil_start_" #funcname ":\n" \
|
|
||||||
)
|
|
||||||
#define STENCIL_START(funcname)
|
|
||||||
#else
|
#else
|
||||||
#define NOINLINE
|
#define NOINLINE
|
||||||
#define STENCIL_START(funcname)
|
#define STENCIL
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -1,76 +0,0 @@
|
||||||
#include "stencil_helper.h"
|
|
||||||
|
|
||||||
const float PI = 3.14159265358979323846f;
|
|
||||||
const float PI_2 = 1.57079632679489661923f; // pi/2
|
|
||||||
const double TWO_OVER_PI = 0.63661977236758134308; // 2/pi
|
|
||||||
const double PIO2_HI = 1.57079625129699707031; // pi/2 high part
|
|
||||||
const double PIO2_LO = 7.54978941586159635335e-08; // pi/2 low part
|
|
||||||
|
|
||||||
float aux_sin(float x) {
|
|
||||||
return sinf(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
float aux_cos(float x) {
|
|
||||||
return cosf(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
float aux_tan(float x) {
|
|
||||||
return tanf(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
float aux_atan(float x) {
|
|
||||||
const float absx = x < 0 ? -x : x;
|
|
||||||
|
|
||||||
// Coefficients for a rational minimax fit on [0,1]
|
|
||||||
const float a0 = 0.9998660f;
|
|
||||||
const float a1 = -0.3302995f;
|
|
||||||
const float b1 = 0.1801410f;
|
|
||||||
const float b2 = -0.0126492f;
|
|
||||||
|
|
||||||
float y;
|
|
||||||
if (absx <= 1.0f) {
|
|
||||||
float x2 = x * x;
|
|
||||||
y = x * (a0 + a1 * x2) / (1.0f + b1 * x2 + b2 * x2 * x2);
|
|
||||||
} else {
|
|
||||||
float inv = 1.0f / absx;
|
|
||||||
float x2 = inv * inv;
|
|
||||||
float core = inv * (a0 + a1 * x2) / (1.0f + b1 * x2 + b2 * x2 * x2);
|
|
||||||
y = PI_2 - core;
|
|
||||||
}
|
|
||||||
|
|
||||||
return x < 0 ? -y : y;
|
|
||||||
}
|
|
||||||
|
|
||||||
float aux_atan2(float y, float x) {
|
|
||||||
if (x == 0.0f) {
|
|
||||||
if (y > 0.0f) return PI_2;
|
|
||||||
if (y < 0.0f) return -PI_2;
|
|
||||||
return 0.0f; // TODO: undefined
|
|
||||||
}
|
|
||||||
|
|
||||||
float abs_y = y < 0 ? -y : y;
|
|
||||||
float abs_x = x < 0 ? -x : x;
|
|
||||||
float angle;
|
|
||||||
|
|
||||||
if (abs_x > abs_y)
|
|
||||||
angle = aux_atan(y / x);
|
|
||||||
else
|
|
||||||
angle = PI_2 - aux_atan(x / y);
|
|
||||||
|
|
||||||
// Quadrant correction
|
|
||||||
if (x < 0) angle = (y >= 0) ? angle + PI : angle - PI;
|
|
||||||
return angle;
|
|
||||||
}
|
|
||||||
|
|
||||||
float aux_asin(float x) {
|
|
||||||
if (x > 1.0f) x = 1.0f;
|
|
||||||
if (x < -1.0f) x = -1.0f;
|
|
||||||
|
|
||||||
const float c3 = 0.16666667f; // ≈ 1/6
|
|
||||||
const float c5 = 0.07500000f; // ≈ 3/40
|
|
||||||
const float c7 = 0.04464286f; // ≈ 5/112
|
|
||||||
|
|
||||||
float x2 = x * x;
|
|
||||||
float p = x + x * x2 * (c3 + x2 * (c5 + c7 * x2));
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue