inlining of auxiliary functions into stencils

2025-11-11 15:56:15 +01:00 · 2025-11-11 15:56:15 +01:00 · 04a86da641
parent 6416df1335
commit 04a86da641
2 changed files with 11 additions and 11 deletions
--- a/stencils/aux_functions.c
+++ b/stencils/aux_functions.c
@@ -6,14 +6,14 @@
 volatile extern int dummy_int;
 volatile extern float dummy_float;

-NOINLINE int floor_div(float arg1, float arg2) {
+int floor_div(float arg1, float arg2) {
    float x = arg1 / arg2;
    int i = (int)x;
    if (x < 0 && x != (float)i) i -= 1;
    return i;
 }

-NOINLINE float aux_sqrt(float x) {
+float aux_sqrt(float x) {
    if (x <= 0.0f) return 0.0f;

    // --- Improved initial guess using bit-level trick ---
@@ -29,11 +29,11 @@ NOINLINE float aux_sqrt(float x) {
    return y;
 }

-NOINLINE float aux_get_42(float n) {
+float aux_get_42(float n) {
    return n + 42.0;
 }

-NOINLINE float aux_log(float x)
+float aux_log(float x)
 {
    union { float f; uint32_t i; } vx = { x };
    float e = (float)((vx.i >> 23) & 0xFF) - 127.0f;
@@ -48,7 +48,7 @@ NOINLINE float aux_log(float x)
    return log2x * 0.69314718f; // convert log2 → ln
 }

-NOINLINE float aux_exp(float x)
+float aux_exp(float x)
 {
    // Scale by 1/ln(2)
    x = x * 1.44269504089f;
--- a/stencils/trigonometry.c
+++ b/stencils/trigonometry.c
@@ -6,7 +6,7 @@ const double TWO_OVER_PI = 0.63661977236758134308; // 2/pi
 const double PIO2_HI = 1.57079625129699707031;     // pi/2 high part
 const double PIO2_LO = 7.54978941586159635335e-08; // pi/2 low part

-NOINLINE float aux_sin(float x) {
+float aux_sin(float x) {
    // convert to double for reduction (better precision)
    double xd = (double)x;

@@ -50,7 +50,7 @@ NOINLINE float aux_sin(float x) {
    }
 }

-NOINLINE float aux_cos(float x) {
+float aux_cos(float x) {
    // convert to double for reduction (better precision)
    double xd = (double)x;

@@ -94,7 +94,7 @@ NOINLINE float aux_cos(float x) {
    }
 }

-NOINLINE float aux_tan(float x) {
+float aux_tan(float x) {
    // Promote to double for argument reduction (improves precision)
    double xd = (double)x;
    double qd = xd * TWO_OVER_PI;   // how many half-pi multiples
@@ -143,7 +143,7 @@ NOINLINE float aux_tan(float x) {
    return sign * t;
 }

-NOINLINE float aux_atan(float x) {
+float aux_atan(float x) {
    const float absx = x < 0 ? -x : x;

    // Coefficients for a rational minimax fit on [0,1]
@@ -166,7 +166,7 @@ NOINLINE float aux_atan(float x) {
    return x < 0 ? -y : y;
 }

-NOINLINE float aux_atan2(float y, float x) {
+float aux_atan2(float y, float x) {
    if (x == 0.0f) {
        if (y > 0.0f)  return  PI_2;
        if (y < 0.0f)  return -PI_2;
@@ -187,7 +187,7 @@ NOINLINE float aux_atan2(float y, float x) {
    return angle;
 }

-NOINLINE float aux_asin(float x) {
+float aux_asin(float x) {
    if (x > 1.0f) x = 1.0f;
    if (x < -1.0f) x = -1.0f;