inlining of auxiliary functions into stencils

This commit is contained in:
Nicolas 2025-11-11 15:56:15 +01:00
parent 6416df1335
commit 04a86da641
2 changed files with 11 additions and 11 deletions

View File

@@ -6,14 +6,14 @@
volatile extern int dummy_int; volatile extern int dummy_int;
volatile extern float dummy_float; volatile extern float dummy_float;
NOINLINE int floor_div(float arg1, float arg2) { int floor_div(float arg1, float arg2) {
float x = arg1 / arg2; float x = arg1 / arg2;
int i = (int)x; int i = (int)x;
if (x < 0 && x != (float)i) i -= 1; if (x < 0 && x != (float)i) i -= 1;
return i; return i;
} }
NOINLINE float aux_sqrt(float x) { float aux_sqrt(float x) {
if (x <= 0.0f) return 0.0f; if (x <= 0.0f) return 0.0f;
// --- Improved initial guess using bit-level trick --- // --- Improved initial guess using bit-level trick ---
@@ -29,11 +29,11 @@ NOINLINE float aux_sqrt(float x) {
return y; return y;
} }
NOINLINE float aux_get_42(float n) { float aux_get_42(float n) {
return n + 42.0; return n + 42.0;
} }
NOINLINE float aux_log(float x) float aux_log(float x)
{ {
union { float f; uint32_t i; } vx = { x }; union { float f; uint32_t i; } vx = { x };
float e = (float)((vx.i >> 23) & 0xFF) - 127.0f; float e = (float)((vx.i >> 23) & 0xFF) - 127.0f;
@@ -48,7 +48,7 @@ NOINLINE float aux_log(float x)
return log2x * 0.69314718f; // convert log2 → ln return log2x * 0.69314718f; // convert log2 → ln
} }
NOINLINE float aux_exp(float x) float aux_exp(float x)
{ {
// Scale by 1/ln(2) // Scale by 1/ln(2)
x = x * 1.44269504089f; x = x * 1.44269504089f;

View File

@@ -6,7 +6,7 @@ const double TWO_OVER_PI = 0.63661977236758134308; // 2/pi
const double PIO2_HI = 1.57079625129699707031; // pi/2 high part const double PIO2_HI = 1.57079625129699707031; // pi/2 high part
const double PIO2_LO = 7.54978941586159635335e-08; // pi/2 low part const double PIO2_LO = 7.54978941586159635335e-08; // pi/2 low part
NOINLINE float aux_sin(float x) { float aux_sin(float x) {
// convert to double for reduction (better precision) // convert to double for reduction (better precision)
double xd = (double)x; double xd = (double)x;
@@ -50,7 +50,7 @@ NOINLINE float aux_sin(float x) {
} }
} }
NOINLINE float aux_cos(float x) { float aux_cos(float x) {
// convert to double for reduction (better precision) // convert to double for reduction (better precision)
double xd = (double)x; double xd = (double)x;
@@ -94,7 +94,7 @@ NOINLINE float aux_cos(float x) {
} }
} }
NOINLINE float aux_tan(float x) { float aux_tan(float x) {
// Promote to double for argument reduction (improves precision) // Promote to double for argument reduction (improves precision)
double xd = (double)x; double xd = (double)x;
double qd = xd * TWO_OVER_PI; // how many half-pi multiples double qd = xd * TWO_OVER_PI; // how many half-pi multiples
@@ -143,7 +143,7 @@ NOINLINE float aux_tan(float x) {
return sign * t; return sign * t;
} }
NOINLINE float aux_atan(float x) { float aux_atan(float x) {
const float absx = x < 0 ? -x : x; const float absx = x < 0 ? -x : x;
// Coefficients for a rational minimax fit on [0,1] // Coefficients for a rational minimax fit on [0,1]
@@ -166,7 +166,7 @@ NOINLINE float aux_atan(float x) {
return x < 0 ? -y : y; return x < 0 ? -y : y;
} }
NOINLINE float aux_atan2(float y, float x) { float aux_atan2(float y, float x) {
if (x == 0.0f) { if (x == 0.0f) {
if (y > 0.0f) return PI_2; if (y > 0.0f) return PI_2;
if (y < 0.0f) return -PI_2; if (y < 0.0f) return -PI_2;
@@ -187,7 +187,7 @@ NOINLINE float aux_atan2(float y, float x) {
return angle; return angle;
} }
NOINLINE float aux_asin(float x) { float aux_asin(float x) {
if (x > 1.0f) x = 1.0f; if (x > 1.0f) x = 1.0f;
if (x < -1.0f) x = -1.0f; if (x < -1.0f) x = -1.0f;