From 2509fe8077de8dd52f2539f964a94ac4cf77d0b2 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 12 Nov 2025 13:50:33 +0100 Subject: [PATCH] musl math added --- stencils/aux_functions.c | 41 +----------- stencils/stencil_helper.h | 2 + stencils/trigonometry.c | 131 +------------------------------------- 3 files changed, 8 insertions(+), 166 deletions(-) diff --git a/stencils/aux_functions.c b/stencils/aux_functions.c index 1cd3b4a..a68d43f 100644 --- a/stencils/aux_functions.c +++ b/stencils/aux_functions.c @@ -14,19 +14,7 @@ int floor_div(float arg1, float arg2) { } float aux_sqrt(float x) { - if (x <= 0.0f) return 0.0f; - - // --- Improved initial guess using bit-level trick --- - union { float f; uint32_t i; } conv = { x }; - conv.i = (conv.i >> 1) + 0x1fc00000; // better bias constant - float y = conv.f; - - // --- Fixed number of Newton-Raphson iterations --- - y = 0.5f * (y + x / y); - y = 0.5f * (y + x / y); - y = 0.5f * (y + x / y); // 3 fixed iterations - - return y; + return sqrtf(x); } float aux_get_42(float n) { @@ -35,33 +23,10 @@ float aux_get_42(float n) { float aux_log(float x) { - union { float f; uint32_t i; } vx = { x }; - float e = (float)((vx.i >> 23) & 0xFF) - 127.0f; - vx.i = (vx.i & 0x007FFFFF) | 0x3F800000; // normalized mantissa in [1,2) - float m = vx.f; - - // 3rd-degree minimax polynomial approximation of log2(m) - // over [1, 2): log2(m) ≈ p(m) = a*m^3 + b*m^2 + c*m + d - float p = -0.34484843f * m * m * m + 2.02466578f * m * m - 2.67487759f * m + 1.65149613f; - - float log2x = e + p; - return log2x * 0.69314718f; // convert log2 → ln + return logf(x); } float aux_exp(float x) { - // Scale by 1/ln(2) - x = x * 1.44269504089f; - float xi = (float)((int)x); - float f = x - xi; - - // Polynomial approximation of 2^f for f ∈ [0,1) - float p = 1.0f + f * (0.69314718f + f * (0.24022651f + f * (0.05550411f))); - - // Reconstruct exponent - int ei = (int)xi + 127; - if (ei <= 0) ei = 0; else if (ei >= 255) ei = 255; - union { uint32_t i; float f; } v = { (uint32_t)(ei << 23) }; - - return v.f * p; + return expf(x); } diff --git a/stencils/stencil_helper.h b/stencils/stencil_helper.h index 0a7f515..992330c 100644 --- a/stencils/stencil_helper.h +++ b/stencils/stencil_helper.h @@ -1,3 +1,5 @@ +#include + #if defined(__GNUC__) #define NOINLINE __attribute__((noinline)) #define STENCIL_START_EX(funcname) \ diff --git a/stencils/trigonometry.c b/stencils/trigonometry.c index 2c713ba..79110fe 100644 --- a/stencils/trigonometry.c +++ b/stencils/trigonometry.c @@ -7,140 +7,15 @@ const double PIO2_HI = 1.57079625129699707031; // pi/2 high part const double PIO2_LO = 7.54978941586159635335e-08; // pi/2 low part float aux_sin(float x) { - // convert to double for reduction (better precision) - double xd = (double)x; - - // quadrant index q = nearest integer to x * 2/pi - double qd = xd * TWO_OVER_PI; - // round to nearest integer (tie to even rounding not guaranteed) - int q = (int)(qd + (qd >= 0.0 ? 0.5 : -0.5)); - - // range-reduced remainder r = x − q*(pi/2) - // use hi/lo parts for pi/2 to reduce error - double r_d = xd - (double)q * PIO2_HI - (double)q * PIO2_LO; - float r = (float)r_d; - - // Select function and sign based on quadrant - int qm = q & 3; - int use_cos = (qm == 1 || qm == 3); - int sign = (qm == 0 || qm == 1) ? +1 : -1; - - float r2 = r * r; - - if (!use_cos) { - // sin(r) polynomial: r + s3*r^3 + s5*r^5 + s7*r^7 + s9*r^9 - const float s3 = -1.6666667163e-1f; - const float s5 = 8.3333337680e-3f; - const float s7 = -1.9841270114e-4f; - const float s9 = 2.7557314297e-6f; - - float p = ((s9 * r2 + s7) * r2 + s5) * r2 + s3; - float result = r + r * r2 * p; - return sign * result; - } else { - // cos(r) polynomial: 1 + c2*r2 + c4*r4 + c6*r6 + c8*r8 - const float c2 = -0.5f; - const float c4 = 4.1666667908e-2f; - const float c6 = -1.3888889225e-3f; - const float c8 = 2.4801587642e-5f; - - float p = ((c8 * r2 + c6) * r2 + c4) * r2 + c2; - float result = 1.0f + r2 * p; - return sign * result; - } + return sinf(x); } float aux_cos(float x) { - // convert to double for reduction (better precision) - double xd = (double)x; - - // quadrant index q = nearest integer to x * 2/pi - double qd = xd * TWO_OVER_PI; - // round to nearest integer (tie to even rounding not guaranteed) - int q = (int)(qd + (qd >= 0.0 ? 0.5 : -0.5)); - - // range-reduced remainder r = x − q*(pi/2) - // use hi/lo parts for pi/2 to reduce error - double r_d = xd - (double)q * PIO2_HI - (double)q * PIO2_LO; - float r = (float)r_d; - - // Select function and sign based on quadrant - int qm = q & 3; - int use_sin = (qm == 1 || qm == 3); - int sign = (qm == 0 || qm == 3) ? +1 : -1; - - float r2 = r * r; - - if (use_sin) { - // sin(r) polynomial: r + s3*r^3 + s5*r^5 + s7*r^7 + s9*r^9 - const float s3 = -1.6666667163e-1f; - const float s5 = 8.3333337680e-3f; - const float s7 = -1.9841270114e-4f; - const float s9 = 2.7557314297e-6f; - - float p = ((s9 * r2 + s7) * r2 + s5) * r2 + s3; - float result = r + r * r2 * p; - return sign * result; - } else { - // cos(r) polynomial: 1 + c2*r2 + c4*r4 + c6*r6 + c8*r8 - const float c2 = -0.5f; - const float c4 = 4.1666667908e-2f; - const float c6 = -1.3888889225e-3f; - const float c8 = 2.4801587642e-5f; - - float p = ((c8 * r2 + c6) * r2 + c4) * r2 + c2; - float result = 1.0f + r2 * p; - return sign * result; - } + return cosf(x); } float aux_tan(float x) { - // Promote to double for argument reduction (improves precision) - double xd = (double)x; - double qd = xd * TWO_OVER_PI; // how many half-pi multiples - int q = (int)(qd + (qd >= 0.0 ? 0.5 : -0.5)); // nearest integer - - // Range reduce: r = x - q*(pi/2) - double r_d = xd - (double)q * PIO2_HI - (double)q * PIO2_LO; - float r = (float)r_d; - - // For tan: period is pi, so q mod 2 determines sign - int qm = q & 3; - int use_cot = (qm == 1 || qm == 3); // tan(x) = ±cot(r) in odd quadrants - int sign = (qm == 0 || qm == 2) ? +1 : -1; - - // Polynomial approximations - // sin(r) ≈ r + s3*r^3 + s5*r^5 + s7*r^7 + s9*r^9 - const float s3 = -1.6666667163e-1f; - const float s5 = 8.3333337680e-3f; - const float s7 = -1.9841270114e-4f; - const float s9 = 2.7557314297e-6f; - - // cos(r) ≈ 1 + c2*r^2 + c4*r^4 + c6*r^6 + c8*r^8 - const float c2 = -0.5f; - const float c4 = 4.1666667908e-2f; - const float c6 = -1.3888889225e-3f; - const float c8 = 2.4801587642e-5f; - - float r2 = r * r; - float sin_r = r + r * r2 * (((s9 * r2 + s7) * r2 + s5) * r2 + s3); - float cos_r = 1.0f + r2 * (((c8 * r2 + c6) * r2 + c4) * r2 + c2); - - float t; - if (!use_cot) { - // tan(r) = sin(r)/cos(r) - t = sin_r / cos_r; - } else { - // cot(r) = cos(r)/sin(r) - t = cos_r / sin_r; - } - - // Avoid catastrophic explosion near vertical asymptotes - // Clip to a large finite value (~1e8) - if (t > 1e8f) t = 1e8f; - if (t < -1e8f) t = -1e8f; - - return sign * t; + return tanf(x); } float aux_atan(float x) {