feat: add gzip and new headers

author: Kacper <kacper@mail.openlinux.dev> 2025-12-09 19:20:15 +0100
committer: Kacper <kacper@mail.openlinux.dev> 2025-12-09 19:20:15 +0100
commit: 885f5974cdf65b59415837ae97f5a14ef1350670 (patch)
tree: 66ac13de29c7f4932c5fcae11773df574e4e256a /lib/libm/sqrtl.c
parent: 8f9e448b2ef6db7cd905540c21f3c5b190e7a1e7 (diff)
1 files changed, 261 insertions, 0 deletions
diff --git a/lib/libm/sqrtl.c b/lib/libm/sqrtl.c
new file mode 100644
index 00000000..57249e9f
--- /dev/null
+++ b/lib/libm/sqrtl.c
@@ -0,0 +1,261 @@
+#include <stdint.h>
+#include <math.h>
+#include <float.h>
+#include "libm.h"
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+long double sqrtl(long double x)
+{
+	return sqrt(x);
+}
+#elif (LDBL_MANT_DIG == 113 || LDBL_MANT_DIG == 64) && LDBL_MAX_EXP == 16384
+#include "sqrt_data.h"
+
+#define FENV_SUPPORT 1
+
+typedef struct {
+	uint64_t hi;
+	uint64_t lo;
+} u128;
+
+/* top: 16 bit sign+exponent, x: significand.  */
+static inline long double mkldbl(uint64_t top, u128 x)
+{
+	union ldshape u;
+#if LDBL_MANT_DIG == 113
+	u.i2.hi = x.hi;
+	u.i2.lo = x.lo;
+	u.i2.hi &= 0x0000ffffffffffff;
+	u.i2.hi |= top << 48;
+#elif LDBL_MANT_DIG == 64
+	u.i.se = top;
+	u.i.m = x.lo;
+	/* force the top bit on non-zero (and non-subnormal) results.  */
+	if (top & 0x7fff)
+		u.i.m |= 0x8000000000000000;
+#endif
+	return u.f;
+}
+
+/* return: top 16 bit is sign+exp and following bits are the significand.  */
+static inline u128 asu128(long double x)
+{
+	union ldshape u = { .f = x };
+	u128 r;
+#if LDBL_MANT_DIG == 113
+	r.hi = u.i2.hi;
+	r.lo = u.i2.lo;
+#elif LDBL_MANT_DIG == 64
+	r.lo = u.i.m << 49;
+	/* ignore the top bit: pseudo numbers are not handled. */
+	r.hi = u.i.m >> 15;
+	r.hi &= 0x0000ffffffffffff;
+	r.hi |= (uint64_t)u.i.se << 48;
+#endif
+	return r;
+}
+
+/* returns a*b*2^-32 - e, with error 0 <= e < 1.  */
+static inline uint32_t mul32(uint32_t a, uint32_t b)
+{
+	return (uint64_t)a * b >> 32;
+}
+
+/* returns a*b*2^-64 - e, with error 0 <= e < 3.  */
+static inline uint64_t mul64(uint64_t a, uint64_t b)
+{
+	uint64_t ahi = a >> 32;
+	uint64_t alo = a & 0xffffffff;
+	uint64_t bhi = b >> 32;
+	uint64_t blo = b & 0xffffffff;
+	return ahi * bhi + (ahi * blo >> 32) + (alo * bhi >> 32);
+}
+
+static inline u128 add64(u128 a, uint64_t b)
+{
+	u128 r;
+	r.lo = a.lo + b;
+	r.hi = a.hi;
+	if (r.lo < a.lo)
+		r.hi++;
+	return r;
+}
+
+static inline u128 add128(u128 a, u128 b)
+{
+	u128 r;
+	r.lo = a.lo + b.lo;
+	r.hi = a.hi + b.hi;
+	if (r.lo < a.lo)
+		r.hi++;
+	return r;
+}
+
+static inline u128 sub64(u128 a, uint64_t b)
+{
+	u128 r;
+	r.lo = a.lo - b;
+	r.hi = a.hi;
+	if (a.lo < b)
+		r.hi--;
+	return r;
+}
+
+static inline u128 sub128(u128 a, u128 b)
+{
+	u128 r;
+	r.lo = a.lo - b.lo;
+	r.hi = a.hi - b.hi;
+	if (a.lo < b.lo)
+		r.hi--;
+	return r;
+}
+
+/* a<<n, 0 <= n <= 127 */
+static inline u128 lsh(u128 a, int n)
+{
+	if (n == 0)
+		return a;
+	if (n >= 64) {
+		a.hi = a.lo << (n - 64);
+		a.lo = 0;
+	} else {
+		a.hi = (a.hi << n) | (a.lo >> (64 - n));
+		a.lo = a.lo << n;
+	}
+	return a;
+}
+
+/* a>>n, 0 <= n <= 127 */
+static inline u128 rsh(u128 a, int n)
+{
+	if (n == 0)
+		return a;
+	if (n >= 64) {
+		a.lo = a.hi >> (n - 64);
+		a.hi = 0;
+	} else {
+		a.lo = (a.lo >> n) | (a.hi << (64 - n));
+		a.hi = a.hi >> n;
+	}
+	return a;
+}
+
+/* returns a*b exactly.  */
+static inline u128 mul64_128(uint64_t a, uint64_t b)
+{
+	u128 r;
+	uint64_t ahi = a >> 32;
+	uint64_t alo = a & 0xffffffff;
+	uint64_t bhi = b >> 32;
+	uint64_t blo = b & 0xffffffff;
+	uint64_t lo1 = ((ahi * blo) & 0xffffffff) + ((alo * bhi) & 0xffffffff) +
+		       (alo * blo >> 32);
+	uint64_t lo2 = (alo * blo) & 0xffffffff;
+	r.hi = ahi * bhi + (ahi * blo >> 32) + (alo * bhi >> 32) + (lo1 >> 32);
+	r.lo = (lo1 << 32) + lo2;
+	return r;
+}
+
+/* returns a*b*2^-128 - e, with error 0 <= e < 7.  */
+static inline u128 mul128(u128 a, u128 b)
+{
+	u128 hi = mul64_128(a.hi, b.hi);
+	uint64_t m1 = mul64(a.hi, b.lo);
+	uint64_t m2 = mul64(a.lo, b.hi);
+	return add64(add64(hi, m1), m2);
+}
+
+/* returns a*b % 2^128.  */
+static inline u128 mul128_tail(u128 a, u128 b)
+{
+	u128 lo = mul64_128(a.lo, b.lo);
+	lo.hi += a.hi * b.lo + a.lo * b.hi;
+	return lo;
+}
+
+/* see sqrt.c for detailed comments.  */
+
+long double sqrtl(long double x)
+{
+	u128 ix, ml;
+	uint64_t top;
+
+	ix = asu128(x);
+	top = ix.hi >> 48;
+	if (predict_false(top - 0x0001 >= 0x7fff - 0x0001)) {
+		/* x < 0x1p-16382 or inf or nan.  */
+		if (2 * ix.hi == 0 && ix.lo == 0)
+			return x;
+		if (ix.hi == 0x7fff000000000000 && ix.lo == 0)
+			return x;
+		if (top >= 0x7fff)
+			return __math_invalidl(x);
+		/* x is subnormal, normalize it.  */
+		ix = asu128(x * 0x1p112);
+		top = ix.hi >> 48;
+		top -= 112;
+	}
+
+	/* x = 4^e m; with int e and m in [1, 4) */
+	int even = top & 1;
+	ml = lsh(ix, 15);
+	ml.hi |= 0x8000000000000000;
+	if (even)
+		ml = rsh(ml, 1);
+	top = (top + 0x3fff) >> 1;
+
+	/* r ~ 1/sqrt(m) */
+	const uint64_t three = 0xc0000000;
+	uint64_t r, s, d, u, i;
+	i = (ix.hi >> 42) % 128;
+	r = (uint32_t)__rsqrt_tab[i] << 16;
+	/* |r sqrt(m) - 1| < 0x1p-8 */
+	s = mul32(ml.hi >> 32, r);
+	d = mul32(s, r);
+	u = three - d;
+	r = mul32(u, r) << 1;
+	/* |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit */
+	r = r << 32;
+	s = mul64(ml.hi, r);
+	d = mul64(s, r);
+	u = (three << 32) - d;
+	r = mul64(u, r) << 1;
+	/* |r sqrt(m) - 1| < 0x1.a5p-31 */
+	s = mul64(u, s) << 1;
+	d = mul64(s, r);
+	u = (three << 32) - d;
+	r = mul64(u, r) << 1;
+	/* |r sqrt(m) - 1| < 0x1.c001p-59, switch to 128bit */
+
+	const u128 threel = { .hi = three << 32, .lo = 0 };
+	u128 rl, sl, dl, ul;
+	rl.hi = r;
+	rl.lo = 0;
+	sl = mul128(ml, rl);
+	dl = mul128(sl, rl);
+	ul = sub128(threel, dl);
+	sl = mul128(ul, sl); /* repr: 3.125 */
+	/* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
+	sl = rsh(sub64(sl, 4), 125 - (LDBL_MANT_DIG - 1));
+	/* s < sqrt(m) < s + 1 ULP + tiny */
+
+	long double y;
+	u128 d2, d1, d0;
+	d0 = sub128(lsh(ml, 2 * (LDBL_MANT_DIG - 1) - 126),
+		    mul128_tail(sl, sl));
+	d1 = sub128(sl, d0);
+	d2 = add128(add64(sl, 1), d1);
+	sl = add64(sl, d1.hi >> 63);
+	y = mkldbl(top, sl);
+	if (FENV_SUPPORT) {
+		/* handle rounding modes and inexact exception.  */
+		top = predict_false((d2.hi | d2.lo) == 0) ? 0 : 1;
+		top |= ((d1.hi ^ d2.hi) & 0x8000000000000000) >> 48;
+		y += mkldbl(top, (u128){ 0 });
+	}
+	return y;
+}
+#else
+#error unsupported long double format
+#endif
author	Kacper <kacper@mail.openlinux.dev>	2025-12-09 19:20:15 +0100
committer	Kacper <kacper@mail.openlinux.dev>	2025-12-09 19:20:15 +0100
commit	885f5974cdf65b59415837ae97f5a14ef1350670 (patch)
tree	66ac13de29c7f4932c5fcae11773df574e4e256a /lib/libm/sqrtl.c
parent	8f9e448b2ef6db7cd905540c21f3c5b190e7a1e7 (diff)