mul_add feature + macos CI

youknowone · youknowone · commit 251814ce4d04 · 2025-04-22T10:54:40.000+09:00
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -19,7 +19,7 @@ jobs:
         os: [
           ubuntu-latest,
           windows-latest,
-          # macos-latest  # disabled due to incompatibility. See issue #1
+          macos-latest,
         ]
         rust: [stable]
     steps:
@@ -38,6 +38,8 @@ jobs:
     - name: Build
       run: cargo build --verbose
     - name: Run tests
-      run: cargo test --verbose
-    - name: Run tests on Release
-      run: cargo test --release --verbose
+      if: matrix.os != 'macos-latest'
+      run: cargo test --verbose && cargo test --release --verbose
+    - name: Run tests with FMA (macOS)
+      if: matrix.os == 'macos-latest'
+      run: cargo test --verbose --features mul_add && cargo test --release --verbose --features mul_add
diff --git a/Cargo.toml b/Cargo.toml
@@ -3,6 +3,9 @@ name = "pymath"
 version = "0.1.0"
 edition = "2024"
 
+[features]
+mul_add = []  # Use fused multiply-add (f64::mul_add) in gamma.rs; enabling it on aarch64-apple-darwin helps keep results bit-for-bit compatible
+
 [dev-dependencies]
 proptest = "1.6.0"
 pyo3 = { version = "0.24", features = ["abi3"] }
diff --git a/src/gamma.rs b/src/gamma.rs
@@ -37,9 +37,17 @@ const LANCZOS_DEN_COEFFS: [f64; LANCZOS_N] = [
     1.0,
 ];
 
+fn mul_add(a: f64, b: f64, c: f64) -> f64 { // returns a * b + c; fused when feature "mul_add" is on
+    if cfg!(feature = "mul_add") { // cfg! is resolved at compile time from the Cargo feature set
+        a.mul_add(b, c) // fused multiply-add: product and sum are rounded once
+    } else {
+        a * b + c // separate multiply and add: each operation rounds on its own
+    }
+}
+
 fn lanczos_sum(x: f64) -> f64 {
-    let mut num = 0.0;
-    let mut den = 0.0;
+    let mut num = 0.0f64;
+    let mut den = 0.0f64;
     // evaluate the rational function lanczos_sum(x).  For large
     // x, the obvious algorithm risks overflow, so we instead
     // rescale the denominator and numerator of the rational
@@ -50,8 +58,8 @@ fn lanczos_sum(x: f64) -> f64 {
     // this resulted in lower accuracy.
     if x < 5.0 {
         for i in (0..LANCZOS_N).rev() {
-            num = num * x + LANCZOS_NUM_COEFFS[i];
-            den = den * x + LANCZOS_DEN_COEFFS[i];
+            num = mul_add(num, x, LANCZOS_NUM_COEFFS[i]);
+            den = mul_add(den, x, LANCZOS_DEN_COEFFS[i]);
         }
     } else {
         for i in 0..LANCZOS_N {
@@ -237,7 +245,8 @@ pub fn lgamma(x: f64) -> Result<f64, Error> {
     // absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
     // subtraction below; it's probably not worth it.
     let mut r = lanczos_sum(absx).ln() - LANCZOS_G;
-    r += (absx - 0.5) * ((absx + LANCZOS_G - 0.5).ln() - 1.0);
+    let t = absx - 0.5;
+    r = mul_add(t, (absx + LANCZOS_G - 0.5).ln() - 1.0, r);
 
     if x < 0.0 {
         // Use reflection formula to get value for negative x