extern math

2021-11-15 16:45:59 -08:00
parent 31d17c3333
commit 5bb809d657
6 changed files with 260 additions and 7 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,10 +6,17 @@ edition = "2018"
 readme = "README.md"
 license = "MIT OR Apache-2.0"
 [lib]
 bench = false
 [[bench]]
 name = "operations"
 harness = false
 [[bench]]
 name = "math"
 harness = false
 [features]
 default = ["num-traits"]
@@ -17,6 +24,8 @@ nalgebra-v021 = ["num-traits", "nalgebra_v021", "simba_v01", "approx_v03"]
 nalgebra-v029 = ["num-traits", "nalgebra_v029", "simba_v06", "approx_v05"]
 [dependencies]
 paste = "1"
 num-traits = { version = "0.2", optional = true }
 approx_v03 = { package = "approx", version = "0.3", optional = true }
@@ -27,6 +36,9 @@ approx_v05 = { package = "approx", version = "0.5", optional = true }
 nalgebra_v029 = { package = "nalgebra", version = "0.29", optional = true }
 simba_v06 = { package = "simba", version = "0.6", optional = true }
 [build-dependencies]
 cc = "1"
 [dev-dependencies]
 criterion = { version = "0.3", features = ["html_reports"] }
 rand = "0.8"
@@ -34,3 +46,7 @@ rand = "0.8"
 [profile.test]
 # run tests at high optimization to exercise typical codegen
 opt-level = 3
 [profile.release]
 lto = "fat"
 codegen-units = 1
--- a/benches/math.rs
+++ b/benches/math.rs
@@ -0,0 +1,38 @@
 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
 use fast_fp::{ff32, FF32};
 use rand::{distributions::Standard, thread_rng, Rng};
 fn min(c: &mut Criterion) {
    let mut group = c.benchmark_group("min");
    for count in [2, 8, 32, 1024] {
        group.throughput(Throughput::Elements(count as u64));
        let f32_vals = thread_rng()
            .sample_iter(Standard)
            .take(count)
            .collect::<Vec<f32>>();
        // use the same values for both benchmarks
        let ff32_vals = f32_vals
            .clone()
            .into_iter()
            .map(ff32)
            .collect::<Vec<FF32>>();
        group.bench_with_input(BenchmarkId::new("std::f32", count), &f32_vals, |b, vals| {
            b.iter(|| vals.iter().copied().fold(f32::MAX, |acc, val| acc.min(val)));
        });
        group.bench_with_input(BenchmarkId::new("FF32", count), &ff32_vals, |b, vals| {
            b.iter(|| {
                vals.iter()
                    .copied()
                    .fold(FF32::MAX, |acc, val| acc.min(val))
            });
        });
    }
    group.finish();
 }
 criterion_group!(benches, min);
 criterion_main!(benches);
--- a/build.rs
+++ b/build.rs
@@ -0,0 +1,20 @@
 fn main() {
    let mut builder = cc::Build::new();
    if !builder.get_compiler().is_like_clang() {
        // if the default/configured cc is not clang, try to call clang manually
        builder.compiler("clang");
    }
    builder
        .file("src/math/math.c")
        .flag("-O3")
        .flag("-flto=thin")
        .flag("-ffinite-math-only")
        .flag("-fassociative-math")
        .flag("-freciprocal-math")
        .flag("-fno-signed-zeros")
        .flag("-fno-trapping-math")
        .flag("-ffp-contract=fast")
        .compile("math")
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,6 +2,8 @@
 #![feature(core_intrinsics)] // intrinsics for the fast math
 #![feature(asm)] // asm used to emulate freeze
 #![feature(doc_cfg)]
 #![feature(link_llvm_intrinsics)]
 use core::{
    cmp, fmt,
    intrinsics::{fadd_fast, fdiv_fast, fmul_fast, frem_fast, fsub_fast},
@@ -40,6 +42,7 @@ macro_rules! forward_freeze_self {
    };
 }
 mod math;
 mod nalgebra;
 mod num_traits;
@@ -347,7 +350,6 @@ macro_rules! impls {
            // TODO migrate these to native implementations to freeze less and fast-math more
            forward_freeze_self! {
                $fast_ty, $base_ty
                pub fn abs(self) -> Self;
                pub fn acos(self) -> Self;
                pub fn acosh(self) -> Self;
                pub fn asin(self) -> Self;
@@ -358,7 +360,6 @@ macro_rules! impls {
                pub fn cbrt(self) -> Self;
                pub fn ceil(self) -> Self;
                pub fn clamp(self, min: Self, max: Self) -> Self;
                pub fn copysign(self, sign: Self) -> Self;
                pub fn cos(self) -> Self;
                pub fn cosh(self) -> Self;
                pub fn div_euclid(self, rhs: Self) -> Self;
@@ -367,20 +368,17 @@ macro_rules! impls {
                pub fn exp_m1(self) -> Self;
                pub fn floor(self) -> Self;
                pub fn fract(self) -> Self;
                pub fn hypot(self, other: Self) -> Self;
                pub fn ln(self) -> Self;
                pub fn ln_1p(self) -> Self;
                pub fn log(self, base: Self) -> Self;
                pub fn log10(self) -> Self;
                pub fn log2(self) -> Self;
-                pub fn max(self, other: Self) -> Self;
+                //pub fn max(self, other: Self) -> Self;
-                pub fn min(self, other: Self) -> Self;
+                //pub fn min(self, other: Self) -> Self;
                pub fn mul_add(self, a: Self, b: Self) -> Self;
                pub fn powf(self, n: Self) -> Self;
                pub fn recip(self) -> Self;
                pub fn rem_euclid(self, rhs: Self) -> Self;
                pub fn round(self) -> Self;
                pub fn signum(self) -> Self;
                pub fn sin(self) -> Self;
                pub fn sinh(self) -> Self;
                pub fn sqrt(self) -> Self;
--- a/src/math/math.c
+++ b/src/math/math.c
@@ -0,0 +1,76 @@
 #include <stdbool.h>
 #include <math.h>
 #define IMPL_OPERATIONS(C_TYPE, RUST_TYPE)       \
  /* TODO figure out why these don't inline */   \
  __attribute__((always_inline))                 \
  C_TYPE add_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
    return a + b;                                \
  }                                              \
                                                 \
  __attribute__((always_inline))                 \
  C_TYPE sub_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
    return a - b;                                \
  }                                              \
                                                 \
  __attribute__((always_inline))                 \
  C_TYPE mul_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
    return a * b;                                \
  }                                              \
                                                 \
  __attribute__((always_inline))                 \
  C_TYPE div_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
    return a / b;                                \
  }                                              \
                                                 \
  __attribute__((always_inline))                 \
  bool eq_ ## RUST_TYPE(C_TYPE a, C_TYPE b) {    \
    return a == b;                               \
  }                                              \
                                                 \
  __attribute__((always_inline))                 \
  bool lt_ ## RUST_TYPE(C_TYPE a, C_TYPE b) {    \
    return a < b;                                \
  }                                              \
                                                 \
  __attribute__((always_inline))                 \
  bool le_ ## RUST_TYPE(C_TYPE a, C_TYPE b) {    \
    return a <= b;                               \
  }                                              \
                                                 \
  __attribute__((always_inline))                 \
  bool gt_ ## RUST_TYPE(C_TYPE a, C_TYPE b) {    \
    return a > b;                                \
  }                                              \
                                                 \
  __attribute__((always_inline))                 \
  bool ge_ ## RUST_TYPE(C_TYPE a, C_TYPE b) {    \
    return a >= b;                               \
  }                                              \
 #define IMPL_UNARY_FUNCTION(C_TYPE, RUST_TYPE, FN_NAME, FN_IMPL) \
  __attribute__((always_inline))                                 \
  C_TYPE FN_NAME ## _ ## RUST_TYPE(C_TYPE a) {                   \
    return FN_IMPL(a);                                           \
  }                                                              \
 #define IMPL_BINARY_FUNCTION(C_TYPE, RUST_TYPE, FN_NAME, FN_IMPL) \
  __attribute__((always_inline))                                  \
  C_TYPE FN_NAME ## _ ## RUST_TYPE(C_TYPE a, C_TYPE b) {          \
    return FN_IMPL(a, b);                                         \
  }                                                               \
 IMPL_OPERATIONS(float, f32)
 IMPL_OPERATIONS(double, f64)
 IMPL_UNARY_FUNCTION(float, f32, sqrt, sqrtf)
 IMPL_UNARY_FUNCTION(double, f64, sqrt, sqrt)
 IMPL_BINARY_FUNCTION(float, f32, rem, fmodf)
 IMPL_BINARY_FUNCTION(double, f64, rem, fmod)
 IMPL_BINARY_FUNCTION(float, f32, max, fmaxf)
 IMPL_BINARY_FUNCTION(double, f64, max, fmax)
 IMPL_BINARY_FUNCTION(float, f32, min, fminf)
 IMPL_BINARY_FUNCTION(double, f64, min, fmin)
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -0,0 +1,105 @@
 use crate::{poison::MaybePoison, FF32, FF64};
 use paste::paste;
 impl FF32 {
    const SIGN_BIT: u32 = 0x8000_0000;
    const UNSIGNED_MASK: u32 = 0x7fff_ffff;
 }
 impl FF64 {
    const SIGN_BIT: u64 = 0x8000_0000_0000_0000;
    const UNSIGNED_MASK: u64 = 0x7fff_ffff_ffff_ffff;
 }
 macro_rules! impl_generic_math {
    ($fast_ty:ident, $base_ty:ident, $base_int:ident) => {
        impl $fast_ty {
            #[inline]
            fn to_bits(self) -> MaybePoison<$base_int> {
                // Safety:
                //
                // - `to_bits` should be valid for any input bits
                // - poison propagation is controlled with MaybePoison
                MaybePoison::new(unsafe { <$base_ty>::to_bits(*self.0.maybe_poison().as_ptr()) })
            }
            #[inline]
            fn from_bits(bits: MaybePoison<$base_int>) -> Self {
                // Safety:
                //
                // - `from_bits` should be valid for any input bits
                // - poison propagation is controlled with MaybePoison
                Self(MaybePoison::new(unsafe {
                    <$base_ty>::from_bits(*bits.maybe_poison().as_ptr())
                }))
            }
            #[inline]
            pub fn abs(self) -> Self {
                let bits = self.to_bits();
                <$fast_ty>::from_bits(MaybePoison::new(unsafe {
                    *bits.maybe_poison().as_ptr() & Self::UNSIGNED_MASK
                }))
            }
            #[inline]
            pub fn copysign(self, other: Self) -> Self {
                let this = self.to_bits();
                let that = other.to_bits();
                // Safety:
                //
                // - & of poison is safe because & does not produce UB for any input values
                // - poison propagation is handled by wrapping in maybe poison
                <$fast_ty>::from_bits(MaybePoison::new(unsafe {
                    (*this.maybe_poison().as_ptr() & Self::UNSIGNED_MASK)
                        | (*that.maybe_poison().as_ptr() & Self::SIGN_BIT)
                }))
            }
            #[inline]
            pub fn hypot(self, other: Self) -> Self {
                (self * self + other * other).sqrt()
            }
            #[inline]
            pub fn signum(self) -> Self {
                Self::ONE.copysign(self)
            }
            #[inline]
            pub fn recip(self) -> Self {
                Self::ONE / self
            }
        }
    };
 }
 macro_rules! impl_extern_math {
    ($fast_ty:ident, $base_ty:ident) => {
        paste! {
            extern "C" {
                fn [<min_ $base_ty>](a: $fast_ty, b: $fast_ty) -> $fast_ty;
                fn [<max_ $base_ty>](a: $fast_ty, b: $fast_ty) -> $fast_ty;
            }
            impl $fast_ty {
                #[inline]
                pub fn max(self, other: Self) -> Self {
                    unsafe { [<max_ $base_ty>](self, other) }
                }
                #[inline]
                pub fn min(self, other: Self) -> Self {
                    unsafe { [<min_ $base_ty>](self, other) }
                }
            }
        }
    };
 }
 impl_generic_math! { FF32, f32, u32 }
 impl_generic_math! { FF64, f64, u64 }
 impl_extern_math! { FF32, f32 }
 impl_extern_math! { FF64, f64 }