extern math
This commit is contained in:
16
Cargo.toml
16
Cargo.toml
@@ -6,10 +6,17 @@ edition = "2018"
|
||||
readme = "README.md"
|
||||
license = "MIT OR Apache-2.0"
|
||||
|
||||
[lib]
|
||||
bench = false
|
||||
|
||||
[[bench]]
|
||||
name = "operations"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "math"
|
||||
harness = false
|
||||
|
||||
[features]
|
||||
default = ["num-traits"]
|
||||
|
||||
@@ -17,6 +24,8 @@ nalgebra-v021 = ["num-traits", "nalgebra_v021", "simba_v01", "approx_v03"]
|
||||
nalgebra-v029 = ["num-traits", "nalgebra_v029", "simba_v06", "approx_v05"]
|
||||
|
||||
[dependencies]
|
||||
paste = "1"
|
||||
|
||||
num-traits = { version = "0.2", optional = true }
|
||||
|
||||
approx_v03 = { package = "approx", version = "0.3", optional = true }
|
||||
@@ -27,6 +36,9 @@ approx_v05 = { package = "approx", version = "0.5", optional = true }
|
||||
nalgebra_v029 = { package = "nalgebra", version = "0.29", optional = true }
|
||||
simba_v06 = { package = "simba", version = "0.6", optional = true }
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.3", features = ["html_reports"] }
|
||||
rand = "0.8"
|
||||
@@ -34,3 +46,7 @@ rand = "0.8"
|
||||
[profile.test]
|
||||
# run tests at high optimization to exercise typical codegen
|
||||
opt-level = 3
|
||||
|
||||
[profile.release]
|
||||
lto = "fat"
|
||||
codegen-units = 1
|
||||
|
||||
38
benches/math.rs
Normal file
38
benches/math.rs
Normal file
@@ -0,0 +1,38 @@
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use fast_fp::{ff32, FF32};
|
||||
use rand::{distributions::Standard, thread_rng, Rng};
|
||||
|
||||
fn min(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("min");
|
||||
for count in [2, 8, 32, 1024] {
|
||||
group.throughput(Throughput::Elements(count as u64));
|
||||
|
||||
let f32_vals = thread_rng()
|
||||
.sample_iter(Standard)
|
||||
.take(count)
|
||||
.collect::<Vec<f32>>();
|
||||
|
||||
// use the same values for both benchmarks
|
||||
let ff32_vals = f32_vals
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(ff32)
|
||||
.collect::<Vec<FF32>>();
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("std::f32", count), &f32_vals, |b, vals| {
|
||||
b.iter(|| vals.iter().copied().fold(f32::MAX, |acc, val| acc.min(val)));
|
||||
});
|
||||
|
||||
group.bench_with_input(BenchmarkId::new("FF32", count), &ff32_vals, |b, vals| {
|
||||
b.iter(|| {
|
||||
vals.iter()
|
||||
.copied()
|
||||
.fold(FF32::MAX, |acc, val| acc.min(val))
|
||||
});
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(benches, min);
|
||||
criterion_main!(benches);
|
||||
20
build.rs
Normal file
20
build.rs
Normal file
@@ -0,0 +1,20 @@
|
||||
fn main() {
|
||||
let mut builder = cc::Build::new();
|
||||
|
||||
if !builder.get_compiler().is_like_clang() {
|
||||
// if the default/configured cc is not clang, try to call clang manually
|
||||
builder.compiler("clang");
|
||||
}
|
||||
|
||||
builder
|
||||
.file("src/math/math.c")
|
||||
.flag("-O3")
|
||||
.flag("-flto=thin")
|
||||
.flag("-ffinite-math-only")
|
||||
.flag("-fassociative-math")
|
||||
.flag("-freciprocal-math")
|
||||
.flag("-fno-signed-zeros")
|
||||
.flag("-fno-trapping-math")
|
||||
.flag("-ffp-contract=fast")
|
||||
.compile("math")
|
||||
}
|
||||
12
src/lib.rs
12
src/lib.rs
@@ -2,6 +2,8 @@
|
||||
#![feature(core_intrinsics)] // intrinsics for the fast math
|
||||
#![feature(asm)] // asm used to emulate freeze
|
||||
#![feature(doc_cfg)]
|
||||
#![feature(link_llvm_intrinsics)]
|
||||
|
||||
use core::{
|
||||
cmp, fmt,
|
||||
intrinsics::{fadd_fast, fdiv_fast, fmul_fast, frem_fast, fsub_fast},
|
||||
@@ -40,6 +42,7 @@ macro_rules! forward_freeze_self {
|
||||
};
|
||||
}
|
||||
|
||||
mod math;
|
||||
mod nalgebra;
|
||||
mod num_traits;
|
||||
|
||||
@@ -347,7 +350,6 @@ macro_rules! impls {
|
||||
// TODO migrate these to native implementations to freeze less and fast-math more
|
||||
forward_freeze_self! {
|
||||
$fast_ty, $base_ty
|
||||
pub fn abs(self) -> Self;
|
||||
pub fn acos(self) -> Self;
|
||||
pub fn acosh(self) -> Self;
|
||||
pub fn asin(self) -> Self;
|
||||
@@ -358,7 +360,6 @@ macro_rules! impls {
|
||||
pub fn cbrt(self) -> Self;
|
||||
pub fn ceil(self) -> Self;
|
||||
pub fn clamp(self, min: Self, max: Self) -> Self;
|
||||
pub fn copysign(self, sign: Self) -> Self;
|
||||
pub fn cos(self) -> Self;
|
||||
pub fn cosh(self) -> Self;
|
||||
pub fn div_euclid(self, rhs: Self) -> Self;
|
||||
@@ -367,20 +368,17 @@ macro_rules! impls {
|
||||
pub fn exp_m1(self) -> Self;
|
||||
pub fn floor(self) -> Self;
|
||||
pub fn fract(self) -> Self;
|
||||
pub fn hypot(self, other: Self) -> Self;
|
||||
pub fn ln(self) -> Self;
|
||||
pub fn ln_1p(self) -> Self;
|
||||
pub fn log(self, base: Self) -> Self;
|
||||
pub fn log10(self) -> Self;
|
||||
pub fn log2(self) -> Self;
|
||||
pub fn max(self, other: Self) -> Self;
|
||||
pub fn min(self, other: Self) -> Self;
|
||||
//pub fn max(self, other: Self) -> Self;
|
||||
//pub fn min(self, other: Self) -> Self;
|
||||
pub fn mul_add(self, a: Self, b: Self) -> Self;
|
||||
pub fn powf(self, n: Self) -> Self;
|
||||
pub fn recip(self) -> Self;
|
||||
pub fn rem_euclid(self, rhs: Self) -> Self;
|
||||
pub fn round(self) -> Self;
|
||||
pub fn signum(self) -> Self;
|
||||
pub fn sin(self) -> Self;
|
||||
pub fn sinh(self) -> Self;
|
||||
pub fn sqrt(self) -> Self;
|
||||
|
||||
76
src/math/math.c
Normal file
76
src/math/math.c
Normal file
@@ -0,0 +1,76 @@
|
||||
#include <stdbool.h>
|
||||
#include <math.h>
|
||||
|
||||
/*
 * Defines one externally visible wrapper `NAME_RUST_TYPE(a, b)` that returns
 * `a OP b` as RET_TYPE.
 */
/* TODO figure out why these don't inline */
#define IMPL_BINARY_OPERATOR(RET_TYPE, C_TYPE, RUST_TYPE, NAME, OP) \
    __attribute__((always_inline)) \
    RET_TYPE NAME ## _ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
        return a OP b; \
    }

/*
 * Generates the arithmetic wrappers (add, sub, mul, div) and the comparison
 * wrappers (eq, lt, le, gt, ge) for one floating-point type. Every generated
 * function is named `<op>_<RUST_TYPE>`.
 */
#define IMPL_OPERATIONS(C_TYPE, RUST_TYPE) \
    IMPL_BINARY_OPERATOR(C_TYPE, C_TYPE, RUST_TYPE, add, +) \
    IMPL_BINARY_OPERATOR(C_TYPE, C_TYPE, RUST_TYPE, sub, -) \
    IMPL_BINARY_OPERATOR(C_TYPE, C_TYPE, RUST_TYPE, mul, *) \
    IMPL_BINARY_OPERATOR(C_TYPE, C_TYPE, RUST_TYPE, div, /) \
    IMPL_BINARY_OPERATOR(bool, C_TYPE, RUST_TYPE, eq, ==) \
    IMPL_BINARY_OPERATOR(bool, C_TYPE, RUST_TYPE, lt, <) \
    IMPL_BINARY_OPERATOR(bool, C_TYPE, RUST_TYPE, le, <=) \
    IMPL_BINARY_OPERATOR(bool, C_TYPE, RUST_TYPE, gt, >) \
    IMPL_BINARY_OPERATOR(bool, C_TYPE, RUST_TYPE, ge, >=)
|
||||
|
||||
/*
 * Defines `FN_NAME_RUST_TYPE(x)`, a wrapper that forwards its single argument
 * to FN_IMPL and returns the result.
 */
#define IMPL_UNARY_FUNCTION(C_TYPE, RUST_TYPE, FN_NAME, FN_IMPL) \
    __attribute__((always_inline)) \
    C_TYPE FN_NAME ## _ ## RUST_TYPE(C_TYPE x) \
    { \
        return FN_IMPL(x); \
    }
|
||||
|
||||
/*
 * Defines `FN_NAME_RUST_TYPE(x, y)`, a wrapper that forwards both arguments,
 * in order, to FN_IMPL and returns the result.
 */
#define IMPL_BINARY_FUNCTION(C_TYPE, RUST_TYPE, FN_NAME, FN_IMPL) \
    __attribute__((always_inline)) \
    C_TYPE FN_NAME ## _ ## RUST_TYPE(C_TYPE x, C_TYPE y) \
    { \
        return FN_IMPL(x, y); \
    }
|
||||
|
||||
/* single precision: operators, then the libm-backed functions */
IMPL_OPERATIONS(float, f32)
IMPL_UNARY_FUNCTION(float, f32, sqrt, sqrtf)
IMPL_BINARY_FUNCTION(float, f32, rem, fmodf)
IMPL_BINARY_FUNCTION(float, f32, max, fmaxf)
IMPL_BINARY_FUNCTION(float, f32, min, fminf)

/* double precision: operators, then the libm-backed functions */
IMPL_OPERATIONS(double, f64)
IMPL_UNARY_FUNCTION(double, f64, sqrt, sqrt)
IMPL_BINARY_FUNCTION(double, f64, rem, fmod)
IMPL_BINARY_FUNCTION(double, f64, max, fmax)
IMPL_BINARY_FUNCTION(double, f64, min, fmin)
|
||||
105
src/math/mod.rs
Normal file
105
src/math/mod.rs
Normal file
@@ -0,0 +1,105 @@
|
||||
use crate::{poison::MaybePoison, FF32, FF64};
|
||||
use paste::paste;
|
||||
|
||||
impl FF32 {
|
||||
const SIGN_BIT: u32 = 0x8000_0000;
|
||||
const UNSIGNED_MASK: u32 = 0x7fff_ffff;
|
||||
}
|
||||
|
||||
impl FF64 {
|
||||
const SIGN_BIT: u64 = 0x8000_0000_0000_0000;
|
||||
const UNSIGNED_MASK: u64 = 0x7fff_ffff_ffff_ffff;
|
||||
}
|
||||
|
||||
/// Implements the math helpers that are written directly in Rust for one fast
/// float type.
///
/// - `$fast_ty`: the fast float wrapper type receiving the methods
/// - `$base_ty`: the primitive float type it wraps
/// - `$base_int`: the unsigned integer type holding that float's bits
macro_rules! impl_generic_math {
    ($fast_ty:ident, $base_ty:ident, $base_int:ident) => {
        impl $fast_ty {
            /// Reinterprets this value as its raw bits, keeping the result
            /// wrapped in `MaybePoison`.
            #[inline]
            fn to_bits(self) -> MaybePoison<$base_int> {
                // Safety:
                //
                // - `to_bits` is expected to accept any bit pattern
                // - the result goes straight back into MaybePoison, which
                //   controls poison propagation
                MaybePoison::new(unsafe { <$base_ty>::to_bits(*self.0.maybe_poison().as_ptr()) })
            }

            /// Rebuilds a value from raw bits that are wrapped in
            /// `MaybePoison`.
            #[inline]
            fn from_bits(bits: MaybePoison<$base_int>) -> Self {
                // Safety:
                //
                // - `from_bits` is expected to accept any bit pattern
                // - the result goes straight back into MaybePoison, which
                //   controls poison propagation
                Self(MaybePoison::new(unsafe {
                    <$base_ty>::from_bits(*bits.maybe_poison().as_ptr())
                }))
            }

            /// Returns `self` with everything outside `UNSIGNED_MASK` (the
            /// sign bit) cleared.
            #[inline]
            pub fn abs(self) -> Self {
                let raw = self.to_bits();

                // Safety:
                //
                // - & of poison is safe because & does not produce UB for any input values
                // - poison propagation is handled by wrapping in maybe poison
                Self::from_bits(MaybePoison::new(unsafe {
                    *raw.maybe_poison().as_ptr() & Self::UNSIGNED_MASK
                }))
            }

            /// Returns a value combining the non-sign bits of `self` with the
            /// sign bit of `other`.
            #[inline]
            pub fn copysign(self, other: Self) -> Self {
                let magnitude_bits = self.to_bits();
                let sign_bits = other.to_bits();

                // Safety:
                //
                // - & of poison is safe because & does not produce UB for any input values
                // - poison propagation is handled by wrapping in maybe poison
                Self::from_bits(MaybePoison::new(unsafe {
                    (*magnitude_bits.maybe_poison().as_ptr() & Self::UNSIGNED_MASK)
                        | (*sign_bits.maybe_poison().as_ptr() & Self::SIGN_BIT)
                }))
            }

            /// Computes `sqrt(self * self + other * other)` directly, with no
            /// rescaling of the intermediate squares.
            #[inline]
            pub fn hypot(self, other: Self) -> Self {
                let sum_of_squares = self * self + other * other;
                sum_of_squares.sqrt()
            }

            /// Returns `ONE` carrying the sign bit of `self`.
            #[inline]
            pub fn signum(self) -> Self {
                Self::ONE.copysign(self)
            }

            /// Returns `ONE / self`.
            #[inline]
            pub fn recip(self) -> Self {
                Self::ONE / self
            }
        }
    };
}
|
||||
|
||||
/// Implements `min` and `max` for one fast float type by calling the external
/// C symbols `min_<base>` / `max_<base>`.
///
/// - `$fast_ty`: the fast float wrapper type receiving the methods
/// - `$base_ty`: the primitive float name used as the symbol suffix
macro_rules! impl_extern_math {
    ($fast_ty:ident, $base_ty:ident) => {
        paste! {
            // Declarations of the C functions named `max_<base>` and
            // `min_<base>`; `src/math/math.c` defines them on top of
            // `fmax`/`fmin` (and the `f`-suffixed variants).
            //
            // NOTE(review): passing `$fast_ty` by value assumes it has the
            // same ABI as the C `float`/`double` -- confirm against the type
            // definition.
            extern "C" {
                fn [<max_ $base_ty>](lhs: $fast_ty, rhs: $fast_ty) -> $fast_ty;
                fn [<min_ $base_ty>](lhs: $fast_ty, rhs: $fast_ty) -> $fast_ty;
            }

            impl $fast_ty {
                /// Returns the result of the external `min_<base>` function
                /// applied to `self` and `other`.
                #[inline]
                pub fn min(self, other: Self) -> Self {
                    // Safety: NOTE(review) relies on the extern declaration
                    // above matching the C definition -- confirm.
                    unsafe { [<min_ $base_ty>](self, other) }
                }

                /// Returns the result of the external `max_<base>` function
                /// applied to `self` and `other`.
                #[inline]
                pub fn max(self, other: Self) -> Self {
                    // Safety: NOTE(review) relies on the extern declaration
                    // above matching the C definition -- confirm.
                    unsafe { [<max_ $base_ty>](self, other) }
                }
            }
        }
    };
}
|
||||
|
||||
impl_generic_math! { FF32, f32, u32 }
|
||||
impl_generic_math! { FF64, f64, u64 }
|
||||
|
||||
impl_extern_math! { FF32, f32 }
|
||||
impl_extern_math! { FF64, f64 }
|
||||
Reference in New Issue
Block a user