extern math
This commit is contained in:
16
Cargo.toml
16
Cargo.toml
@@ -6,10 +6,17 @@ edition = "2018"
|
|||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
license = "MIT OR Apache-2.0"
|
license = "MIT OR Apache-2.0"
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
bench = false
|
||||||
|
|
||||||
[[bench]]
|
[[bench]]
|
||||||
name = "operations"
|
name = "operations"
|
||||||
harness = false
|
harness = false
|
||||||
|
|
||||||
|
[[bench]]
|
||||||
|
name = "math"
|
||||||
|
harness = false
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["num-traits"]
|
default = ["num-traits"]
|
||||||
|
|
||||||
@@ -17,6 +24,8 @@ nalgebra-v021 = ["num-traits", "nalgebra_v021", "simba_v01", "approx_v03"]
|
|||||||
nalgebra-v029 = ["num-traits", "nalgebra_v029", "simba_v06", "approx_v05"]
|
nalgebra-v029 = ["num-traits", "nalgebra_v029", "simba_v06", "approx_v05"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
paste = "1"
|
||||||
|
|
||||||
num-traits = { version = "0.2", optional = true }
|
num-traits = { version = "0.2", optional = true }
|
||||||
|
|
||||||
approx_v03 = { package = "approx", version = "0.3", optional = true }
|
approx_v03 = { package = "approx", version = "0.3", optional = true }
|
||||||
@@ -27,6 +36,9 @@ approx_v05 = { package = "approx", version = "0.5", optional = true }
|
|||||||
nalgebra_v029 = { package = "nalgebra", version = "0.29", optional = true }
|
nalgebra_v029 = { package = "nalgebra", version = "0.29", optional = true }
|
||||||
simba_v06 = { package = "simba", version = "0.6", optional = true }
|
simba_v06 = { package = "simba", version = "0.6", optional = true }
|
||||||
|
|
||||||
|
[build-dependencies]
|
||||||
|
cc = "1"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
criterion = { version = "0.3", features = ["html_reports"] }
|
criterion = { version = "0.3", features = ["html_reports"] }
|
||||||
rand = "0.8"
|
rand = "0.8"
|
||||||
@@ -34,3 +46,7 @@ rand = "0.8"
|
|||||||
[profile.test]
|
[profile.test]
|
||||||
# run tests at high optimization to exercise typical codegen
|
# run tests at high optimization to exercise typical codegen
|
||||||
opt-level = 3
|
opt-level = 3
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
lto = "fat"
|
||||||
|
codegen-units = 1
|
||||||
|
|||||||
38
benches/math.rs
Normal file
38
benches/math.rs
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||||
|
use fast_fp::{ff32, FF32};
|
||||||
|
use rand::{distributions::Standard, thread_rng, Rng};
|
||||||
|
|
||||||
|
fn min(c: &mut Criterion) {
|
||||||
|
let mut group = c.benchmark_group("min");
|
||||||
|
for count in [2, 8, 32, 1024] {
|
||||||
|
group.throughput(Throughput::Elements(count as u64));
|
||||||
|
|
||||||
|
let f32_vals = thread_rng()
|
||||||
|
.sample_iter(Standard)
|
||||||
|
.take(count)
|
||||||
|
.collect::<Vec<f32>>();
|
||||||
|
|
||||||
|
// use the same values for both benchmarks
|
||||||
|
let ff32_vals = f32_vals
|
||||||
|
.clone()
|
||||||
|
.into_iter()
|
||||||
|
.map(ff32)
|
||||||
|
.collect::<Vec<FF32>>();
|
||||||
|
|
||||||
|
group.bench_with_input(BenchmarkId::new("std::f32", count), &f32_vals, |b, vals| {
|
||||||
|
b.iter(|| vals.iter().copied().fold(f32::MAX, |acc, val| acc.min(val)));
|
||||||
|
});
|
||||||
|
|
||||||
|
group.bench_with_input(BenchmarkId::new("FF32", count), &ff32_vals, |b, vals| {
|
||||||
|
b.iter(|| {
|
||||||
|
vals.iter()
|
||||||
|
.copied()
|
||||||
|
.fold(FF32::MAX, |acc, val| acc.min(val))
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
group.finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(benches, min);
|
||||||
|
criterion_main!(benches);
|
||||||
20
build.rs
Normal file
20
build.rs
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
fn main() {
|
||||||
|
let mut builder = cc::Build::new();
|
||||||
|
|
||||||
|
if !builder.get_compiler().is_like_clang() {
|
||||||
|
// if the default/configured cc is not clang, try to call clang manually
|
||||||
|
builder.compiler("clang");
|
||||||
|
}
|
||||||
|
|
||||||
|
builder
|
||||||
|
.file("src/math/math.c")
|
||||||
|
.flag("-O3")
|
||||||
|
.flag("-flto=thin")
|
||||||
|
.flag("-ffinite-math-only")
|
||||||
|
.flag("-fassociative-math")
|
||||||
|
.flag("-freciprocal-math")
|
||||||
|
.flag("-fno-signed-zeros")
|
||||||
|
.flag("-fno-trapping-math")
|
||||||
|
.flag("-ffp-contract=fast")
|
||||||
|
.compile("math")
|
||||||
|
}
|
||||||
12
src/lib.rs
12
src/lib.rs
@@ -2,6 +2,8 @@
|
|||||||
#![feature(core_intrinsics)] // intrinsics for the fast math
|
#![feature(core_intrinsics)] // intrinsics for the fast math
|
||||||
#![feature(asm)] // asm used to emulate freeze
|
#![feature(asm)] // asm used to emulate freeze
|
||||||
#![feature(doc_cfg)]
|
#![feature(doc_cfg)]
|
||||||
|
#![feature(link_llvm_intrinsics)]
|
||||||
|
|
||||||
use core::{
|
use core::{
|
||||||
cmp, fmt,
|
cmp, fmt,
|
||||||
intrinsics::{fadd_fast, fdiv_fast, fmul_fast, frem_fast, fsub_fast},
|
intrinsics::{fadd_fast, fdiv_fast, fmul_fast, frem_fast, fsub_fast},
|
||||||
@@ -40,6 +42,7 @@ macro_rules! forward_freeze_self {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mod math;
|
||||||
mod nalgebra;
|
mod nalgebra;
|
||||||
mod num_traits;
|
mod num_traits;
|
||||||
|
|
||||||
@@ -347,7 +350,6 @@ macro_rules! impls {
|
|||||||
// TODO migrate these to native implementations to freeze less and fast-math more
|
// TODO migrate these to native implementations to freeze less and fast-math more
|
||||||
forward_freeze_self! {
|
forward_freeze_self! {
|
||||||
$fast_ty, $base_ty
|
$fast_ty, $base_ty
|
||||||
pub fn abs(self) -> Self;
|
|
||||||
pub fn acos(self) -> Self;
|
pub fn acos(self) -> Self;
|
||||||
pub fn acosh(self) -> Self;
|
pub fn acosh(self) -> Self;
|
||||||
pub fn asin(self) -> Self;
|
pub fn asin(self) -> Self;
|
||||||
@@ -358,7 +360,6 @@ macro_rules! impls {
|
|||||||
pub fn cbrt(self) -> Self;
|
pub fn cbrt(self) -> Self;
|
||||||
pub fn ceil(self) -> Self;
|
pub fn ceil(self) -> Self;
|
||||||
pub fn clamp(self, min: Self, max: Self) -> Self;
|
pub fn clamp(self, min: Self, max: Self) -> Self;
|
||||||
pub fn copysign(self, sign: Self) -> Self;
|
|
||||||
pub fn cos(self) -> Self;
|
pub fn cos(self) -> Self;
|
||||||
pub fn cosh(self) -> Self;
|
pub fn cosh(self) -> Self;
|
||||||
pub fn div_euclid(self, rhs: Self) -> Self;
|
pub fn div_euclid(self, rhs: Self) -> Self;
|
||||||
@@ -367,20 +368,17 @@ macro_rules! impls {
|
|||||||
pub fn exp_m1(self) -> Self;
|
pub fn exp_m1(self) -> Self;
|
||||||
pub fn floor(self) -> Self;
|
pub fn floor(self) -> Self;
|
||||||
pub fn fract(self) -> Self;
|
pub fn fract(self) -> Self;
|
||||||
pub fn hypot(self, other: Self) -> Self;
|
|
||||||
pub fn ln(self) -> Self;
|
pub fn ln(self) -> Self;
|
||||||
pub fn ln_1p(self) -> Self;
|
pub fn ln_1p(self) -> Self;
|
||||||
pub fn log(self, base: Self) -> Self;
|
pub fn log(self, base: Self) -> Self;
|
||||||
pub fn log10(self) -> Self;
|
pub fn log10(self) -> Self;
|
||||||
pub fn log2(self) -> Self;
|
pub fn log2(self) -> Self;
|
||||||
pub fn max(self, other: Self) -> Self;
|
//pub fn max(self, other: Self) -> Self;
|
||||||
pub fn min(self, other: Self) -> Self;
|
//pub fn min(self, other: Self) -> Self;
|
||||||
pub fn mul_add(self, a: Self, b: Self) -> Self;
|
pub fn mul_add(self, a: Self, b: Self) -> Self;
|
||||||
pub fn powf(self, n: Self) -> Self;
|
pub fn powf(self, n: Self) -> Self;
|
||||||
pub fn recip(self) -> Self;
|
|
||||||
pub fn rem_euclid(self, rhs: Self) -> Self;
|
pub fn rem_euclid(self, rhs: Self) -> Self;
|
||||||
pub fn round(self) -> Self;
|
pub fn round(self) -> Self;
|
||||||
pub fn signum(self) -> Self;
|
|
||||||
pub fn sin(self) -> Self;
|
pub fn sin(self) -> Self;
|
||||||
pub fn sinh(self) -> Self;
|
pub fn sinh(self) -> Self;
|
||||||
pub fn sqrt(self) -> Self;
|
pub fn sqrt(self) -> Self;
|
||||||
|
|||||||
76
src/math/math.c
Normal file
76
src/math/math.c
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
#include <stdbool.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#define IMPL_OPERATIONS(C_TYPE, RUST_TYPE) \
|
||||||
|
/* TODO figure out why these don't inline */ \
|
||||||
|
__attribute__((always_inline)) \
|
||||||
|
C_TYPE add_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
|
||||||
|
return a + b; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
__attribute__((always_inline)) \
|
||||||
|
C_TYPE sub_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
|
||||||
|
return a - b; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
__attribute__((always_inline)) \
|
||||||
|
C_TYPE mul_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
|
||||||
|
return a * b; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
__attribute__((always_inline)) \
|
||||||
|
C_TYPE div_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
|
||||||
|
return a / b; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
__attribute__((always_inline)) \
|
||||||
|
bool eq_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
|
||||||
|
return a == b; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
__attribute__((always_inline)) \
|
||||||
|
bool lt_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
|
||||||
|
return a < b; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
__attribute__((always_inline)) \
|
||||||
|
bool le_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
|
||||||
|
return a <= b; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
__attribute__((always_inline)) \
|
||||||
|
bool gt_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
|
||||||
|
return a > b; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
__attribute__((always_inline)) \
|
||||||
|
bool ge_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
|
||||||
|
return a >= b; \
|
||||||
|
} \
|
||||||
|
|
||||||
|
#define IMPL_UNARY_FUNCTION(C_TYPE, RUST_TYPE, FN_NAME, FN_IMPL) \
|
||||||
|
__attribute__((always_inline)) \
|
||||||
|
C_TYPE FN_NAME ## _ ## RUST_TYPE(C_TYPE a) { \
|
||||||
|
return FN_IMPL(a); \
|
||||||
|
} \
|
||||||
|
|
||||||
|
#define IMPL_BINARY_FUNCTION(C_TYPE, RUST_TYPE, FN_NAME, FN_IMPL) \
|
||||||
|
__attribute__((always_inline)) \
|
||||||
|
C_TYPE FN_NAME ## _ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
|
||||||
|
return FN_IMPL(a, b); \
|
||||||
|
} \
|
||||||
|
|
||||||
|
IMPL_OPERATIONS(float, f32)
|
||||||
|
IMPL_OPERATIONS(double, f64)
|
||||||
|
|
||||||
|
IMPL_UNARY_FUNCTION(float, f32, sqrt, sqrtf)
|
||||||
|
IMPL_UNARY_FUNCTION(double, f64, sqrt, sqrt)
|
||||||
|
|
||||||
|
IMPL_BINARY_FUNCTION(float, f32, rem, fmodf)
|
||||||
|
IMPL_BINARY_FUNCTION(double, f64, rem, fmod)
|
||||||
|
|
||||||
|
IMPL_BINARY_FUNCTION(float, f32, max, fmaxf)
|
||||||
|
IMPL_BINARY_FUNCTION(double, f64, max, fmax)
|
||||||
|
|
||||||
|
IMPL_BINARY_FUNCTION(float, f32, min, fminf)
|
||||||
|
IMPL_BINARY_FUNCTION(double, f64, min, fmin)
|
||||||
105
src/math/mod.rs
Normal file
105
src/math/mod.rs
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
use crate::{poison::MaybePoison, FF32, FF64};
|
||||||
|
use paste::paste;
|
||||||
|
|
||||||
|
impl FF32 {
|
||||||
|
const SIGN_BIT: u32 = 0x8000_0000;
|
||||||
|
const UNSIGNED_MASK: u32 = 0x7fff_ffff;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FF64 {
|
||||||
|
const SIGN_BIT: u64 = 0x8000_0000_0000_0000;
|
||||||
|
const UNSIGNED_MASK: u64 = 0x7fff_ffff_ffff_ffff;
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! impl_generic_math {
|
||||||
|
($fast_ty:ident, $base_ty:ident, $base_int:ident) => {
|
||||||
|
impl $fast_ty {
|
||||||
|
#[inline]
|
||||||
|
fn to_bits(self) -> MaybePoison<$base_int> {
|
||||||
|
// Safety:
|
||||||
|
//
|
||||||
|
// - `to_bits` should be valid for any input bits
|
||||||
|
// - poison propagation is controlled with MaybePoison
|
||||||
|
MaybePoison::new(unsafe { <$base_ty>::to_bits(*self.0.maybe_poison().as_ptr()) })
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn from_bits(bits: MaybePoison<$base_int>) -> Self {
|
||||||
|
// Safety:
|
||||||
|
//
|
||||||
|
// - `from_bits` should be valid for any input bits
|
||||||
|
// - poison propagation is controlled with MaybePoison
|
||||||
|
Self(MaybePoison::new(unsafe {
|
||||||
|
<$base_ty>::from_bits(*bits.maybe_poison().as_ptr())
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn abs(self) -> Self {
|
||||||
|
let bits = self.to_bits();
|
||||||
|
<$fast_ty>::from_bits(MaybePoison::new(unsafe {
|
||||||
|
*bits.maybe_poison().as_ptr() & Self::UNSIGNED_MASK
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn copysign(self, other: Self) -> Self {
|
||||||
|
let this = self.to_bits();
|
||||||
|
let that = other.to_bits();
|
||||||
|
|
||||||
|
// Safety:
|
||||||
|
//
|
||||||
|
// - & of poison is safe because & does not produce UB for any input values
|
||||||
|
// - poison propagation is handled by wrapping in maybe poison
|
||||||
|
<$fast_ty>::from_bits(MaybePoison::new(unsafe {
|
||||||
|
(*this.maybe_poison().as_ptr() & Self::UNSIGNED_MASK)
|
||||||
|
| (*that.maybe_poison().as_ptr() & Self::SIGN_BIT)
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn hypot(self, other: Self) -> Self {
|
||||||
|
(self * self + other * other).sqrt()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn signum(self) -> Self {
|
||||||
|
Self::ONE.copysign(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn recip(self) -> Self {
|
||||||
|
Self::ONE / self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! impl_extern_math {
|
||||||
|
($fast_ty:ident, $base_ty:ident) => {
|
||||||
|
paste! {
|
||||||
|
extern "C" {
|
||||||
|
fn [<min_ $base_ty>](a: $fast_ty, b: $fast_ty) -> $fast_ty;
|
||||||
|
fn [<max_ $base_ty>](a: $fast_ty, b: $fast_ty) -> $fast_ty;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl $fast_ty {
|
||||||
|
#[inline]
|
||||||
|
pub fn max(self, other: Self) -> Self {
|
||||||
|
unsafe { [<max_ $base_ty>](self, other) }
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn min(self, other: Self) -> Self {
|
||||||
|
unsafe { [<min_ $base_ty>](self, other) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
impl_generic_math! { FF32, f32, u32 }
|
||||||
|
impl_generic_math! { FF64, f64, u64 }
|
||||||
|
|
||||||
|
impl_extern_math! { FF32, f32 }
|
||||||
|
impl_extern_math! { FF64, f64 }
|
||||||
Reference in New Issue
Block a user