extern math

This commit is contained in:
Renar Narubin
2021-11-15 16:45:59 -08:00
parent 31d17c3333
commit 5bb809d657
6 changed files with 260 additions and 7 deletions

View File

@@ -6,10 +6,17 @@ edition = "2018"
readme = "README.md"
license = "MIT OR Apache-2.0"
[lib]
bench = false
[[bench]]
name = "operations"
harness = false
[[bench]]
name = "math"
harness = false
[features]
default = ["num-traits"]
@@ -17,6 +24,8 @@ nalgebra-v021 = ["num-traits", "nalgebra_v021", "simba_v01", "approx_v03"]
nalgebra-v029 = ["num-traits", "nalgebra_v029", "simba_v06", "approx_v05"]
[dependencies]
paste = "1"
num-traits = { version = "0.2", optional = true }
approx_v03 = { package = "approx", version = "0.3", optional = true }
@@ -27,6 +36,9 @@ approx_v05 = { package = "approx", version = "0.5", optional = true }
nalgebra_v029 = { package = "nalgebra", version = "0.29", optional = true }
simba_v06 = { package = "simba", version = "0.6", optional = true }
[build-dependencies]
cc = "1"
[dev-dependencies]
criterion = { version = "0.3", features = ["html_reports"] }
rand = "0.8"
@@ -34,3 +46,7 @@ rand = "0.8"
[profile.test]
# run tests at high optimization to exercise typical codegen
opt-level = 3
[profile.release]
lto = "fat"
codegen-units = 1

38
benches/math.rs Normal file
View File

@@ -0,0 +1,38 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use fast_fp::{ff32, FF32};
use rand::{distributions::Standard, thread_rng, Rng};
/// Benchmarks reducing a buffer to its minimum with `f32::min` versus `FF32::min`.
///
/// For every input length a single random `f32` buffer is generated and then mirrored into an
/// `FF32` buffer, so both benchmark ids fold over the same values.
fn min(c: &mut Criterion) {
    let mut group = c.benchmark_group("min");

    for count in [2, 8, 32, 1024] {
        // report per-element throughput so the different lengths can be compared
        group.throughput(Throughput::Elements(count as u64));

        let f32_vals: Vec<f32> = thread_rng().sample_iter(Standard).take(count).collect();

        // use the same values for both benchmarks
        let ff32_vals: Vec<FF32> = f32_vals.iter().copied().map(ff32).collect();

        group.bench_with_input(BenchmarkId::new("std::f32", count), &f32_vals, |b, vals| {
            b.iter(|| vals.iter().copied().fold(f32::MAX, f32::min));
        });

        group.bench_with_input(BenchmarkId::new("FF32", count), &ff32_vals, |b, vals| {
            b.iter(|| vals.iter().copied().fold(FF32::MAX, FF32::min));
        });
    }

    group.finish();
}
// Register `min` as the only benchmark of the `benches` group and generate the entry point;
// the `math` bench target is declared with `harness = false`, so `main` comes from Criterion.
criterion_group!(benches, min);
criterion_main!(benches);

20
build.rs Normal file
View File

@@ -0,0 +1,20 @@
/// Build script: compiles `src/math/math.c` into a static library named `math`.
///
/// The C file is built with clang and a set of fast-math flags so that the shims it defines
/// are compiled under relaxed floating-point semantics.
fn main() {
    // Declare the script's only source input explicitly. Once a build script emits any
    // `rerun-if-*` directive Cargo stops falling back to "rerun when any package file changes",
    // so without this line edits to the C file could leave a stale library behind.
    println!("cargo:rerun-if-changed=src/math/math.c");

    let mut builder = cc::Build::new();

    if !builder.get_compiler().is_like_clang() {
        // if the default/configured cc is not clang, try to call clang manually
        builder.compiler("clang");
    }

    builder
        .file("src/math/math.c")
        // optimisation level and (thin) link-time optimisation
        .flag("-O3")
        .flag("-flto=thin")
        // individual fast-math relaxations
        .flag("-ffinite-math-only") // assume no NaN or infinity
        .flag("-fassociative-math") // allow re-association of operations
        .flag("-freciprocal-math") // allow x / y => x * (1 / y)
        .flag("-fno-signed-zeros") // ignore the sign of zero
        .flag("-fno-trapping-math") // assume no floating-point traps
        .flag("-ffp-contract=fast") // allow fused multiply-add contraction
        .compile("math");
}

View File

@@ -2,6 +2,8 @@
#![feature(core_intrinsics)] // intrinsics for the fast math
#![feature(asm)] // asm used to emulate freeze
#![feature(doc_cfg)]
#![feature(link_llvm_intrinsics)]
use core::{
cmp, fmt,
intrinsics::{fadd_fast, fdiv_fast, fmul_fast, frem_fast, fsub_fast},
@@ -40,6 +42,7 @@ macro_rules! forward_freeze_self {
};
}
mod math;
mod nalgebra;
mod num_traits;
@@ -347,7 +350,6 @@ macro_rules! impls {
// TODO migrate these to native implementations to freeze less and fast-math more
forward_freeze_self! {
$fast_ty, $base_ty
pub fn abs(self) -> Self;
pub fn acos(self) -> Self;
pub fn acosh(self) -> Self;
pub fn asin(self) -> Self;
@@ -358,7 +360,6 @@ macro_rules! impls {
pub fn cbrt(self) -> Self;
pub fn ceil(self) -> Self;
pub fn clamp(self, min: Self, max: Self) -> Self;
pub fn copysign(self, sign: Self) -> Self;
pub fn cos(self) -> Self;
pub fn cosh(self) -> Self;
pub fn div_euclid(self, rhs: Self) -> Self;
@@ -367,20 +368,17 @@ macro_rules! impls {
pub fn exp_m1(self) -> Self;
pub fn floor(self) -> Self;
pub fn fract(self) -> Self;
pub fn hypot(self, other: Self) -> Self;
pub fn ln(self) -> Self;
pub fn ln_1p(self) -> Self;
pub fn log(self, base: Self) -> Self;
pub fn log10(self) -> Self;
pub fn log2(self) -> Self;
pub fn max(self, other: Self) -> Self;
pub fn min(self, other: Self) -> Self;
//pub fn max(self, other: Self) -> Self;
//pub fn min(self, other: Self) -> Self;
pub fn mul_add(self, a: Self, b: Self) -> Self;
pub fn powf(self, n: Self) -> Self;
pub fn recip(self) -> Self;
pub fn rem_euclid(self, rhs: Self) -> Self;
pub fn round(self) -> Self;
pub fn signum(self) -> Self;
pub fn sin(self) -> Self;
pub fn sinh(self) -> Self;
pub fn sqrt(self) -> Self;

76
src/math/math.c Normal file
View File

@@ -0,0 +1,76 @@
#include <stdbool.h>
#include <math.h>
#define IMPL_OPERATIONS(C_TYPE, RUST_TYPE) \
/* TODO figure out why these don't inline */ \
__attribute__((always_inline)) \
C_TYPE add_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
return a + b; \
} \
\
__attribute__((always_inline)) \
C_TYPE sub_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
return a - b; \
} \
\
__attribute__((always_inline)) \
C_TYPE mul_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
return a * b; \
} \
\
__attribute__((always_inline)) \
C_TYPE div_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
return a / b; \
} \
\
__attribute__((always_inline)) \
bool eq_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
return a == b; \
} \
\
__attribute__((always_inline)) \
bool lt_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
return a < b; \
} \
\
__attribute__((always_inline)) \
bool le_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
return a <= b; \
} \
\
__attribute__((always_inline)) \
bool gt_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
return a > b; \
} \
\
__attribute__((always_inline)) \
bool ge_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
return a >= b; \
} \
#define IMPL_UNARY_FUNCTION(C_TYPE, RUST_TYPE, FN_NAME, FN_IMPL) \
__attribute__((always_inline)) \
C_TYPE FN_NAME ## _ ## RUST_TYPE(C_TYPE a) { \
return FN_IMPL(a); \
} \
#define IMPL_BINARY_FUNCTION(C_TYPE, RUST_TYPE, FN_NAME, FN_IMPL) \
__attribute__((always_inline)) \
C_TYPE FN_NAME ## _ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
return FN_IMPL(a, b); \
} \
IMPL_OPERATIONS(float, f32)
IMPL_OPERATIONS(double, f64)
IMPL_UNARY_FUNCTION(float, f32, sqrt, sqrtf)
IMPL_UNARY_FUNCTION(double, f64, sqrt, sqrt)
IMPL_BINARY_FUNCTION(float, f32, rem, fmodf)
IMPL_BINARY_FUNCTION(double, f64, rem, fmod)
IMPL_BINARY_FUNCTION(float, f32, max, fmaxf)
IMPL_BINARY_FUNCTION(double, f64, max, fmax)
IMPL_BINARY_FUNCTION(float, f32, min, fminf)
IMPL_BINARY_FUNCTION(double, f64, min, fmin)

105
src/math/mod.rs Normal file
View File

@@ -0,0 +1,105 @@
use crate::{poison::MaybePoison, FF32, FF64};
use paste::paste;
impl FF32 {
    /// Only the most significant bit of the 32-bit pattern set (the sign position).
    const SIGN_BIT: u32 = 1 << 31;
    /// Every bit except [`Self::SIGN_BIT`].
    const UNSIGNED_MASK: u32 = !Self::SIGN_BIT;
}
impl FF64 {
    /// Only the most significant bit of the 64-bit pattern set (the sign position).
    const SIGN_BIT: u64 = 1 << 63;
    /// Every bit except [`Self::SIGN_BIT`].
    const UNSIGNED_MASK: u64 = !Self::SIGN_BIT;
}
// Generates math helpers that are implemented directly in Rust for a fast float type.
//
// Arguments:
// - `$fast_ty`: the fast float wrapper the methods are added to
// - `$base_ty`: the primitive float whose `to_bits`/`from_bits` are used
// - `$base_int`: the integer type holding that float's bit pattern
//
// The expansion relies on `$fast_ty` providing `SIGN_BIT`, `UNSIGNED_MASK`, `ONE`, `sqrt`
// and the arithmetic operators used below.
macro_rules! impl_generic_math {
($fast_ty:ident, $base_ty:ident, $base_int:ident) => {
impl $fast_ty {
/// Returns the bit pattern of `self`, still wrapped in `MaybePoison`.
#[inline]
fn to_bits(self) -> MaybePoison<$base_int> {
// Safety:
//
// - `to_bits` should be valid for any input bits
// - poison propagation is controlled with MaybePoison
MaybePoison::new(unsafe { <$base_ty>::to_bits(*self.0.maybe_poison().as_ptr()) })
}
/// Rebuilds a value from a (possibly poison) bit pattern.
#[inline]
fn from_bits(bits: MaybePoison<$base_int>) -> Self {
// Safety:
//
// - `from_bits` should be valid for any input bits
// - poison propagation is controlled with MaybePoison
Self(MaybePoison::new(unsafe {
<$base_ty>::from_bits(*bits.maybe_poison().as_ptr())
}))
}
/// Returns `self` with the sign bit cleared.
#[inline]
pub fn abs(self) -> Self {
let bits = self.to_bits();
// Safety (same reasoning as documented in `copysign` below):
//
// - the only operation applied to the raw bits is `&` with a constant mask
// - the result is immediately re-wrapped in MaybePoison
<$fast_ty>::from_bits(MaybePoison::new(unsafe {
*bits.maybe_poison().as_ptr() & Self::UNSIGNED_MASK
}))
}
/// Returns a value with the non-sign bits of `self` and the sign bit of `other`.
#[inline]
pub fn copysign(self, other: Self) -> Self {
let this = self.to_bits();
let that = other.to_bits();
// Safety:
//
// - & of poison is safe because & does not produce UB for any input values
// - poison propagation is handled by wrapping in maybe poison
<$fast_ty>::from_bits(MaybePoison::new(unsafe {
(*this.maybe_poison().as_ptr() & Self::UNSIGNED_MASK)
| (*that.maybe_poison().as_ptr() & Self::SIGN_BIT)
}))
}
/// Computes `sqrt(self * self + other * other)` directly, without any rescaling of the
/// operands.
#[inline]
pub fn hypot(self, other: Self) -> Self {
(self * self + other * other).sqrt()
}
/// Returns `Self::ONE` carrying the sign bit of `self`.
#[inline]
pub fn signum(self) -> Self {
Self::ONE.copysign(self)
}
/// Returns `Self::ONE / self`.
#[inline]
pub fn recip(self) -> Self {
Self::ONE / self
}
}
};
}
// Generates `min`/`max` for a fast float type by forwarding to external C symbols.
//
// `$base_ty` selects the symbol suffix: the pasted names are `min_<base_ty>` and
// `max_<base_ty>` (e.g. `min_f32`).
macro_rules! impl_extern_math {
    ($fast_ty:ident, $base_ty:ident) => {
        paste! {
            // resolved at link time against the externally compiled math shims
            extern "C" {
                fn [<max_ $base_ty>](lhs: $fast_ty, rhs: $fast_ty) -> $fast_ty;
                fn [<min_ $base_ty>](lhs: $fast_ty, rhs: $fast_ty) -> $fast_ty;
            }

            impl $fast_ty {
                /// Returns the smaller of `self` and `other`, as computed by the external shim.
                #[inline]
                pub fn min(self, other: Self) -> Self {
                    unsafe { [<min_ $base_ty>](self, other) }
                }

                /// Returns the larger of `self` and `other`, as computed by the external shim.
                #[inline]
                pub fn max(self, other: Self) -> Self {
                    unsafe { [<max_ $base_ty>](self, other) }
                }
            }
        }
    };
}
// Pure-Rust helpers: each fast type is paired with its primitive float and the integer type
// used for its bit pattern.
impl_generic_math! { FF32, f32, u32 }
impl_generic_math! { FF64, f64, u64 }
// `min`/`max` forwarded to the external `min_f32`/`max_f32` and `min_f64`/`max_f64` symbols.
impl_extern_math! { FF32, f32 }
impl_extern_math! { FF64, f64 }