From 2c77c05dff14fef79d9f53c701dd1678a36e36a4 Mon Sep 17 00:00:00 2001 From: Renar Narubin Date: Fri, 5 Nov 2021 14:47:17 -0700 Subject: [PATCH] FF64 --- benches/operations.rs | 22 ++- src/lib.rs | 399 +++++++++++++++++++++++------------------- 2 files changed, 241 insertions(+), 180 deletions(-) diff --git a/benches/operations.rs b/benches/operations.rs index e4cabe2..34dd228 100644 --- a/benches/operations.rs +++ b/benches/operations.rs @@ -1,5 +1,5 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; -use fast_fp::{ff32, FF32}; +use fast_fp::{ff32, ff64, FF32, FF64}; use rand::{distributions::Standard, thread_rng, Rng}; fn sum(c: &mut Criterion) { @@ -26,6 +26,26 @@ fn sum(c: &mut Criterion) { group.bench_with_input(BenchmarkId::new("FF32", count), &ff32_vals, |b, vals| { b.iter(|| vals.iter().copied().fold(ff32(0.0), |acc, val| acc + val)); }); + + let f64_vals = thread_rng() + .sample_iter(Standard) + .take(count) + .collect::>(); + + // use the same values for both benchmarks + let ff64_vals = f64_vals + .clone() + .into_iter() + .map(ff64) + .collect::>(); + + group.bench_with_input(BenchmarkId::new("std::f64", count), &f64_vals, |b, vals| { + b.iter(|| vals.iter().copied().fold(0.0, |acc, val| acc + val)); + }); + + group.bench_with_input(BenchmarkId::new("FF64", count), &ff64_vals, |b, vals| { + b.iter(|| vals.iter().copied().fold(ff64(0.0), |acc, val| acc + val)); + }); } group.finish(); } diff --git a/src/lib.rs b/src/lib.rs index 03dcd8e..efb91e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -62,49 +62,12 @@ impl std::error::Error for InvalidValueError {} // https://github.com/rust-lang/unsafe-code-guidelines/issues/71 // notes on the validity of primitive bit patterns -/// A wrapper over `f32` which enables fast-math optimizations. +/// A wrapper over `f32` which enables some fast-math optimizations. // TODO how best to document unspecified values, including witnessing possibly varying values #[derive(Clone, Copy)] #[repr(transparent)] pub struct FF32(MaybePoison); -impl FF32 { - /// Create a new `FF32` instance from the given float value. - /// - /// The given value **MUST NOT** be infinite or NaN, and any operations involving this value must - /// not produce infinite or NaN results. The output of any such operation is unspecified. - #[inline(always)] - pub const fn new(f: f32) -> Self { - FF32(MaybePoison::new(f)) - } - - /// Create a new `FF32` instance from the given float value, returning an error if the value is - /// infinite or NaN. - /// - /// Note that this check is **not sufficient** to avoid all unspecified outputs, because an - /// operation could otherwise produce an invalid value with valid inputs (for example - /// `ff32(1.0) / ff32(0.0)` is unspecified). Nevertheless, this check can be useful for - /// limited best-effort validation. - #[inline(always)] - pub fn new_checked(f: f32) -> Result { - // finite also checks for NaN - if f.is_finite() { - Ok(FF32::new(f)) - } else { - Err(InvalidValueError { _priv: () }) - } - } - - #[inline(always)] - fn freeze_f32(self) -> f32 { - let inner = self.0.freeze(); - - // Safety: - // every bit pattern is valid in float - unsafe { inner.assume_init() } - } -} - /// Create a new `FF32` instance from the given float value. /// /// This is syntax sugar for constructing the `FF32` type, and equivalent to `FF32::new(f)` @@ -117,42 +80,22 @@ pub fn ff32(f: f32) -> FF32 { FF32::new(f) } -impl Neg for FF32 { - type Output = Self; +/// A wrapper over `f64` which enables some fast-math optimizations. +// TODO how best to document unspecified values, including witnessing possibly varying values +#[derive(Clone, Copy)] +#[repr(transparent)] +pub struct FF64(MaybePoison); - #[inline(always)] - fn neg(self) -> Self::Output { - // Safety: - // - // - dereferencing the pointers is safe because every bit pattern is valid in float - // primitives - // - encountering poison is safe because LLVM's negate instruction documents - // not producing UB on any inputs. The value is also immediately wrapped, so - // poison propagation is controlled - let val = unsafe { *self.0.maybe_poison().as_ptr() }; - FF32::new(-val) - } -} - -impl Neg for &FF32 { - type Output = ::Output; - - #[inline] - fn neg(self) -> Self::Output { - -(*self) - } -} - -impl fmt::Debug for FF32 { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Debug::fmt(&self.freeze_f32(), f) - } -} - -impl fmt::Display for FF32 { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(&self.freeze_f32(), f) - } +/// Create a new `FF64` instance from the given float value. +/// +/// This is syntax sugar for constructing the `FF64` type, and equivalent to `FF64::new(f)` +/// +/// The given value **MUST NOT** be infinite or NaN, and any operations involving this value must +/// not produce infinite or NaN results. The output of any such operation is unspecified. +#[inline(always)] +pub fn ff64(f: f64) -> FF64 { + // TODO maybe a feature flag to make this checked -> panic? + FF64::new(f) } macro_rules! impl_refs { @@ -233,120 +176,218 @@ macro_rules! impl_fast_ops { }; } -impl_fast_ops! { - FF32, f32: - Add, add, fadd_fast, - Sub, sub, fsub_fast, - Mul, mul, fmul_fast, - Div, div, fdiv_fast, - Rem, rem, frem_fast, -} - -// Branching on poison values is UB, so any operation that makes a bool is protected by freezing -// the operands. This includes [Partial]Eq and [Partial]Ord. -// -// Note however that only value copies are frozen; the original values may still be poison, and -// could even yield different concrete values on a subsequent freeze. This means that potentially -// the values are not Eq/Ord consistent. Logical consistency is left as a responsibility of -// the user, to maintain non inf/nan values, while the lib only ensures safety. - -impl PartialEq for FF32 { - #[inline] - fn eq(&self, other: &FF32) -> bool { - let this = self.freeze_f32(); - let that = other.freeze_f32(); - - this == that +macro_rules! impl_fmt { + ($fast_ty:ident, $base_ty:ident, $($fmt_trait:path,)*) => { + $( + impl $fmt_trait for $fast_ty { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + <$base_ty as $fmt_trait>::fmt(&self.freeze_raw(), f) + } + } + )* } } -impl PartialEq for FF32 { - #[inline] - fn eq(&self, other: &f32) -> bool { - let this = self.freeze_f32(); - let that = *other; +macro_rules! impls { + ($fast_ty:ident, $base_ty: ident) => { + impl $fast_ty { + #[doc = "Create a new `"] + #[doc= stringify!($fast_ty)] + #[doc = "` instance from the given float value."] + /// + /// The given value **MUST NOT** be infinite or NaN, and any operations involving this value must + /// not produce infinite or NaN results. The output of any such operation is unspecified. + #[inline(always)] + pub const fn new(f: $base_ty) -> Self { + $fast_ty(MaybePoison::new(f)) + } - this == that - } -} + #[doc = "Create a new `"] + #[doc= stringify!($fast_ty)] + #[doc = "` instance from the given float value, returning an error if the value is infinite or NaN."] + /// + /// Note that this check is **not sufficient** to avoid all unspecified outputs, because an + /// operation could otherwise produce an invalid value with valid inputs (for example + /// `ff32(1.0) / ff32(0.0)` is unspecified). Nevertheless, this check can be useful for + /// limited best-effort validation. + #[inline(always)] + pub fn new_checked(f: $base_ty) -> Result { + // finite also checks for NaN + if f.is_finite() { + Ok($fast_ty::new(f)) + } else { + Err(InvalidValueError { _priv: () }) + } + } -impl PartialEq for f32 { - #[inline] - fn eq(&self, other: &FF32) -> bool { - let this = *self; - let that = other.freeze_f32(); + #[inline(always)] + fn freeze_raw(self) -> $base_ty { + let inner = self.0.freeze(); - this == that - } -} - -impl Eq for FF32 {} - -impl PartialOrd for FF32 { - #[inline(always)] - fn partial_cmp(&self, other: &FF32) -> Option { - Some(self.cmp(other)) - } - - #[inline(always)] - fn lt(&self, other: &FF32) -> bool { - self.freeze_f32() < other.freeze_f32() - } - - #[inline(always)] - fn le(&self, other: &FF32) -> bool { - self.freeze_f32() <= other.freeze_f32() - } - - #[inline(always)] - fn gt(&self, other: &FF32) -> bool { - self.freeze_f32() > other.freeze_f32() - } - - #[inline(always)] - fn ge(&self, other: &FF32) -> bool { - self.freeze_f32() >= other.freeze_f32() - } -} - -impl Ord for FF32 { - #[inline(always)] - fn cmp(&self, other: &FF32) -> cmp::Ordering { - let this = self.freeze_f32(); - let that = other.freeze_f32(); - - // Note NaNs are not supported (and would break everything else anyway) so we ignore them - // and implement full Ord - if this < that { - cmp::Ordering::Less - } else if this > that { - cmp::Ordering::Greater - } else { - cmp::Ordering::Equal + // Safety: + // every bit pattern is valid in float + unsafe { inner.assume_init() } + } } - } - #[inline] - fn clamp(self, min: FF32, max: FF32) -> FF32 { - ff32(f32::clamp( - self.freeze_f32(), - min.freeze_f32(), - max.freeze_f32(), - )) - } + impl_fmt! { + $fast_ty, $base_ty, + fmt::Debug, fmt::Display, fmt::LowerExp, fmt::UpperExp, + } + + impl Neg for $fast_ty { + type Output = Self; + + #[inline(always)] + fn neg(self) -> Self::Output { + // Safety: + // + // - dereferencing the pointers is safe because every bit pattern is valid in float + // primitives + // - encountering poison is safe because LLVM's negate instruction documents + // not producing UB on any inputs. The value is also immediately wrapped, so + // poison propagation is controlled + let val = unsafe { *self.0.maybe_poison().as_ptr() }; + $fast_ty::new(-val) + } + } + + impl Neg for &$fast_ty { + type Output = <$fast_ty as Neg>::Output; + + #[inline] + fn neg(self) -> Self::Output { + -(*self) + } + } + + // Branching on poison values is UB, so any operation that makes a bool is protected by + // freezing the operands. This includes [Partial]Eq and [Partial]Ord. Unfortunately + // freezing has a nontrivial impact on performance, so non-bool methods should be preferred + // when applicable, such as min/max/clamp + // + // Note however that only value copies are frozen; the original values may still be poison, and + // could even yield different concrete values on a subsequent freeze. This means that potentially + // the values are not Eq/Ord consistent. Logical consistency is left as a responsibility of + // the user, to maintain non inf/nan values, while the lib only ensures safety. + + impl PartialEq<$fast_ty> for $fast_ty { + #[inline] + fn eq(&self, other: &$fast_ty) -> bool { + let this = self.freeze_raw(); + let that = other.freeze_raw(); + + this == that + } + } + + impl PartialEq<$base_ty> for $fast_ty { + #[inline] + fn eq(&self, other: &$base_ty) -> bool { + let this = self.freeze_raw(); + let that = *other; + + this == that + } + } + + impl PartialEq<$fast_ty> for $base_ty { + #[inline] + fn eq(&self, other: &$fast_ty) -> bool { + let this = *self; + let that = other.freeze_raw(); + + this == that + } + } + + impl Eq for $fast_ty {} + + impl PartialOrd<$fast_ty> for $fast_ty { + #[inline(always)] + fn partial_cmp(&self, other: &$fast_ty) -> Option { + Some(self.cmp(other)) + } + + // TODO specialize a MaybePoison with `x & 0b1`? + // then comparisons can freeze only once on output instead of twice on input + + #[inline(always)] + fn lt(&self, other: &$fast_ty) -> bool { + self.freeze_raw() < other.freeze_raw() + } + + #[inline(always)] + fn le(&self, other: &$fast_ty) -> bool { + self.freeze_raw() <= other.freeze_raw() + } + + #[inline(always)] + fn gt(&self, other: &$fast_ty) -> bool { + self.freeze_raw() > other.freeze_raw() + } + + #[inline(always)] + fn ge(&self, other: &$fast_ty) -> bool { + self.freeze_raw() >= other.freeze_raw() + } + } + + impl Ord for $fast_ty { + #[inline(always)] + fn cmp(&self, other: &$fast_ty) -> cmp::Ordering { + let this = self.freeze_raw(); + let that = other.freeze_raw(); + + // Note NaNs are not supported (and would break everything else anyway) so we ignore them + // and implement full Ord + if this < that { + cmp::Ordering::Less + } else if this > that { + cmp::Ordering::Greater + } else { + cmp::Ordering::Equal + } + } + + #[inline] + fn clamp(self, min: $fast_ty, max: $fast_ty) -> $fast_ty { + // TODO implement in terms of min/max, + // TODO also implement min/max (intrinsics? we don't want branches) + <$fast_ty>::new($base_ty::clamp( + self.freeze_raw(), + min.freeze_raw(), + max.freeze_raw(), + )) + } + } + + impl From<$fast_ty> for $base_ty { + fn from(from: $fast_ty) -> Self { + // base primitives are no longer in our API control, so we must stop poison + // propagation by freezing + from.freeze_raw() + } + } + + impl From<$base_ty> for $fast_ty { + fn from(from: $base_ty) -> Self { + <$fast_ty>::new(from) + } + } + + impl_fast_ops! { + $fast_ty, $base_ty: + Add, add, fadd_fast, + Sub, sub, fsub_fast, + Mul, mul, fmul_fast, + Div, div, fdiv_fast, + Rem, rem, frem_fast, + } + }; } -impl From for f32 { - fn from(from: FF32) -> Self { - // f32 is no longer in our API control, so we must stop poison propagation by freezing - from.freeze_f32() - } -} +impls! { FF32, f32 } +impls! { FF64, f64 } -impl From for FF32 { - fn from(from: f32) -> Self { - ff32(from) - } -} - -// TODO FF64, macro everything, more ops, libm? +// TODO num_traits, libm?