extern arithmetic
This commit is contained in:
30
Cargo.toml
30
Cargo.toml
@@ -2,7 +2,7 @@
|
|||||||
name = "fast_fp"
|
name = "fast_fp"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
authors = ["Renar Narubin <renar@standard.ai>"]
|
authors = ["Renar Narubin <renar@standard.ai>"]
|
||||||
edition = "2018"
|
edition = "2021"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
license = "MIT OR Apache-2.0"
|
license = "MIT OR Apache-2.0"
|
||||||
|
|
||||||
@@ -18,8 +18,30 @@ name = "math"
|
|||||||
harness = false
|
harness = false
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["num-traits"]
|
default = [
|
||||||
|
"num-traits",
|
||||||
|
"finite-math-only",
|
||||||
|
"associative-math",
|
||||||
|
"reciprocal-math",
|
||||||
|
"no-signed-zeros",
|
||||||
|
"no-trapping-math",
|
||||||
|
"fp-contract-fast",
|
||||||
|
"approx-func",
|
||||||
|
]
|
||||||
|
|
||||||
|
# default fast-math features
|
||||||
|
finite-math-only = []
|
||||||
|
associative-math = []
|
||||||
|
reciprocal-math = []
|
||||||
|
no-signed-zeros = []
|
||||||
|
no-trapping-math = []
|
||||||
|
fp-contract-fast = []
|
||||||
|
approx-func = []
|
||||||
|
|
||||||
|
# non-default fast-math-like features
|
||||||
|
denormal-fp-math-preserve-sign = []
|
||||||
|
|
||||||
|
# optional trait implementations
|
||||||
nalgebra-v021 = ["num-traits", "nalgebra_v021", "simba_v01", "approx_v03"]
|
nalgebra-v021 = ["num-traits", "nalgebra_v021", "simba_v01", "approx_v03"]
|
||||||
nalgebra-v029 = ["num-traits", "nalgebra_v029", "simba_v06", "approx_v05"]
|
nalgebra-v029 = ["num-traits", "nalgebra_v029", "simba_v06", "approx_v05"]
|
||||||
|
|
||||||
@@ -48,5 +70,5 @@ rand = "0.8"
|
|||||||
opt-level = 3
|
opt-level = 3
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
lto = "fat"
|
lto="thin"
|
||||||
codegen-units = 1
|
codegen-units=1
|
||||||
|
|||||||
@@ -1,54 +1,122 @@
|
|||||||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
use criterion::{
|
||||||
|
criterion_group, criterion_main, measurement::Measurement, BatchSize, BenchmarkGroup,
|
||||||
|
BenchmarkId, Criterion, Throughput,
|
||||||
|
};
|
||||||
use fast_fp::{ff32, ff64, FF32, FF64};
|
use fast_fp::{ff32, ff64, FF32, FF64};
|
||||||
use rand::{distributions::Standard, thread_rng, Rng};
|
use rand::{
|
||||||
|
distributions::{self, Distribution},
|
||||||
|
rngs::StdRng,
|
||||||
|
Rng, SeedableRng,
|
||||||
|
};
|
||||||
|
use std::ops::{Add, Div, Mul};
|
||||||
|
|
||||||
fn sum(c: &mut Criterion) {
|
fn add(c: &mut Criterion) {
|
||||||
let mut group = c.benchmark_group("sum");
|
let mut group = c.benchmark_group("add");
|
||||||
for count in [2, 4, 8, 16, 64, 1024, 1 << 15] {
|
|
||||||
group.throughput(Throughput::Elements(count as u64));
|
|
||||||
|
|
||||||
let f32_vals = thread_rng()
|
let rng = StdRng::from_entropy();
|
||||||
.sample_iter(Standard)
|
let f32s = distributions::Uniform::<f32>::new(0.0, 1.0);
|
||||||
.take(count)
|
let f64s = distributions::Uniform::<f64>::new(0.0, 1.0);
|
||||||
.collect::<Vec<f32>>();
|
|
||||||
|
|
||||||
// use the same values for both benchmarks
|
// clone the rng for each benched type to keep the generated values identical
|
||||||
let ff32_vals = f32_vals
|
fold(&mut group, "std::f32", f32::add, 0.0, rng.clone(), f32s);
|
||||||
.clone()
|
fold(&mut group, "FF32", FF32::add, ff32(0.0), rng.clone(), f32s);
|
||||||
.into_iter()
|
fold(&mut group, "std::f64", f64::add, 0.0, rng.clone(), f64s);
|
||||||
.map(ff32)
|
fold(&mut group, "FF64", FF64::add, ff64(0.0), rng.clone(), f64s);
|
||||||
.collect::<Vec<FF32>>();
|
|
||||||
|
|
||||||
group.bench_with_input(BenchmarkId::new("std::f32", count), &f32_vals, |b, vals| {
|
|
||||||
b.iter(|| vals.iter().copied().fold(0.0, |acc, val| acc + val));
|
|
||||||
});
|
|
||||||
|
|
||||||
group.bench_with_input(BenchmarkId::new("FF32", count), &ff32_vals, |b, vals| {
|
|
||||||
b.iter(|| vals.iter().copied().fold(ff32(0.0), |acc, val| acc + val));
|
|
||||||
});
|
|
||||||
|
|
||||||
let f64_vals = thread_rng()
|
|
||||||
.sample_iter(Standard)
|
|
||||||
.take(count)
|
|
||||||
.collect::<Vec<f64>>();
|
|
||||||
|
|
||||||
// use the same values for both benchmarks
|
|
||||||
let ff64_vals = f64_vals
|
|
||||||
.clone()
|
|
||||||
.into_iter()
|
|
||||||
.map(ff64)
|
|
||||||
.collect::<Vec<FF64>>();
|
|
||||||
|
|
||||||
group.bench_with_input(BenchmarkId::new("std::f64", count), &f64_vals, |b, vals| {
|
|
||||||
b.iter(|| vals.iter().copied().fold(0.0, |acc, val| acc + val));
|
|
||||||
});
|
|
||||||
|
|
||||||
group.bench_with_input(BenchmarkId::new("FF64", count), &ff64_vals, |b, vals| {
|
|
||||||
b.iter(|| vals.iter().copied().fold(ff64(0.0), |acc, val| acc + val));
|
|
||||||
});
|
|
||||||
}
|
|
||||||
group.finish();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
criterion_group!(benches, sum);
|
/// Benchmarks a multiplicative fold for `f32`, `FF32`, `f64` and `FF64`.
///
/// Operands are drawn from `Uniform::new(0.9, 1.1)` and the fold is seeded with the
/// multiplicative identity `1.0`. A `0.0` seed would pin every partial product to zero,
/// so the benchmark would never multiply representative values.
fn mul(c: &mut Criterion) {
|
||||||
|
let mut group = c.benchmark_group("mul");
|
||||||
|
|
||||||
|
let rng = StdRng::from_entropy();
|
||||||
|
|
||||||
|
// try to avoid subnormals/explosions by limiting the values near 1
|
||||||
|
let f32s = distributions::Uniform::<f32>::new(0.9, 1.1);
|
||||||
|
let f64s = distributions::Uniform::<f64>::new(0.9, 1.1);
|
||||||
|
|
||||||
|
// clone the rng for each benched type to keep the generated values identical
|
||||||
|
// seed each product with 1.0 so the accumulator tracks the sampled operands
fold(&mut group, "std::f32", f32::mul, 1.0, rng.clone(), f32s);
|
||||||
|
fold(&mut group, "FF32", FF32::mul, ff32(1.0), rng.clone(), f32s);
|
||||||
|
fold(&mut group, "std::f64", f64::mul, 1.0, rng.clone(), f64s);
|
||||||
|
fold(&mut group, "FF64", FF64::mul, ff64(1.0), rng.clone(), f64s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Benchmarks a chained-division fold for `f32`, `FF32`, `f64` and `FF64`.
///
/// Divisors are drawn from `Uniform::new(0.9, 1.1)` and the running quotient starts at
/// `1.0`. A `0.0` numerator would stay `0.0` after every division, so the benchmark
/// would never divide representative values.
fn div(c: &mut Criterion) {
|
||||||
|
let mut group = c.benchmark_group("div");
|
||||||
|
|
||||||
|
let rng = StdRng::from_entropy();
|
||||||
|
|
||||||
|
// try to avoid subnormals/explosions by limiting the values near 1
|
||||||
|
let f32s = distributions::Uniform::<f32>::new(0.9, 1.1);
|
||||||
|
let f64s = distributions::Uniform::<f64>::new(0.9, 1.1);
|
||||||
|
|
||||||
|
// clone the rng for each benched type to keep the generated values identical
|
||||||
|
// seed each quotient with 1.0 so the accumulator tracks the sampled divisors
fold(&mut group, "std::f32", f32::div, 1.0, rng.clone(), f32s);
|
||||||
|
fold(&mut group, "FF32", FF32::div, ff32(1.0), rng.clone(), f32s);
|
||||||
|
fold(&mut group, "std::f64", f64::div, 1.0, rng.clone(), f64s);
|
||||||
|
fold(&mut group, "FF64", FF64::div, ff64(1.0), rng.clone(), f64s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Benchmarks a running-minimum fold for `f32`, `FF32`, `f64` and `FF64`.
///
/// Operands are drawn from `Uniform::new(0.0, 1.0)` and each fold starts from zero.
fn min(c: &mut Criterion) {
|
||||||
|
let mut group = c.benchmark_group("min");
|
||||||
|
|
||||||
|
let rng = StdRng::from_entropy();
|
||||||
|
let f32s = distributions::Uniform::<f32>::new(0.0, 1.0);
|
||||||
|
let f64s = distributions::Uniform::<f64>::new(0.0, 1.0);
|
||||||
|
|
||||||
|
// clone the rng for each benched type to keep the generated values identical
|
||||||
|
// NOTE(review): the seed 0.0 is also the lower bound of the sampled range, so the
// running minimum presumably never changes — confirm this is the intended workload.
fold(&mut group, "std::f32", f32::min, 0.0, rng.clone(), f32s);
|
||||||
|
fold(&mut group, "FF32", FF32::min, ff32(0.0), rng.clone(), f32s);
|
||||||
|
fold(&mut group, "std::f64", f64::min, 0.0, rng.clone(), f64s);
|
||||||
|
fold(&mut group, "FF64", FF64::min, ff64(0.0), rng.clone(), f64s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Registers the fold benchmark `id` in `group` once per input length:
/// 1, 2, 4, 8, 64, 256 and 1024 elements.
///
/// Each length is selected by handing `fold_count` an array `[init; N]`; `init` is
/// also forwarded as the fold's starting accumulator. The same `rng` is lent to every
/// call by mutable reference, so the lengths draw from one continuing random stream.
fn fold<T, S>(
|
||||||
|
// criterion group that receives the benchmarks
group: &mut BenchmarkGroup<'_, impl Measurement>,
|
||||||
|
// label used as the function name of each `BenchmarkId`
id: &str,
|
||||||
|
// binary operation applied by the fold
op: impl Fn(T, T) -> T + Copy,
|
||||||
|
// starting accumulator, also used to fill the template arrays
init: T,
|
||||||
|
// source of randomness for the sampled operands
mut rng: impl Rng,
|
||||||
|
// distribution the operands are sampled from
vals: impl Distribution<S> + Copy,
|
||||||
|
) where
|
||||||
|
// every sample of type `S` is converted into the benchmarked type `T`
T: From<S> + Copy,
|
||||||
|
{
|
||||||
|
fold_count([init; 1], group, id, op, init, &mut rng, vals);
|
||||||
|
fold_count([init; 2], group, id, op, init, &mut rng, vals);
|
||||||
|
fold_count([init; 4], group, id, op, init, &mut rng, vals);
|
||||||
|
fold_count([init; 8], group, id, op, init, &mut rng, vals);
|
||||||
|
fold_count([init; 64], group, id, op, init, &mut rng, vals);
|
||||||
|
fold_count([init; 256], group, id, op, init, &mut rng, vals);
|
||||||
|
fold_count([init; 1024], group, id, op, init, &mut rng, vals);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Benchmarks folding `op` over `N` freshly sampled values, starting from `init`.
///
/// The group's throughput is set to `N` elements and the benchmark is registered as
/// `BenchmarkId::new(id, N)`. Inputs are built by the first closure given to
/// `iter_batched_ref`; the second closure performs the fold over them.
fn fold_count<T, S, const N: usize>(
|
||||||
|
// template array; its length fixes `N`, its slots are overwritten with samples
arr: [T; N],
|
||||||
|
// criterion group that receives the benchmark
group: &mut BenchmarkGroup<'_, impl Measurement>,
|
||||||
|
// label used as the function name of the `BenchmarkId`
id: &str,
|
||||||
|
// binary operation applied by the fold
op: impl Fn(T, T) -> T + Copy,
|
||||||
|
// starting accumulator of the fold
init: T,
|
||||||
|
// source of randomness for the sampled operands
mut rng: impl Rng,
|
||||||
|
// distribution the operands are sampled from
vals: impl Distribution<S> + Copy,
|
||||||
|
) where
|
||||||
|
// every sample of type `S` is converted into the benchmarked type `T`
T: From<S> + Copy,
|
||||||
|
{
|
||||||
|
group.throughput(Throughput::Elements(N as u64));
|
||||||
|
|
||||||
|
group.bench_function(BenchmarkId::new(id, N), |b| {
|
||||||
|
b.iter_batched_ref(
|
||||||
|
// setup: copy the template and replace each slot with a converted sample
|| {
|
||||||
|
let mut inputs = arr;
|
||||||
|
inputs
|
||||||
|
.iter_mut()
|
||||||
|
.zip((&mut rng).sample_iter(&vals))
|
||||||
|
.for_each(|(dst, val)| *dst = T::from(val));
|
||||||
|
inputs
|
||||||
|
},
|
||||||
|
// routine: this `vals` is the generated input array and shadows the distribution
|vals| vals.iter().copied().fold(init, op),
|
||||||
|
BatchSize::SmallInput,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(benches, add, mul, div, min);
|
||||||
criterion_main!(benches);
|
criterion_main!(benches);
|
||||||
|
|||||||
39
build.rs
39
build.rs
@@ -6,7 +6,7 @@ fn main() {
|
|||||||
builder.compiler("clang");
|
builder.compiler("clang");
|
||||||
}
|
}
|
||||||
|
|
||||||
builder.flag("-O3").flag("-flto=thin");
|
builder.flag("-flto=thin");
|
||||||
|
|
||||||
build_ll(builder.clone());
|
build_ll(builder.clone());
|
||||||
build_c(builder);
|
build_c(builder);
|
||||||
@@ -21,13 +21,32 @@ fn build_ll(mut builder: cc::Build) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn build_c(mut builder: cc::Build) {
|
fn build_c(mut builder: cc::Build) {
|
||||||
builder
|
builder.flag("-O3");
|
||||||
.file("src/math/math.c")
|
|
||||||
.flag("-ffinite-math-only")
|
#[cfg(feature = "finite-math-only")]
|
||||||
.flag("-fassociative-math")
|
builder.flag("-ffinite-math-only");
|
||||||
.flag("-freciprocal-math")
|
|
||||||
.flag("-fno-signed-zeros")
|
#[cfg(feature = "associative-math")]
|
||||||
.flag("-fno-trapping-math")
|
builder.flag("-fassociative-math");
|
||||||
.flag("-ffp-contract=fast")
|
|
||||||
.compile("math")
|
#[cfg(feature = "reciprocal-math")]
|
||||||
|
builder.flag("-freciprocal-math");
|
||||||
|
|
||||||
|
#[cfg(feature = "no-signed-zeros")]
|
||||||
|
builder.flag("-fno-signed-zeros");
|
||||||
|
|
||||||
|
#[cfg(feature = "no-trapping-math")]
|
||||||
|
builder.flag("-fno-trapping-math");
|
||||||
|
|
||||||
|
#[cfg(feature = "fp-contract-fast")]
|
||||||
|
builder.flag("-ffp-contract=fast");
|
||||||
|
|
||||||
|
// TODO figure out if this works
|
||||||
|
//#[cfg(feature = "approx-func")]
|
||||||
|
//builder.flag("-Xclang -fapprox-func");
|
||||||
|
|
||||||
|
#[cfg(feature = "denormal-fp-math-preserve-sign")]
|
||||||
|
builder.flag("-fdenormal-fp-math=preserve-sign");
|
||||||
|
|
||||||
|
builder.file("src/math/math.c").compile("math")
|
||||||
}
|
}
|
||||||
|
|||||||
91
src/lib.rs
91
src/lib.rs
@@ -1,12 +1,8 @@
|
|||||||
#![doc = include_str!("../README.md")]
|
#![doc = include_str!("../README.md")]
|
||||||
#![feature(core_intrinsics)] // intrinsics for the fast math
|
|
||||||
#![feature(asm)] // asm used to emulate freeze
|
|
||||||
#![feature(doc_cfg)]
|
#![feature(doc_cfg)]
|
||||||
#![feature(link_llvm_intrinsics)]
|
|
||||||
|
|
||||||
use core::{
|
use core::{
|
||||||
cmp, fmt,
|
cmp, fmt,
|
||||||
intrinsics::{fadd_fast, fdiv_fast, fmul_fast, frem_fast, fsub_fast},
|
|
||||||
iter::{Product, Sum},
|
iter::{Product, Sum},
|
||||||
num::FpCategory,
|
num::FpCategory,
|
||||||
ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign},
|
ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign},
|
||||||
@@ -137,82 +133,6 @@ pub fn ff64(f: f64) -> FF64 {
|
|||||||
FF64::new(f)
|
FF64::new(f)
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! impl_binary_refs {
|
|
||||||
($lhs:ident, $rhs:ident, $op_trait:ident, $op_fn:ident) => {
|
|
||||||
impl $op_trait<$rhs> for &$lhs {
|
|
||||||
type Output = <$lhs as $op_trait<$rhs>>::Output;
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn $op_fn(self, other: $rhs) -> Self::Output {
|
|
||||||
(*self).$op_fn(other)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl $op_trait<&$rhs> for $lhs {
|
|
||||||
type Output = <$lhs as $op_trait<$rhs>>::Output;
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn $op_fn(self, other: &$rhs) -> Self::Output {
|
|
||||||
self.$op_fn(*other)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl $op_trait<&$rhs> for &$lhs {
|
|
||||||
type Output = <$lhs as $op_trait<$rhs>>::Output;
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn $op_fn(self, other: &$rhs) -> Self::Output {
|
|
||||||
(*self).$op_fn(*other)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
macro_rules! impl_fast_ops {
|
|
||||||
($fast_ty:ident, $base_ty: ident: $($op_trait:ident, $op_fn:ident, $op_impl:ident,)*) => {
|
|
||||||
$(
|
|
||||||
impl $op_trait <$fast_ty> for $fast_ty {
|
|
||||||
type Output = $fast_ty;
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
fn $op_fn(self, other: $fast_ty) -> Self::Output {
|
|
||||||
// Safety:
|
|
||||||
//
|
|
||||||
// - encountering poison operands is safe because LLVM's fast ops documents not producing
|
|
||||||
// UB on any inputs; it may produce poison on inf/nan (or if the sum is inf/nan), but these
|
|
||||||
// are then wrapped in the MaybePoison to control propagation
|
|
||||||
<$fast_ty>::new(unsafe {
|
|
||||||
$op_impl(
|
|
||||||
self.0.maybe_poison(),
|
|
||||||
other.0.maybe_poison(),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl $op_trait <$base_ty> for $fast_ty {
|
|
||||||
type Output = $fast_ty;
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
fn $op_fn(self, other: $base_ty) -> Self::Output {
|
|
||||||
self.$op_fn(<$fast_ty>::new(other))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl $op_trait <$fast_ty> for $base_ty {
|
|
||||||
type Output = $fast_ty;
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
fn $op_fn(self, other: $fast_ty) -> Self::Output {
|
|
||||||
<$fast_ty>::new(self).$op_fn(other)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl_binary_refs! { $fast_ty, $fast_ty, $op_trait, $op_fn }
|
|
||||||
impl_binary_refs! { $fast_ty, $base_ty, $op_trait, $op_fn }
|
|
||||||
impl_binary_refs! { $base_ty, $fast_ty, $op_trait, $op_fn }
|
|
||||||
)*
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
macro_rules! impl_assign_ops {
|
macro_rules! impl_assign_ops {
|
||||||
($fast_ty:ident, $base_ty: ident: $($op_trait:ident, $op_fn:ident, $op:ident,)*) => {
|
($fast_ty:ident, $base_ty: ident: $($op_trait:ident, $op_fn:ident, $op:ident,)*) => {
|
||||||
$(
|
$(
|
||||||
@@ -375,7 +295,7 @@ macro_rules! impls {
|
|||||||
pub fn round(self) -> Self;
|
pub fn round(self) -> Self;
|
||||||
pub fn sin(self) -> Self;
|
pub fn sin(self) -> Self;
|
||||||
pub fn sinh(self) -> Self;
|
pub fn sinh(self) -> Self;
|
||||||
pub fn sqrt(self) -> Self;
|
//pub fn sqrt(self) -> Self;
|
||||||
pub fn tan(self) -> Self;
|
pub fn tan(self) -> Self;
|
||||||
pub fn tanh(self) -> Self;
|
pub fn tanh(self) -> Self;
|
||||||
pub fn to_degrees(self) -> Self;
|
pub fn to_degrees(self) -> Self;
|
||||||
@@ -442,15 +362,6 @@ macro_rules! impls {
|
|||||||
fmt::Debug, fmt::Display, fmt::LowerExp, fmt::UpperExp,
|
fmt::Debug, fmt::Display, fmt::LowerExp, fmt::UpperExp,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl_fast_ops! {
|
|
||||||
$fast_ty, $base_ty:
|
|
||||||
Add, add, fadd_fast,
|
|
||||||
Sub, sub, fsub_fast,
|
|
||||||
Mul, mul, fmul_fast,
|
|
||||||
Div, div, fdiv_fast,
|
|
||||||
Rem, rem, frem_fast,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl_assign_ops! {
|
impl_assign_ops! {
|
||||||
$fast_ty, $base_ty:
|
$fast_ty, $base_ty:
|
||||||
AddAssign, add_assign, add,
|
AddAssign, add_assign, add,
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
#define IMPL_OPERATIONS(C_TYPE, RUST_TYPE) \
|
#define IMPL_OPERATIONS(C_TYPE, RUST_TYPE) \
|
||||||
/* TODO figure out why these don't inline */ \
|
|
||||||
__attribute__((always_inline)) \
|
__attribute__((always_inline)) \
|
||||||
C_TYPE add_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
|
C_TYPE add_ ## RUST_TYPE(C_TYPE a, C_TYPE b) { \
|
||||||
return a + b; \
|
return a + b; \
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
use crate::{poison::MaybePoison, FF32, FF64};
|
use crate::{poison::MaybePoison, FF32, FF64};
|
||||||
|
use core::ops::{Add, Div, Mul, Rem, Sub};
|
||||||
use paste::paste;
|
use paste::paste;
|
||||||
|
|
||||||
impl FF32 {
|
impl FF32 {
|
||||||
@@ -75,12 +76,95 @@ macro_rules! impl_generic_math {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generates the by-reference variants of a binary operator.
///
/// Given an existing `impl $op_trait<$rhs> for $lhs`, this emits the three impls for
/// `&$lhs op $rhs`, `$lhs op &$rhs` and `&$lhs op &$rhs`. Each one dereferences its
/// reference operands and forwards to the by-value impl, reusing its `Output` type.
macro_rules! impl_binary_refs {
|
||||||
|
($lhs:ident, $rhs:ident, $op_trait:ident, $op_fn:ident) => {
|
||||||
|
// `&lhs op rhs`
impl $op_trait<$rhs> for &$lhs {
|
||||||
|
type Output = <$lhs as $op_trait<$rhs>>::Output;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn $op_fn(self, other: $rhs) -> Self::Output {
|
||||||
|
(*self).$op_fn(other)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// `lhs op &rhs`
impl $op_trait<&$rhs> for $lhs {
|
||||||
|
type Output = <$lhs as $op_trait<$rhs>>::Output;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn $op_fn(self, other: &$rhs) -> Self::Output {
|
||||||
|
self.$op_fn(*other)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// `&lhs op &rhs`
impl $op_trait<&$rhs> for &$lhs {
|
||||||
|
type Output = <$lhs as $op_trait<$rhs>>::Output;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn $op_fn(self, other: &$rhs) -> Self::Output {
|
||||||
|
(*self).$op_fn(*other)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Implements binary operator traits for a fast type and its base float type.
///
/// For every `(trait, method, function)` triple this emits `fast op fast` (which calls
/// the given function), `fast op base` and `base op fast` (which wrap the base operand
/// with `new` and forward to `fast op fast`), plus all reference variants through
/// `impl_binary_refs!`. Every impl produces the fast type as its `Output`.
macro_rules! impl_fast_ops {
|
||||||
|
($fast_ty:ident, $base_ty: ident: $($op_trait:ident, $op_fn:ident, $op_impl:ident,)*) => {
|
||||||
|
$(
|
||||||
|
// fast op fast: the only impl that calls `$op_impl` directly
impl $op_trait <$fast_ty> for $fast_ty {
|
||||||
|
type Output = $fast_ty;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn $op_fn(self, other: $fast_ty) -> Self::Output {
|
||||||
|
// NOTE(review): `$op_impl` requires `unsafe` to call; the soundness argument
// is not visible in this macro — confirm it and record it as a SAFETY note.
unsafe { $op_impl(self, other) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fast op base: wrap the base operand, then reuse fast op fast
impl $op_trait <$base_ty> for $fast_ty {
|
||||||
|
type Output = $fast_ty;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn $op_fn(self, other: $base_ty) -> Self::Output {
|
||||||
|
self.$op_fn(<$fast_ty>::new(other))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// base op fast: wrap the base operand, then reuse fast op fast
impl $op_trait <$fast_ty> for $base_ty {
|
||||||
|
type Output = $fast_ty;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn $op_fn(self, other: $fast_ty) -> Self::Output {
|
||||||
|
<$fast_ty>::new(self).$op_fn(other)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// reference variants for each of the three operand pairings above
impl_binary_refs! { $fast_ty, $fast_ty, $op_trait, $op_fn }
|
||||||
|
impl_binary_refs! { $fast_ty, $base_ty, $op_trait, $op_fn }
|
||||||
|
impl_binary_refs! { $base_ty, $fast_ty, $op_trait, $op_fn }
|
||||||
|
)*
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
macro_rules! impl_extern_math {
|
macro_rules! impl_extern_math {
|
||||||
($fast_ty:ident, $base_ty:ident) => {
|
($fast_ty:ident, $base_ty:ident) => {
|
||||||
paste! {
|
paste! {
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
fn [<add_ $base_ty>](a: $fast_ty, b: $fast_ty) -> $fast_ty;
|
||||||
|
fn [<sub_ $base_ty>](a: $fast_ty, b: $fast_ty) -> $fast_ty;
|
||||||
|
fn [<mul_ $base_ty>](a: $fast_ty, b: $fast_ty) -> $fast_ty;
|
||||||
|
fn [<div_ $base_ty>](a: $fast_ty, b: $fast_ty) -> $fast_ty;
|
||||||
|
fn [<rem_ $base_ty>](a: $fast_ty, b: $fast_ty) -> $fast_ty;
|
||||||
|
|
||||||
fn [<min_ $base_ty>](a: $fast_ty, b: $fast_ty) -> $fast_ty;
|
fn [<min_ $base_ty>](a: $fast_ty, b: $fast_ty) -> $fast_ty;
|
||||||
fn [<max_ $base_ty>](a: $fast_ty, b: $fast_ty) -> $fast_ty;
|
fn [<max_ $base_ty>](a: $fast_ty, b: $fast_ty) -> $fast_ty;
|
||||||
|
|
||||||
|
fn [<sqrt_ $base_ty>](a: $fast_ty) -> $fast_ty;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl_fast_ops! {
|
||||||
|
$fast_ty, $base_ty:
|
||||||
|
Add, add, [<add_ $base_ty>],
|
||||||
|
Sub, sub, [<sub_ $base_ty>],
|
||||||
|
Mul, mul, [<mul_ $base_ty>],
|
||||||
|
Div, div, [<div_ $base_ty>],
|
||||||
|
Rem, rem, [<rem_ $base_ty>],
|
||||||
}
|
}
|
||||||
|
|
||||||
impl $fast_ty {
|
impl $fast_ty {
|
||||||
@@ -93,6 +177,11 @@ macro_rules! impl_extern_math {
|
|||||||
pub fn min(self, other: Self) -> Self {
|
pub fn min(self, other: Self) -> Self {
|
||||||
unsafe { [<min_ $base_ty>](self, other) }
|
unsafe { [<min_ $base_ty>](self, other) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn sqrt(self) -> Self {
|
||||||
|
unsafe { [<sqrt_ $base_ty>](self) }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user