123 lines
4.3 KiB
Rust
123 lines
4.3 KiB
Rust
use criterion::{
|
|
criterion_group, criterion_main, measurement::Measurement, BatchSize, BenchmarkGroup,
|
|
BenchmarkId, Criterion, Throughput,
|
|
};
|
|
use fast_fp::{ff32, ff64, FF32, FF64};
|
|
use rand::{
|
|
distributions::{self, Distribution},
|
|
rngs::StdRng,
|
|
Rng, SeedableRng,
|
|
};
|
|
use std::ops::{Add, Div, Mul};
|
|
|
|
fn add(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("add");
|
|
|
|
let rng = StdRng::from_entropy();
|
|
let f32s = distributions::Uniform::<f32>::new(0.0, 1.0);
|
|
let f64s = distributions::Uniform::<f64>::new(0.0, 1.0);
|
|
|
|
// clone the rng for each benched type to keep the generated values identical
|
|
fold(&mut group, "f32", f32::add, 0.0, rng.clone(), f32s);
|
|
fold(&mut group, "FF32", FF32::add, ff32(0.0), rng.clone(), f32s);
|
|
fold(&mut group, "f64", f64::add, 0.0, rng.clone(), f64s);
|
|
fold(&mut group, "FF64", FF64::add, ff64(0.0), rng.clone(), f64s);
|
|
}
|
|
|
|
fn mul(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("mul");
|
|
|
|
let rng = StdRng::from_entropy();
|
|
|
|
// try to avoid subnormals/explosions by limiting the values near 1
|
|
let f32s = distributions::Uniform::<f32>::new(0.9, 1.1);
|
|
let f64s = distributions::Uniform::<f64>::new(0.9, 1.1);
|
|
|
|
// clone the rng for each benched type to keep the generated values identical
|
|
fold(&mut group, "f32", f32::mul, 0.0, rng.clone(), f32s);
|
|
fold(&mut group, "FF32", FF32::mul, ff32(0.0), rng.clone(), f32s);
|
|
fold(&mut group, "f64", f64::mul, 0.0, rng.clone(), f64s);
|
|
fold(&mut group, "FF64", FF64::mul, ff64(0.0), rng.clone(), f64s);
|
|
}
|
|
|
|
fn div(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("div");
|
|
|
|
let rng = StdRng::from_entropy();
|
|
|
|
// try to avoid subnormals/explosions by limiting the values near 1
|
|
let f32s = distributions::Uniform::<f32>::new(0.9, 1.1);
|
|
let f64s = distributions::Uniform::<f64>::new(0.9, 1.1);
|
|
|
|
// clone the rng for each benched type to keep the generated values identical
|
|
fold(&mut group, "f32", f32::div, 0.0, rng.clone(), f32s);
|
|
fold(&mut group, "FF32", FF32::div, ff32(0.0), rng.clone(), f32s);
|
|
fold(&mut group, "f64", f64::div, 0.0, rng.clone(), f64s);
|
|
fold(&mut group, "FF64", FF64::div, ff64(0.0), rng.clone(), f64s);
|
|
}
|
|
|
|
fn min(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("min");
|
|
|
|
let rng = StdRng::from_entropy();
|
|
let f32s = distributions::Uniform::<f32>::new(0.0, 1.0);
|
|
let f64s = distributions::Uniform::<f64>::new(0.0, 1.0);
|
|
|
|
// clone the rng for each benched type to keep the generated values identical
|
|
fold(&mut group, "f32", f32::min, 0.0, rng.clone(), f32s);
|
|
fold(&mut group, "FF32", FF32::min, ff32(0.0), rng.clone(), f32s);
|
|
fold(&mut group, "f64", f64::min, 0.0, rng.clone(), f64s);
|
|
fold(&mut group, "FF64", FF64::min, ff64(0.0), rng.clone(), f64s);
|
|
}
|
|
|
|
fn fold<T, S>(
|
|
group: &mut BenchmarkGroup<'_, impl Measurement>,
|
|
id: &str,
|
|
op: impl Fn(T, T) -> T + Copy,
|
|
init: T,
|
|
mut rng: impl Rng,
|
|
vals: impl Distribution<S> + Copy,
|
|
) where
|
|
T: From<S> + Copy,
|
|
{
|
|
fold_count([init; 1], group, id, op, init, &mut rng, vals);
|
|
fold_count([init; 2], group, id, op, init, &mut rng, vals);
|
|
fold_count([init; 4], group, id, op, init, &mut rng, vals);
|
|
fold_count([init; 8], group, id, op, init, &mut rng, vals);
|
|
fold_count([init; 64], group, id, op, init, &mut rng, vals);
|
|
fold_count([init; 256], group, id, op, init, &mut rng, vals);
|
|
fold_count([init; 1024], group, id, op, init, &mut rng, vals);
|
|
}
|
|
|
|
fn fold_count<T, S, const N: usize>(
|
|
arr: [T; N],
|
|
group: &mut BenchmarkGroup<'_, impl Measurement>,
|
|
id: &str,
|
|
op: impl Fn(T, T) -> T + Copy,
|
|
init: T,
|
|
mut rng: impl Rng,
|
|
vals: impl Distribution<S> + Copy,
|
|
) where
|
|
T: From<S> + Copy,
|
|
{
|
|
group.throughput(Throughput::Elements(N as u64));
|
|
|
|
group.bench_function(BenchmarkId::new(id, N), |b| {
|
|
b.iter_batched_ref(
|
|
|| {
|
|
let mut inputs = arr;
|
|
inputs
|
|
.iter_mut()
|
|
.zip((&mut rng).sample_iter(&vals))
|
|
.for_each(|(dst, val)| *dst = T::from(val));
|
|
inputs
|
|
},
|
|
|vals| vals.iter().copied().fold(init, op),
|
|
BatchSize::SmallInput,
|
|
);
|
|
});
|
|
}
|
|
|
|
criterion_group!(benches, add, mul, div, min);
|
|
criterion_main!(benches);
|