extern arithmetic

This commit is contained in:
Renar Narubin
2021-11-23 16:52:04 -08:00
parent 43dc1419a8
commit 5a5289f43e
6 changed files with 260 additions and 152 deletions

View File

@@ -1,54 +1,122 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use criterion::{
criterion_group, criterion_main, measurement::Measurement, BatchSize, BenchmarkGroup,
BenchmarkId, Criterion, Throughput,
};
use fast_fp::{ff32, ff64, FF32, FF64};
use rand::{distributions::Standard, thread_rng, Rng};
use rand::{
distributions::{self, Distribution},
rngs::StdRng,
Rng, SeedableRng,
};
use std::ops::{Add, Div, Mul};
fn sum(c: &mut Criterion) {
let mut group = c.benchmark_group("sum");
for count in [2, 4, 8, 16, 64, 1024, 1 << 15] {
group.throughput(Throughput::Elements(count as u64));
fn add(c: &mut Criterion) {
let mut group = c.benchmark_group("add");
let f32_vals = thread_rng()
.sample_iter(Standard)
.take(count)
.collect::<Vec<f32>>();
let rng = StdRng::from_entropy();
let f32s = distributions::Uniform::<f32>::new(0.0, 1.0);
let f64s = distributions::Uniform::<f64>::new(0.0, 1.0);
// use the same values for both benchmarks
let ff32_vals = f32_vals
.clone()
.into_iter()
.map(ff32)
.collect::<Vec<FF32>>();
group.bench_with_input(BenchmarkId::new("std::f32", count), &f32_vals, |b, vals| {
b.iter(|| vals.iter().copied().fold(0.0, |acc, val| acc + val));
});
group.bench_with_input(BenchmarkId::new("FF32", count), &ff32_vals, |b, vals| {
b.iter(|| vals.iter().copied().fold(ff32(0.0), |acc, val| acc + val));
});
let f64_vals = thread_rng()
.sample_iter(Standard)
.take(count)
.collect::<Vec<f64>>();
// use the same values for both benchmarks
let ff64_vals = f64_vals
.clone()
.into_iter()
.map(ff64)
.collect::<Vec<FF64>>();
group.bench_with_input(BenchmarkId::new("std::f64", count), &f64_vals, |b, vals| {
b.iter(|| vals.iter().copied().fold(0.0, |acc, val| acc + val));
});
group.bench_with_input(BenchmarkId::new("FF64", count), &ff64_vals, |b, vals| {
b.iter(|| vals.iter().copied().fold(ff64(0.0), |acc, val| acc + val));
});
}
group.finish();
// clone the rng for each benched type to keep the generated values identical
fold(&mut group, "std::f32", f32::add, 0.0, rng.clone(), f32s);
fold(&mut group, "FF32", FF32::add, ff32(0.0), rng.clone(), f32s);
fold(&mut group, "std::f64", f64::add, 0.0, rng.clone(), f64s);
fold(&mut group, "FF64", FF64::add, ff64(0.0), rng.clone(), f64s);
}
criterion_group!(benches, sum);
/// Benchmarks a running product (`fold` with `Mul::mul`) for `f32`/`f64` and the
/// `FF32`/`FF64` wrappers, registering the results under the "mul" group.
///
/// Every benched type receives a clone of the same seeded RNG so that all four
/// variants multiply identical value sequences.
fn mul(c: &mut Criterion) {
    let mut group = c.benchmark_group("mul");

    let rng = StdRng::from_entropy();

    // try to avoid subnormals/explosions by limiting the values near 1
    let f32s = distributions::Uniform::<f32>::new(0.9, 1.1);
    let f64s = distributions::Uniform::<f64>::new(0.9, 1.1);

    // Seed the fold with the multiplicative identity. Starting from 0.0 pins the
    // accumulator at zero for the whole fold, which both defeats the "values near 1"
    // setup above and hands a fast-math optimizer a trivially foldable `0 * x`.
    //
    // clone the rng for each benched type to keep the generated values identical
    fold(&mut group, "std::f32", f32::mul, 1.0, rng.clone(), f32s);
    fold(&mut group, "FF32", FF32::mul, ff32(1.0), rng.clone(), f32s);
    fold(&mut group, "std::f64", f64::mul, 1.0, rng.clone(), f64s);
    fold(&mut group, "FF64", FF64::mul, ff64(1.0), rng.clone(), f64s);
}
/// Benchmarks a running quotient (`fold` with `Div::div`) for `f32`/`f64` and the
/// `FF32`/`FF64` wrappers, registering the results under the "div" group.
///
/// Every benched type receives a clone of the same seeded RNG so that all four
/// variants divide by identical value sequences.
fn div(c: &mut Criterion) {
    let mut group = c.benchmark_group("div");

    let rng = StdRng::from_entropy();

    // try to avoid subnormals/explosions by limiting the values near 1
    let f32s = distributions::Uniform::<f32>::new(0.9, 1.1);
    let f64s = distributions::Uniform::<f64>::new(0.9, 1.1);

    // Seed the fold with 1.0 rather than 0.0: a zero numerator stays zero through
    // every division, so the benchmark would never touch the "near 1" operand range
    // and a fast-math optimizer could treat `0 / x` as a constant.
    //
    // clone the rng for each benched type to keep the generated values identical
    fold(&mut group, "std::f32", f32::div, 1.0, rng.clone(), f32s);
    fold(&mut group, "FF32", FF32::div, ff32(1.0), rng.clone(), f32s);
    fold(&mut group, "std::f64", f64::div, 1.0, rng.clone(), f64s);
    fold(&mut group, "FF64", FF64::div, ff64(1.0), rng.clone(), f64s);
}
/// Benchmarks a running minimum (`fold` with each type's `min`) for `f32`/`f64`
/// and the `FF32`/`FF64` wrappers, registering the results under the "min" group.
fn min(c: &mut Criterion) {
    let mut min_group = c.benchmark_group("min");

    // One entropy-seeded generator; each benched type gets its own clone below so
    // that the sampled sequences match across all four variants.
    let seed_rng = StdRng::from_entropy();

    let unit_f32 = distributions::Uniform::<f32>::new(0.0, 1.0);
    let unit_f64 = distributions::Uniform::<f64>::new(0.0, 1.0);

    fold(&mut min_group, "std::f32", f32::min, 0.0, seed_rng.clone(), unit_f32);
    fold(&mut min_group, "FF32", FF32::min, ff32(0.0), seed_rng.clone(), unit_f32);
    fold(&mut min_group, "std::f64", f64::min, 0.0, seed_rng.clone(), unit_f64);
    fold(&mut min_group, "FF64", FF64::min, ff64(0.0), seed_rng.clone(), unit_f64);
}
fn fold<T, S>(
group: &mut BenchmarkGroup<'_, impl Measurement>,
id: &str,
op: impl Fn(T, T) -> T + Copy,
init: T,
mut rng: impl Rng,
vals: impl Distribution<S> + Copy,
) where
T: From<S> + Copy,
{
fold_count([init; 1], group, id, op, init, &mut rng, vals);
fold_count([init; 2], group, id, op, init, &mut rng, vals);
fold_count([init; 4], group, id, op, init, &mut rng, vals);
fold_count([init; 8], group, id, op, init, &mut rng, vals);
fold_count([init; 64], group, id, op, init, &mut rng, vals);
fold_count([init; 256], group, id, op, init, &mut rng, vals);
fold_count([init; 1024], group, id, op, init, &mut rng, vals);
}
/// Registers one benchmark that folds `N` sampled values with `op`.
///
/// * `arr` - template array; every element is overwritten with a sampled value
///   during setup, so only its length `N` influences the benchmark.
/// * `group` - benchmark group receiving the measurement; its throughput is set
///   to `N` elements.
/// * `id` - label paired with `N` to form the benchmark id.
/// * `op` - binary operation folded over the inputs.
/// * `init` - starting accumulator for the fold.
/// * `rng` - source of randomness for the inputs.
/// * `vals` - distribution the inputs are drawn from; each sample is converted
///   into `T` via `From`.
fn fold_count<T, S, const N: usize>(
    arr: [T; N],
    group: &mut BenchmarkGroup<'_, impl Measurement>,
    id: &str,
    op: impl Fn(T, T) -> T + Copy,
    init: T,
    mut rng: impl Rng,
    vals: impl Distribution<S> + Copy,
) where
    T: From<S> + Copy,
{
    group.throughput(Throughput::Elements(N as u64));

    let bench_id = BenchmarkId::new(id, N);
    group.bench_function(bench_id, |bencher| {
        // Setup closure: builds a fresh input array for each batch element by
        // drawing exactly `N` samples. Criterion documents that batched setup is
        // kept out of the measured time.
        let make_inputs = || {
            let mut inputs = arr;
            for slot in inputs.iter_mut() {
                let sample: S = rng.sample(&vals);
                *slot = T::from(sample);
            }
            inputs
        };

        bencher.iter_batched_ref(
            make_inputs,
            // Measured routine: fold the prepared inputs down to a single value.
            |inputs| inputs.iter().fold(init, |acc, &x| op(acc, x)),
            BatchSize::SmallInput,
        );
    });
}
criterion_group!(benches, add, mul, div, min);
criterion_main!(benches);