Skip to content

Commit

Permalink
Speed Up Benchmarks in Test (#7129)
Browse files Browse the repository at this point in the history
  • Loading branch information
cwfitzgerald authored Feb 13, 2025
1 parent 2f50426 commit 03a01df
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 16 deletions.
30 changes: 25 additions & 5 deletions benches/benches/bind_groups.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,17 @@ use criterion::{criterion_group, Criterion, Throughput};
use nanorand::{Rng, WyRand};
use std::sync::LazyLock;

use crate::DeviceState;
use crate::{is_test, DeviceState};

// With syncval enabled, creating 50_000 textures is slow enough to dominate the
// run. The test configuration therefore uses a much smaller texture count so
// that running the benchmark as a test stays quick.

// Number of textures created up front when running as a real benchmark.
const MAX_TEXTURE_COUNT_BENCHMARK: u32 = 50_000;
// Number of textures created up front when running as a test.
const MAX_TEXTURE_COUNT_TEST: u32 = 5;

// Bind group sizes measured when running as a real benchmark.
const TEXTURE_COUNTS_BENCHMARK: &[u32] = &[5, 50, 500, 5_000, MAX_TEXTURE_COUNT_BENCHMARK];
// Bind group sizes measured when running as a test.
const TEXTURE_COUNTS_TEST: &[u32] = &[MAX_TEXTURE_COUNT_TEST];

struct BindGroupState {
device_state: DeviceState,
Expand All @@ -19,16 +29,20 @@ impl BindGroupState {
fn new() -> Self {
let device_state = DeviceState::new();

const TEXTURE_COUNT: u32 = 50_000;
let texture_count = if is_test() {
MAX_TEXTURE_COUNT_TEST
} else {
MAX_TEXTURE_COUNT_BENCHMARK
};

// Performance gets considerably worse if the resources are shuffled.
//
// This more closely matches the real-world use case where resources have no
// well defined usage order.
let mut random = WyRand::new_seed(0x8BADF00D);

let mut texture_views = Vec::with_capacity(TEXTURE_COUNT as usize);
for i in 0..TEXTURE_COUNT {
let mut texture_views = Vec::with_capacity(texture_count as usize);
for i in 0..texture_count {
let texture = device_state
.device
.create_texture(&wgpu::TextureDescriptor {
Expand Down Expand Up @@ -64,7 +78,13 @@ fn run_bench(ctx: &mut Criterion) {

let mut group = ctx.benchmark_group("Bind Group Creation");

for count in [5, 50, 500, 5_000, 50_000] {
let count_list = if is_test() {
TEXTURE_COUNTS_TEST
} else {
TEXTURE_COUNTS_BENCHMARK
};

for &count in count_list {
group.throughput(Throughput::Elements(count as u64));
group.bench_with_input(
format!("{} Element Bind Group", count),
Expand Down
18 changes: 13 additions & 5 deletions benches/benches/computepass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ use nanorand::{Rng, WyRand};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::sync::LazyLock;

use crate::DeviceState;
use crate::{is_test, DeviceState};

fn dispatch_count() -> usize {
// When testing we only want to run a very lightweight version of the benchmark
// to ensure that it does not break.
if std::env::var("NEXTEST").is_ok() {
if is_test() {
8
} else {
10_000
Expand All @@ -28,13 +28,21 @@ fn dispatch_count() -> usize {
fn dispatch_count_bindless() -> usize {
// When testing we only want to run a very lightweight version of the benchmark
// to ensure that it does not break.
if std::env::var("NEXTEST").is_ok() {
if is_test() {
8
} else {
1_000
}
}

/// Returns the pass/thread counts that `run_bench` iterates over.
///
/// When running as a test only a single configuration is exercised; otherwise
/// the full list of configurations is returned.
fn thread_count_list() -> &'static [usize] {
    const TEST_COUNTS: &[usize] = &[2];
    // NOTE(review): this list has no `1` entry, yet it also drives the
    // per-pass-count loop in `run_bench` — confirm that dropping the
    // single-pass configuration is intentional.
    const BENCHMARK_COUNTS: &[usize] = &[2, 4, 8];

    if is_test() {
        TEST_COUNTS
    } else {
        BENCHMARK_COUNTS
    }
}

// Must match the number of textures in the computepass.wgsl shader
const TEXTURES_PER_DISPATCH: usize = 2;
const STORAGE_TEXTURES_PER_DISPATCH: usize = 2;
Expand Down Expand Up @@ -437,7 +445,7 @@ fn run_bench(ctx: &mut Criterion) {
group.throughput(Throughput::Elements(dispatch_count as _));

for time_submit in [false, true] {
for cpasses in [1, 2, 4, 8] {
for &cpasses in thread_count_list() {
let dispatch_per_pass = dispatch_count / cpasses;

let label = if time_submit {
Expand Down Expand Up @@ -493,7 +501,7 @@ fn run_bench(ctx: &mut Criterion) {
let mut group = ctx.benchmark_group("Computepass: Multi Threaded");
group.throughput(Throughput::Elements(dispatch_count as _));

for threads in [2, 4, 8] {
for &threads in thread_count_list() {
let dispatch_per_pass = dispatch_count / threads;
group.bench_function(
format!("{threads} threads x {dispatch_per_pass} dispatch"),
Expand Down
16 changes: 12 additions & 4 deletions benches/benches/renderpass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,26 @@ use nanorand::{Rng, WyRand};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::sync::LazyLock;

use crate::DeviceState;
use crate::{is_test, DeviceState};

fn draw_count() -> usize {
// When testing we only want to run a very lightweight version of the benchmark
// to ensure that it does not break.
if std::env::var("NEXTEST").is_ok() {
if is_test() {
8
} else {
10_000
}
}

/// Returns the pass/thread counts that `run_bench` iterates over.
///
/// When running as a test only a single configuration is exercised; otherwise
/// the full list of configurations is returned.
fn thread_count_list() -> &'static [usize] {
    const TEST_COUNTS: &[usize] = &[2];
    const BENCHMARK_COUNTS: &[usize] = &[1, 2, 4, 8];

    if is_test() {
        TEST_COUNTS
    } else {
        BENCHMARK_COUNTS
    }
}

// Must match the number of textures in the renderpass.wgsl shader
const TEXTURES_PER_DRAW: usize = 7;
const VERTEX_BUFFERS_PER_DRAW: usize = 2;
Expand Down Expand Up @@ -438,7 +446,7 @@ fn run_bench(ctx: &mut Criterion) {
group.throughput(Throughput::Elements(draw_count as _));

for time_submit in [false, true] {
for rpasses in [1, 2, 4, 8] {
for &rpasses in thread_count_list() {
let draws_per_pass = draw_count / rpasses;

let label = if time_submit {
Expand Down Expand Up @@ -499,7 +507,7 @@ fn run_bench(ctx: &mut Criterion) {
let mut group = ctx.benchmark_group("Renderpass: Multi Threaded");
group.throughput(Throughput::Elements(draw_count as _));

for threads in [2, 4, 8] {
for &threads in thread_count_list() {
let draws_per_pass = draw_count / threads;
group.bench_function(format!("{threads} threads x {draws_per_pass} draws"), |b| {
LazyLock::force(&state);
Expand Down
12 changes: 10 additions & 2 deletions benches/benches/resource_creation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,15 @@ use criterion::{criterion_group, Criterion, Throughput};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::sync::LazyLock;

use crate::DeviceState;
use crate::{is_test, DeviceState};

/// Returns the thread counts that `run_bench` iterates over.
///
/// When running as a test only a single configuration is exercised; otherwise
/// the full list of configurations is returned.
fn thread_count_list() -> &'static [usize] {
    const TEST_COUNTS: &[usize] = &[2];
    const BENCHMARK_COUNTS: &[usize] = &[1, 2, 4, 8];

    if is_test() {
        TEST_COUNTS
    } else {
        BENCHMARK_COUNTS
    }
}

fn run_bench(ctx: &mut Criterion) {
let state = LazyLock::new(DeviceState::new);
Expand All @@ -14,7 +22,7 @@ fn run_bench(ctx: &mut Criterion) {
let mut group = ctx.benchmark_group("Resource Creation: Large Buffer");
group.throughput(Throughput::Elements(RESOURCES_TO_CREATE as _));

for threads in [1, 2, 4, 8] {
for &threads in thread_count_list() {
let resources_per_thread = RESOURCES_TO_CREATE / threads;
group.bench_function(
format!("{threads} threads x {resources_per_thread} resource"),
Expand Down
4 changes: 4 additions & 0 deletions benches/benches/root.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ mod renderpass;
mod resource_creation;
mod shader;

/// Reports whether the benchmarks are being run as tests.
///
/// Returns `true` when the `NEXTEST` environment variable can be read
/// (i.e. it is set and holds valid Unicode), and `false` otherwise.
fn is_test() -> bool {
    let nextest_marker = std::env::var("NEXTEST");
    nextest_marker.is_ok()
}

struct DeviceState {
adapter_info: wgpu::AdapterInfo,
device: wgpu::Device,
Expand Down

0 comments on commit 03a01df

Please sign in to comment.