Skip to content

Commit

Permalink
wrap_fn_ptr!: Generate a decl_fn! macro inside.
Browse files Browse the repository at this point in the history
  • Loading branch information
kkysen committed Nov 27, 2023
1 parent 0cf2a8f commit 5bda2e8
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 108 deletions.
142 changes: 34 additions & 108 deletions src/filmgrain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,55 +199,12 @@ pub(crate) struct Rav1dFilmGrainDSPContext {
pub fguv_32x32xn: enum_map_ty!(Rav1dPixelLayoutSubSampled, fguv_32x32xn::Fn),
}

// Declares an externally-defined `generate_grain_y` routine and wraps it.
//
// Usage: `decl_generate_grain_y_fn!(fn some_symbol)`.
// The expansion is a block expression that
//   1. declares `some_symbol` as an `extern "C"` function with the
//      signature written below, and
//   2. evaluates to `generate_grain_y::Fn::new(some_symbol)`.
//
// Only compiled when the `asm` cargo feature is enabled.
//
// NOTE(review): nothing here checks that the linked symbol really has this
// signature; the declaration is trusted as written.
#[cfg(feature = "asm")]
macro_rules! decl_generate_grain_y_fn {
(fn $name:ident) => {{
// The declaration lives inside the expansion's own block, so it is scoped
// to this one expression.
extern "C" {
fn $name(buf: *mut GrainLut<DynEntry>, data: &Dav1dFilmGrainData, bitdepth_max: c_int);
}

// Value of the whole macro invocation: the wrapped function pointer.
generate_grain_y::Fn::new($name)
}};
}

// Declares an externally-defined `generate_grain_uv` routine and wraps it.
//
// Usage: `decl_generate_grain_uv_fn!(fn some_symbol)`.
// The expansion is a block expression that declares `some_symbol` as an
// `extern "C"` function with the five-parameter signature below and
// evaluates to `generate_grain_uv::Fn::new(some_symbol)`.
//
// Only compiled when the `asm` cargo feature is enabled.
//
// NOTE(review): nothing here checks that the linked symbol really has this
// signature; the declaration is trusted as written.
#[cfg(feature = "asm")]
macro_rules! decl_generate_grain_uv_fn {
(fn $name:ident) => {{
// The declaration is scoped to the block produced by this expansion.
extern "C" {
fn $name(
buf: *mut GrainLut<DynEntry>,
buf_y: *const GrainLut<DynEntry>,
data: &Dav1dFilmGrainData,
uv: intptr_t,
bitdepth_max: c_int,
);
}

// Value of the whole macro invocation: the wrapped function pointer.
generate_grain_uv::Fn::new($name)
}};
}

#[cfg(feature = "asm")]
macro_rules! decl_fgy_32x32xn_fn {
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
macro_rules! decl_fgy_32x32xn_neon_fn {
(fn $name:ident) => {{
extern "C" {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn $name(
dst_row: *mut DynPixel,
src_row: *const DynPixel,
stride: ptrdiff_t,
data: &Dav1dFilmGrainData,
pw: usize,
scaling: *const DynScaling,
grain_lut: *const GrainLut<DynEntry>,
bh: c_int,
row_num: c_int,
bitdepth_max: c_int,
);

// Use [`ptrdiff_t`] instead of [`c_int`] for the last few parameters,
// to get the same layout of parameters on the stack across platforms.
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
fn $name(
dst: *mut DynPixel,
src: *const DynPixel,
Expand All @@ -263,41 +220,16 @@ macro_rules! decl_fgy_32x32xn_fn {
);
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
let fn_ = fgy_32x32xn::Fn::new($name);

#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
let fn_ = $name;

fn_
$name
}};
}

#[cfg(feature = "asm")]
macro_rules! decl_fguv_32x32xn_fn {
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))]
macro_rules! decl_fguv_32x32xn_neon_fn {
(fn $name:ident) => {{
extern "C" {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn $name(
dst_row: *mut DynPixel,
src_row: *const DynPixel,
stride: ptrdiff_t,
data: &Dav1dFilmGrainData,
pw: usize,
scaling: *const DynScaling,
grain_lut: *const GrainLut<DynEntry>,
bh: c_int,
row_num: c_int,
luma_row: *const DynPixel,
luma_stride: ptrdiff_t,
uv_pl: c_int,
is_id: c_int,
bitdepth_max: c_int,
);

// Use [`ptrdiff_t`] instead of [`c_int`] for the last few parameters,
// to get the parameters on the stack with the same layout across platforms.
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
fn $name(
dst: *mut DynPixel,
src: *const DynPixel,
Expand All @@ -316,13 +248,7 @@ macro_rules! decl_fguv_32x32xn_fn {
);
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
let fn_ = fguv_32x32xn::Fn::new($name);

#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
let fn_ = $name;

fn_
$name
}};
}

Expand Down Expand Up @@ -1025,7 +951,7 @@ unsafe fn fgy_32x32xn_neon<BD: BitDepth>(
r#type |= 2; // overlap x
}

bd_fn!(decl_fgy_32x32xn_fn, BD, fgy_32x32, neon)(
bd_fn!(decl_fgy_32x32xn_neon_fn, BD, fgy_32x32, neon)(
dst_row.add(bx).cast(),
src_row.add(bx).cast(),
stride,
Expand Down Expand Up @@ -1142,9 +1068,9 @@ unsafe fn fguv_32x32xn_neon<BD: BitDepth, const NM: usize, const IS_SX: bool, co
r#type |= 4;
}
(match NM {
420 => bd_fn!(decl_fguv_32x32xn_fn, BD, fguv_32x32_420, neon),
422 => bd_fn!(decl_fguv_32x32xn_fn, BD, fguv_32x32_422, neon),
444 => bd_fn!(decl_fguv_32x32xn_fn, BD, fguv_32x32_444, neon),
420 => bd_fn!(decl_fguv_32x32xn_neon_fn, BD, fguv_32x32_420, neon),
422 => bd_fn!(decl_fguv_32x32xn_neon_fn, BD, fguv_32x32_422, neon),
444 => bd_fn!(decl_fguv_32x32xn_neon_fn, BD, fguv_32x32_444, neon),
_ => unreachable!(),
})(
dst_row.add(bx).cast(),
Expand Down Expand Up @@ -1190,18 +1116,18 @@ impl Rav1dFilmGrainDSPContext {
return self;
}

self.generate_grain_y = bd_fn!(decl_generate_grain_y_fn, BD, generate_grain_y, ssse3);
self.generate_grain_y = bd_fn!(generate_grain_y::decl_fn, BD, generate_grain_y, ssse3);
self.generate_grain_uv = enum_map!(Rav1dPixelLayoutSubSampled => generate_grain_uv::Fn; match key {
I420 => bd_fn!(decl_generate_grain_uv_fn, BD, generate_grain_uv_420, ssse3),
I422 => bd_fn!(decl_generate_grain_uv_fn, BD, generate_grain_uv_422, ssse3),
I444 => bd_fn!(decl_generate_grain_uv_fn, BD, generate_grain_uv_444, ssse3),
I420 => bd_fn!(generate_grain_uv::decl_fn, BD, generate_grain_uv_420, ssse3),
I422 => bd_fn!(generate_grain_uv::decl_fn, BD, generate_grain_uv_422, ssse3),
I444 => bd_fn!(generate_grain_uv::decl_fn, BD, generate_grain_uv_444, ssse3),
});

self.fgy_32x32xn = bd_fn!(decl_fgy_32x32xn_fn, BD, fgy_32x32xn, ssse3);
self.fgy_32x32xn = bd_fn!(fgy_32x32xn::decl_fn, BD, fgy_32x32xn, ssse3);
self.fguv_32x32xn = enum_map!(Rav1dPixelLayoutSubSampled => fguv_32x32xn::Fn; match key {
I420 => bd_fn!(decl_fguv_32x32xn_fn, BD, fguv_32x32xn_i420, ssse3),
I422 => bd_fn!(decl_fguv_32x32xn_fn, BD, fguv_32x32xn_i422, ssse3),
I444 => bd_fn!(decl_fguv_32x32xn_fn, BD, fguv_32x32xn_i444, ssse3),
I420 => bd_fn!(fguv_32x32xn::decl_fn, BD, fguv_32x32xn_i420, ssse3),
I422 => bd_fn!(fguv_32x32xn::decl_fn, BD, fguv_32x32xn_i422, ssse3),
I444 => bd_fn!(fguv_32x32xn::decl_fn, BD, fguv_32x32xn_i444, ssse3),
});

#[cfg(target_arch = "x86_64")]
Expand All @@ -1210,31 +1136,31 @@ impl Rav1dFilmGrainDSPContext {
return self;
}

self.generate_grain_y = bd_fn!(decl_generate_grain_y_fn, BD, generate_grain_y, avx2);
self.generate_grain_y = bd_fn!(generate_grain_y::decl_fn, BD, generate_grain_y, avx2);
self.generate_grain_uv = enum_map!(Rav1dPixelLayoutSubSampled => generate_grain_uv::Fn; match key {
I420 => bd_fn!(decl_generate_grain_uv_fn, BD, generate_grain_uv_420, avx2),
I422 => bd_fn!(decl_generate_grain_uv_fn, BD, generate_grain_uv_422, avx2),
I444 => bd_fn!(decl_generate_grain_uv_fn, BD, generate_grain_uv_444, avx2),
I420 => bd_fn!(generate_grain_uv::decl_fn, BD, generate_grain_uv_420, avx2),
I422 => bd_fn!(generate_grain_uv::decl_fn, BD, generate_grain_uv_422, avx2),
I444 => bd_fn!(generate_grain_uv::decl_fn, BD, generate_grain_uv_444, avx2),
});

if !flags.contains(CpuFlags::SLOW_GATHER) {
self.fgy_32x32xn = bd_fn!(decl_fgy_32x32xn_fn, BD, fgy_32x32xn, avx2);
self.fgy_32x32xn = bd_fn!(fgy_32x32xn::decl_fn, BD, fgy_32x32xn, avx2);
self.fguv_32x32xn = enum_map!(Rav1dPixelLayoutSubSampled => fguv_32x32xn::Fn; match key {
I420 => bd_fn!(decl_fguv_32x32xn_fn, BD, fguv_32x32xn_i420, avx2),
I422 => bd_fn!(decl_fguv_32x32xn_fn, BD, fguv_32x32xn_i422, avx2),
I444 => bd_fn!(decl_fguv_32x32xn_fn, BD, fguv_32x32xn_i444, avx2),
I420 => bd_fn!(fguv_32x32xn::decl_fn, BD, fguv_32x32xn_i420, avx2),
I422 => bd_fn!(fguv_32x32xn::decl_fn, BD, fguv_32x32xn_i422, avx2),
I444 => bd_fn!(fguv_32x32xn::decl_fn, BD, fguv_32x32xn_i444, avx2),
});
}

if !flags.contains(CpuFlags::AVX512ICL) {
return self;
}

self.fgy_32x32xn = bd_fn!(decl_fgy_32x32xn_fn, BD, fgy_32x32xn, avx512icl);
self.fgy_32x32xn = bd_fn!(fgy_32x32xn::decl_fn, BD, fgy_32x32xn, avx512icl);
self.fguv_32x32xn = enum_map!(Rav1dPixelLayoutSubSampled => fguv_32x32xn::Fn; match key {
I420 => bd_fn!(decl_fguv_32x32xn_fn, BD, fguv_32x32xn_i420, avx512icl),
I422 => bd_fn!(decl_fguv_32x32xn_fn, BD, fguv_32x32xn_i422, avx512icl),
I444 => bd_fn!(decl_fguv_32x32xn_fn, BD, fguv_32x32xn_i444, avx512icl),
I420 => bd_fn!(fguv_32x32xn::decl_fn, BD, fguv_32x32xn_i420, avx512icl),
I422 => bd_fn!(fguv_32x32xn::decl_fn, BD, fguv_32x32xn_i422, avx512icl),
I444 => bd_fn!(fguv_32x32xn::decl_fn, BD, fguv_32x32xn_i444, avx512icl),
});
}

Expand All @@ -1248,11 +1174,11 @@ impl Rav1dFilmGrainDSPContext {
return self;
}

self.generate_grain_y = bd_fn!(decl_generate_grain_y_fn, BD, generate_grain_y, neon);
self.generate_grain_y = bd_fn!(generate_grain_y::decl_fn, BD, generate_grain_y, neon);
self.generate_grain_uv = enum_map!(Rav1dPixelLayoutSubSampled => generate_grain_uv::Fn; match key {
I420 => bd_fn!(decl_generate_grain_uv_fn, BD, generate_grain_uv_420, neon),
I422 => bd_fn!(decl_generate_grain_uv_fn, BD, generate_grain_uv_422, neon),
I444 => bd_fn!(decl_generate_grain_uv_fn, BD, generate_grain_uv_444, neon),
I420 => bd_fn!(generate_grain_uv::decl_fn, BD, generate_grain_uv_420, neon),
I422 => bd_fn!(generate_grain_uv::decl_fn, BD, generate_grain_uv_422, neon),
I444 => bd_fn!(generate_grain_uv::decl_fn, BD, generate_grain_uv_444, neon),
});

self.fgy_32x32xn = fgy_32x32xn::Fn::new(fgy_32x32xn_neon_erased::<BD>);
Expand Down
16 changes: 16 additions & 0 deletions src/wrap_fn_ptr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,22 @@ macro_rules! wrap_fn_ptr {
Self::new(default_unimplemented)
};
}

// Helper macro emitted as part of the enclosing macro's expansion.
//
// Usage: `decl_fn!(fn some_symbol)`.
// The expansion is a block expression that declares `some_symbol` as an
// `extern "C"` function and evaluates to `$name::Fn::new(some_symbol)`.
//
// `$name`, `$arg_name`, `$arg_ty` and `$return_ty` are not bound by this
// inner macro; they are substituted by the enclosing macro before this
// definition is emitted. `$fn_name` is the only metavariable matched here.
// NOTE(review): the enclosing macro's matcher is outside this view; confirm
// there that these describe the wrapped function-pointer signature.
//
// Only compiled when the `asm` cargo feature is enabled.
#[cfg(feature = "asm")]
// NOTE(review): presumably some expansions never call their `decl_fn!`,
// hence the lint suppression; confirm.
#[allow(unused_macros)]
macro_rules! decl_fn {
(fn $fn_name:ident) => {{
// The declaration is scoped to the block produced by this expansion.
extern "C" {
fn $fn_name($($arg_name: $arg_ty,)*) -> $return_ty;
}

// Value of the whole macro invocation: the wrapped function pointer.
$name::Fn::new($fn_name)
}};
}

// Re-export so the macro can be named by path from elsewhere in the crate.
// NOTE(review): the unused-import suppression presumably mirrors the
// unused-macro one above; confirm.
#[cfg(feature = "asm")]
#[allow(unused_imports)]
pub(crate) use decl_fn;
}
};
}
Expand Down

0 comments on commit 5bda2e8

Please sign in to comment.