Skip to content

Commit

Permalink
fn rav1d_intra_pred_dsp_init: Deduplicate w/ generics.
Browse files Browse the repository at this point in the history
  • Loading branch information
kkysen committed Nov 19, 2023
1 parent 3e460f1 commit e26e7b5
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 244 deletions.
4 changes: 0 additions & 4 deletions lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,6 @@ pub mod src {
mod intra_edge;
mod ipred;
mod ipred_prepare;
#[cfg(feature = "bitdepth_16")]
mod ipred_tmpl_16;
#[cfg(feature = "bitdepth_8")]
mod ipred_tmpl_8;
mod itx;
mod itx_1d;
#[cfg(feature = "bitdepth_16")]
Expand Down
8 changes: 4 additions & 4 deletions src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ use crate::src::intra_edge::EdgeFlags;
use crate::src::intra_edge::EdgeNode;
use crate::src::intra_edge::EdgeTip;
use crate::src::intra_edge::EDGE_I444_TOP_HAS_RIGHT;
use crate::src::ipred::rav1d_intra_pred_dsp_init;
use crate::src::levels::mv;
use crate::src::levels::Av1Block;
use crate::src::levels::BS_128x128;
Expand Down Expand Up @@ -244,14 +245,13 @@ use std::sync::atomic::Ordering;
#[cfg(feature = "bitdepth_8")]
use crate::{
include::common::bitdepth::BitDepth8, src::cdef_tmpl_8::rav1d_cdef_dsp_init_8bpc,
src::ipred_tmpl_8::rav1d_intra_pred_dsp_init_8bpc, src::itx_tmpl_8::rav1d_itx_dsp_init_8bpc,
src::itx_tmpl_8::rav1d_itx_dsp_init_8bpc,
src::loopfilter_tmpl_8::rav1d_loop_filter_dsp_init_8bpc,
};

#[cfg(feature = "bitdepth_16")]
use crate::{
include::common::bitdepth::BitDepth16, src::cdef_tmpl_16::rav1d_cdef_dsp_init_16bpc,
src::ipred_tmpl_16::rav1d_intra_pred_dsp_init_16bpc,
src::itx_tmpl_16::rav1d_itx_dsp_init_16bpc,
src::loopfilter_tmpl_16::rav1d_loop_filter_dsp_init_16bpc,
};
Expand Down Expand Up @@ -5051,7 +5051,7 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult {
#[cfg(feature = "bitdepth_8")]
8 => {
rav1d_cdef_dsp_init_8bpc(&mut dsp.cdef);
rav1d_intra_pred_dsp_init_8bpc(&mut dsp.ipred);
rav1d_intra_pred_dsp_init::<BitDepth8>(&mut dsp.ipred);
rav1d_itx_dsp_init_8bpc(&mut dsp.itx, bpc);
rav1d_loop_filter_dsp_init_8bpc(&mut dsp.lf);
rav1d_loop_restoration_dsp_init::<BitDepth8>(&mut dsp.lr, bpc);
Expand All @@ -5061,7 +5061,7 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult {
#[cfg(feature = "bitdepth_16")]
10 | 12 => {
rav1d_cdef_dsp_init_16bpc(&mut dsp.cdef);
rav1d_intra_pred_dsp_init_16bpc(&mut dsp.ipred);
rav1d_intra_pred_dsp_init::<BitDepth16>(&mut dsp.ipred);
rav1d_itx_dsp_init_16bpc(&mut dsp.itx, bpc);
rav1d_loop_filter_dsp_init_16bpc(&mut dsp.lf);
rav1d_loop_restoration_dsp_init::<BitDepth16>(&mut dsp.lr, bpc);
Expand Down
151 changes: 82 additions & 69 deletions src/ipred.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,21 @@ use crate::include::common::bitdepth::DynPixel;
use crate::include::common::bitdepth::BPC;
use crate::include::common::intops::apply_sign;
use crate::include::common::intops::iclip;
use crate::include::dav1d::headers::Rav1dPixelLayout;
use crate::src::levels::DC_128_PRED;
use crate::src::levels::DC_PRED;
use crate::src::levels::FILTER_PRED;
use crate::src::levels::HOR_PRED;
use crate::src::levels::LEFT_DC_PRED;
use crate::src::levels::PAETH_PRED;
use crate::src::levels::SMOOTH_H_PRED;
use crate::src::levels::SMOOTH_PRED;
use crate::src::levels::SMOOTH_V_PRED;
use crate::src::levels::TOP_DC_PRED;
use crate::src::levels::VERT_PRED;
use crate::src::levels::Z1_PRED;
use crate::src::levels::Z2_PRED;
use crate::src::levels::Z3_PRED;
use crate::src::tables::dav1d_dr_intra_derivative;
use crate::src::tables::dav1d_filter_intra_taps;
use crate::src::tables::dav1d_sm_weights;
Expand All @@ -20,27 +35,7 @@ use std::ffi::c_void;
use std::slice;

#[cfg(feature = "asm")]
use crate::include::common::bitdepth::bd_fn;

#[cfg(feature = "asm")]
use crate::{
include::dav1d::headers::Rav1dPixelLayout,
src::cpu::{rav1d_get_cpu_flags, CpuFlags},
src::levels::DC_128_PRED,
src::levels::DC_PRED,
src::levels::FILTER_PRED,
src::levels::HOR_PRED,
src::levels::LEFT_DC_PRED,
src::levels::PAETH_PRED,
src::levels::SMOOTH_H_PRED,
src::levels::SMOOTH_PRED,
src::levels::SMOOTH_V_PRED,
src::levels::TOP_DC_PRED,
src::levels::VERT_PRED,
src::levels::Z1_PRED,
src::levels::Z2_PRED,
src::levels::Z3_PRED,
};
use crate::{include::common::bitdepth::bd_fn, src::cpu::rav1d_get_cpu_flags, src::cpu::CpuFlags};

#[cfg(all(feature = "asm", target_arch = "aarch64"))]
use ::to_method::To;
Expand Down Expand Up @@ -490,8 +485,7 @@ unsafe fn dc_gen_top<BD: BitDepth>(topleft: *const BD::Pixel, width: c_int) -> c
return dc >> ctz(width as c_uint);
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_dc_top_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_dc_top_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft: *const DynPixel,
Expand All @@ -512,8 +506,7 @@ pub(crate) unsafe extern "C" fn ipred_dc_top_c_erased<BD: BitDepth>(
);
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_cfl_top_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_cfl_top_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft: *const DynPixel,
Expand Down Expand Up @@ -545,8 +538,7 @@ unsafe fn dc_gen_left<BD: BitDepth>(topleft: *const BD::Pixel, height: c_int) ->
return dc >> ctz(height as c_uint);
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_dc_left_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_dc_left_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft: *const DynPixel,
Expand All @@ -567,8 +559,7 @@ pub(crate) unsafe extern "C" fn ipred_dc_left_c_erased<BD: BitDepth>(
);
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_cfl_left_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_cfl_left_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft: *const DynPixel,
Expand Down Expand Up @@ -620,8 +611,7 @@ unsafe fn dc_gen<BD: BitDepth>(topleft: *const BD::Pixel, width: c_int, height:
return dc;
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_dc_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_dc_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft: *const DynPixel,
Expand All @@ -642,8 +632,7 @@ pub(crate) unsafe extern "C" fn ipred_dc_c_erased<BD: BitDepth>(
);
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_cfl_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_cfl_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft: *const DynPixel,
Expand All @@ -666,8 +655,7 @@ pub(crate) unsafe extern "C" fn ipred_cfl_c_erased<BD: BitDepth>(
);
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_dc_128_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_dc_128_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
_topleft: *const DynPixel,
Expand All @@ -683,8 +671,7 @@ pub(crate) unsafe extern "C" fn ipred_dc_128_c_erased<BD: BitDepth>(
splat_dc(dst.cast(), stride, width, height, dc, bd);
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_cfl_128_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_cfl_128_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
_topleft: *const DynPixel,
Expand Down Expand Up @@ -724,8 +711,7 @@ unsafe fn ipred_v_rust<BD: BitDepth>(
}
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_v_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_v_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft: *const DynPixel,
Expand Down Expand Up @@ -774,8 +760,7 @@ unsafe fn ipred_h_rust<BD: BitDepth>(
}
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_h_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_h_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft: *const DynPixel,
Expand Down Expand Up @@ -836,8 +821,7 @@ unsafe fn ipred_paeth_rust<BD: BitDepth>(
}
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_paeth_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_paeth_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
tl_ptr: *const DynPixel,
Expand Down Expand Up @@ -894,8 +878,7 @@ unsafe fn ipred_smooth_rust<BD: BitDepth>(
}
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_smooth_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_smooth_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft: *const DynPixel,
Expand Down Expand Up @@ -947,8 +930,7 @@ unsafe fn ipred_smooth_v_rust<BD: BitDepth>(
}
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_smooth_v_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_smooth_v_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft: *const DynPixel,
Expand Down Expand Up @@ -1000,8 +982,7 @@ unsafe fn ipred_smooth_h_rust<BD: BitDepth>(
}
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_smooth_h_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_smooth_h_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft: *const DynPixel,
Expand Down Expand Up @@ -1480,8 +1461,7 @@ unsafe fn ipred_z3_rust<BD: BitDepth>(
}
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_z1_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_z1_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft_in: *const DynPixel,
Expand All @@ -1505,8 +1485,7 @@ pub(crate) unsafe extern "C" fn ipred_z1_c_erased<BD: BitDepth>(
);
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_z2_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_z2_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft_in: *const DynPixel,
Expand All @@ -1530,8 +1509,7 @@ pub(crate) unsafe extern "C" fn ipred_z2_c_erased<BD: BitDepth>(
);
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_z3_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_z3_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft_in: *const DynPixel,
Expand Down Expand Up @@ -1650,8 +1628,7 @@ unsafe fn ipred_filter_rust<BD: BitDepth>(
}
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn ipred_filter_c_erased<BD: BitDepth>(
unsafe extern "C" fn ipred_filter_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
topleft_in: *const DynPixel,
Expand Down Expand Up @@ -1765,8 +1742,7 @@ unsafe fn cfl_ac_rust<BD: BitDepth>(
}
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn cfl_ac_420_c_erased<BD: BitDepth>(
unsafe extern "C" fn cfl_ac_420_c_erased<BD: BitDepth>(
ac: *mut i16,
ypx: *const DynPixel,
stride: ptrdiff_t,
Expand All @@ -1788,8 +1764,7 @@ pub(crate) unsafe extern "C" fn cfl_ac_420_c_erased<BD: BitDepth>(
);
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn cfl_ac_422_c_erased<BD: BitDepth>(
unsafe extern "C" fn cfl_ac_422_c_erased<BD: BitDepth>(
ac: *mut i16,
ypx: *const DynPixel,
stride: ptrdiff_t,
Expand All @@ -1811,8 +1786,7 @@ pub(crate) unsafe extern "C" fn cfl_ac_422_c_erased<BD: BitDepth>(
);
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn cfl_ac_444_c_erased<BD: BitDepth>(
unsafe extern "C" fn cfl_ac_444_c_erased<BD: BitDepth>(
ac: *mut i16,
ypx: *const DynPixel,
stride: ptrdiff_t,
Expand Down Expand Up @@ -1856,8 +1830,7 @@ unsafe fn pal_pred_rust<BD: BitDepth>(
}
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
pub(crate) unsafe extern "C" fn pal_pred_c_erased<BD: BitDepth>(
unsafe extern "C" fn pal_pred_c_erased<BD: BitDepth>(
dst: *mut DynPixel,
stride: ptrdiff_t,
pal: *const u16,
Expand Down Expand Up @@ -2341,10 +2314,9 @@ unsafe extern "C" fn ipred_z3_neon_erased<BD: BitDepth>(
);
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
#[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64"),))]
#[inline(always)]
pub(crate) unsafe fn intra_pred_dsp_init_x86<BD: BitDepth>(c: *mut Rav1dIntraPredDSPContext) {
unsafe fn intra_pred_dsp_init_x86<BD: BitDepth>(c: *mut Rav1dIntraPredDSPContext) {
let flags = rav1d_get_cpu_flags();

if !flags.contains(CpuFlags::SSSE3) {
Expand Down Expand Up @@ -2432,10 +2404,9 @@ pub(crate) unsafe fn intra_pred_dsp_init_x86<BD: BitDepth>(c: *mut Rav1dIntraPre
}
}

// TODO(kkysen) Temporarily pub until mod is deduplicated
#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64"),))]
#[inline(always)]
pub(crate) unsafe fn intra_pred_dsp_init_arm<BD: BitDepth>(c: *mut Rav1dIntraPredDSPContext) {
unsafe fn intra_pred_dsp_init_arm<BD: BitDepth>(c: *mut Rav1dIntraPredDSPContext) {
let flags = rav1d_get_cpu_flags();

if !flags.contains(CpuFlags::NEON) {
Expand Down Expand Up @@ -2471,3 +2442,45 @@ pub(crate) unsafe fn intra_pred_dsp_init_arm<BD: BitDepth>(c: *mut Rav1dIntraPre

(*c).pal_pred = bd_fn!(BD, pal_pred, neon);
}

/// Fills `c` with the portable intra-prediction entry points, monomorphized
/// for the bit depth `BD`.
///
/// With the `asm` feature enabled, the initializer for the current target
/// architecture is called afterwards on the same context, i.e. once all of
/// the defaults below are already in place.
///
/// # Safety
///
/// `c` is dereferenced without any check, so it must point to a valid,
/// writable [`Rav1dIntraPredDSPContext`].
#[cold]
pub unsafe fn rav1d_intra_pred_dsp_init<BD: BitDepth>(c: *mut Rav1dIntraPredDSPContext) {
    // Single reborrow so every table write below goes through one named place.
    let dsp = &mut *c;

    // Directional and non-directional predictors, indexed by prediction mode.
    dsp.intra_pred[DC_PRED as usize] = Some(ipred_dc_c_erased::<BD>);
    dsp.intra_pred[DC_128_PRED as usize] = Some(ipred_dc_128_c_erased::<BD>);
    dsp.intra_pred[TOP_DC_PRED as usize] = Some(ipred_dc_top_c_erased::<BD>);
    dsp.intra_pred[LEFT_DC_PRED as usize] = Some(ipred_dc_left_c_erased::<BD>);
    dsp.intra_pred[HOR_PRED as usize] = Some(ipred_h_c_erased::<BD>);
    dsp.intra_pred[VERT_PRED as usize] = Some(ipred_v_c_erased::<BD>);
    dsp.intra_pred[PAETH_PRED as usize] = Some(ipred_paeth_c_erased::<BD>);
    dsp.intra_pred[SMOOTH_PRED as usize] = Some(ipred_smooth_c_erased::<BD>);
    dsp.intra_pred[SMOOTH_V_PRED as usize] = Some(ipred_smooth_v_c_erased::<BD>);
    dsp.intra_pred[SMOOTH_H_PRED as usize] = Some(ipred_smooth_h_c_erased::<BD>);
    dsp.intra_pred[Z1_PRED as usize] = Some(ipred_z1_c_erased::<BD>);
    dsp.intra_pred[Z2_PRED as usize] = Some(ipred_z2_c_erased::<BD>);
    dsp.intra_pred[Z3_PRED as usize] = Some(ipred_z3_c_erased::<BD>);
    dsp.intra_pred[FILTER_PRED as usize] = Some(ipred_filter_c_erased::<BD>);

    // The `cfl_ac` table is indexed by pixel layout minus one.
    dsp.cfl_ac[Rav1dPixelLayout::I420 as usize - 1] = cfl_ac_420_c_erased::<BD>;
    dsp.cfl_ac[Rav1dPixelLayout::I422 as usize - 1] = cfl_ac_422_c_erased::<BD>;
    dsp.cfl_ac[Rav1dPixelLayout::I444 as usize - 1] = cfl_ac_444_c_erased::<BD>;

    // CfL predictors exist only for the four DC modes.
    dsp.cfl_pred[DC_PRED as usize] = ipred_cfl_c_erased::<BD>;
    dsp.cfl_pred[DC_128_PRED as usize] = ipred_cfl_128_c_erased::<BD>;
    dsp.cfl_pred[TOP_DC_PRED as usize] = ipred_cfl_top_c_erased::<BD>;
    dsp.cfl_pred[LEFT_DC_PRED as usize] = ipred_cfl_left_c_erased::<BD>;

    dsp.pal_pred = pal_pred_c_erased::<BD>;

    // Must stay last: the per-architecture initializers receive the context
    // only after the portable defaults have been written.
    #[cfg(feature = "asm")]
    cfg_if! {
        if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
            intra_pred_dsp_init_x86::<BD>(c);
        } else if #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] {
            intra_pred_dsp_init_arm::<BD>(c);
        }
    }
}
Loading

0 comments on commit e26e7b5

Please sign in to comment.