From 37b4f9839af4ec80bb551964797b9abbce7a6c2a Mon Sep 17 00:00:00 2001 From: Per Larsen Date: Sun, 19 Nov 2023 01:12:54 -0800 Subject: [PATCH] : Deduplicate w/ generics. --- src/loopfilter.rs | 345 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 344 insertions(+), 1 deletion(-) diff --git a/src/loopfilter.rs b/src/loopfilter.rs index 5329b25c2..2b411b165 100644 --- a/src/loopfilter.rs +++ b/src/loopfilter.rs @@ -1,6 +1,8 @@ -use crate::include::common::bitdepth::DynPixel; +use crate::include::common::bitdepth::{AsPrimitive, BitDepth, DynPixel}; +use crate::include::common::intops::iclip; use crate::src::lf_mask::Av1FilterLUT; use libc::ptrdiff_t; +use std::cmp; use std::ffi::c_int; pub type loopfilter_sb_fn = unsafe extern "C" fn( @@ -384,3 +386,344 @@ extern "C" { bitdepth_max: c_int, ); } + +#[inline(never)] +unsafe fn loop_filter( + mut dst: *mut BD::Pixel, + mut E: c_int, + mut I: c_int, + mut H: c_int, + stridea: ptrdiff_t, + strideb: ptrdiff_t, + wd: c_int, + bd: BD, +) { + let bitdepth_min_8 = bd.bitdepth() - 8; + let F = 1 << bitdepth_min_8; + E <<= bitdepth_min_8; + I <<= bitdepth_min_8; + H <<= bitdepth_min_8; + let mut i = 0; + while i < 4 { + let mut p6 = 0; + let mut p5 = 0; + let mut p4 = 0; + let mut p3 = 0; + let mut p2 = 0; + let p1 = (*dst.offset(strideb * -(2 as c_int) as isize)).as_::(); + let p0 = (*dst.offset(strideb * -(1 as c_int) as isize)).as_::(); + let q0 = (*dst.offset((strideb * 0) as isize)).as_::(); + let q1 = (*dst.offset((strideb * 1) as isize)).as_::(); + let mut q2 = 0; + let mut q3 = 0; + let mut q4 = 0; + let mut q5 = 0; + let mut q6 = 0; + let mut fm; + let mut flat8out = 0; + let mut flat8in = 0; + fm = ((p1 - p0).abs() <= I + && (q1 - q0).abs() <= I + && (p0 - q0).abs() * 2 + ((p1 - q1).abs() >> 1) <= E) as c_int; + if wd > 4 { + p2 = (*dst.offset(strideb * -(3 as c_int) as isize)).as_::(); + q2 = (*dst.offset((strideb * 2) as isize)).as_::(); + fm &= ((p2 - p1).abs() <= I && (q2 - q1).abs() <= I) as c_int; + if wd > 6 { + p3 = (*dst.offset(strideb * -(4 as c_int) as isize)).as_::(); + q3 = (*dst.offset((strideb * 3) as isize)).as_::(); + fm &= ((p3 - p2).abs() <= I && (q3 - q2).abs() <= I) as c_int; + } + } + if !(fm == 0) { + if wd >= 16 { + p6 = (*dst.offset(strideb * -(7 as c_int) as isize)).as_::(); + p5 = (*dst.offset(strideb * -(6 as c_int) as isize)).as_::(); + p4 = (*dst.offset(strideb * -(5 as c_int) as isize)).as_::(); + q4 = (*dst.offset((strideb * 4) as isize)).as_::(); + q5 = (*dst.offset((strideb * 5) as isize)).as_::(); + q6 = (*dst.offset(strideb * 6)).as_::(); + flat8out = ((p6 - p0).abs() <= F + && (p5 - p0).abs() <= F + && (p4 - p0).abs() <= F + && (q4 - q0).abs() <= F + && (q5 - q0).abs() <= F + && (q6 - q0).abs() <= F) as c_int; + } + if wd >= 6 { + flat8in = ((p2 - p0).abs() <= F + && (p1 - p0).abs() <= F + && (q1 - q0).abs() <= F + && (q2 - q0).abs() <= F) as c_int; + } + if wd >= 8 { + flat8in &= ((p3 - p0).abs() <= F && (q3 - q0).abs() <= F) as c_int; + } + if wd >= 16 && flat8out & flat8in != 0 { + *dst.offset(strideb * -(6 as c_int) as isize) = (p6 + + p6 + + p6 + + p6 + + p6 + + p6 * 2 + + p5 * 2 + + p4 * 2 + + p3 + + p2 + + p1 + + p0 + + q0 + + 8 + >> 4) + .as_::(); + *dst.offset(strideb * -(5 as c_int) as isize) = (p6 + + p6 + + p6 + + p6 + + p6 + + p5 * 2 + + p4 * 2 + + p3 * 2 + + p2 + + p1 + + p0 + + q0 + + q1 + + 8 + >> 4) + .as_::(); + *dst.offset(strideb * -(4 as c_int) as isize) = (p6 + + p6 + + p6 + + p6 + + p5 + + p4 * 2 + + p3 * 2 + + p2 * 2 + + p1 + + p0 + + q0 + + q1 + + q2 + + 8 + >> 4) + .as_::(); + *dst.offset(strideb * -(3 as c_int) as isize) = (p6 + + p6 + + p6 + + p5 + + p4 + + p3 * 2 + + p2 * 2 + + p1 * 2 + + p0 + + q0 + + q1 + + q2 + + q3 + + 8 + >> 4) + .as_::(); + *dst.offset(strideb * -(2 as c_int) as isize) = (p6 + + p6 + + p5 + + p4 + + p3 + + p2 * 2 + + p1 * 2 + + p0 * 2 + + q0 + + q1 + + q2 + + q3 + + q4 + + 8 + >> 4) + .as_::(); + *dst.offset(strideb * -(1 as c_int) as isize) = (p6 + + p5 + + p4 + + p3 + + p2 + + p1 * 2 + + p0 * 2 + + q0 * 2 + + q1 + + q2 + + q3 + + q4 + + q5 + + 8 + >> 4) + .as_::(); + *dst.offset((strideb * 0) as isize) = (p5 + + p4 + + p3 + + p2 + + p1 + + p0 * 2 + + q0 * 2 + + q1 * 2 + + q2 + + q3 + + q4 + + q5 + + q6 + + 8 + >> 4) + .as_::(); + *dst.offset((strideb * 1) as isize) = (p4 + + p3 + + p2 + + p1 + + p0 + + q0 * 2 + + q1 * 2 + + q2 * 2 + + q3 + + q4 + + q5 + + q6 + + q6 + + 8 + >> 4) + .as_::(); + *dst.offset((strideb * 2) as isize) = (p3 + + p2 + + p1 + + p0 + + q0 + + q1 * 2 + + q2 * 2 + + q3 * 2 + + q4 + + q5 + + q6 + + q6 + + q6 + + 8 + >> 4) + .as_::(); + *dst.offset((strideb * 3) as isize) = (p2 + + p1 + + p0 + + q0 + + q1 + + q2 * 2 + + q3 * 2 + + q4 * 2 + + q5 + + q6 + + q6 + + q6 + + q6 + + 8 + >> 4) + .as_::(); + *dst.offset((strideb * 4) as isize) = (p1 + + p0 + + q0 + + q1 + + q2 + + q3 * 2 + + q4 * 2 + + q5 * 2 + + q6 + + q6 + + q6 + + q6 + + q6 + + 8 + >> 4) + .as_::(); + *dst.offset((strideb * 5) as isize) = (p0 + + q0 + + q1 + + q2 + + q3 + + q4 * 2 + + q5 * 2 + + q6 * 2 + + q6 + + q6 + + q6 + + q6 + + q6 + + 8 + >> 4) + .as_::(); + } else if wd >= 8 && flat8in != 0 { + *dst.offset(strideb * -(3 as c_int) as isize) = + (p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0 + 4 >> 3).as_::(); + *dst.offset(strideb * -(2 as c_int) as isize) = + (p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4 >> 3).as_::(); + *dst.offset(strideb * -(1 as c_int) as isize) = + (p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4 >> 3).as_::(); + *dst.offset((strideb * 0) as isize) = + (p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4 >> 3).as_::(); + *dst.offset((strideb * 1) as isize) = + (p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3 + 4 >> 3).as_::(); + *dst.offset((strideb * 2) as isize) = + (p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3 + 4 >> 3).as_::(); + } else if wd == 6 && flat8in != 0 { + *dst.offset(strideb * -(2 as c_int) as isize) = + (p2 + 2 * p2 + 2 * p1 + 2 * p0 + q0 + 4 >> 3).as_::(); + *dst.offset(strideb * -(1 as c_int) as isize) = + (p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4 >> 3).as_::(); + *dst.offset((strideb * 0) as isize) = + (p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4 >> 3).as_::(); + *dst.offset((strideb * 1) as isize) = + (p0 + 2 * q0 + 2 * q1 + 2 * q2 + q2 + 4 >> 3).as_::(); + } else { + let hev = ((p1 - p0).abs() > H || (q1 - q0).abs() > H) as c_int; + if hev != 0 { + let mut f = iclip( + p1 - q1, + -(128 as c_int) * ((1 as c_int) << bitdepth_min_8), + 128 * ((1 as c_int) << bitdepth_min_8) - 1, + ); + let f1; + let f2; + f = iclip( + 3 * (q0 - p0) + f, + -(128 as c_int) * ((1 as c_int) << bitdepth_min_8), + 128 * ((1 as c_int) << bitdepth_min_8) - 1, + ); + f1 = cmp::min(f + 4, ((128 as c_int) << bitdepth_min_8) - 1) >> 3; + f2 = cmp::min(f + 3, ((128 as c_int) << bitdepth_min_8) - 1) >> 3; + *dst.offset(strideb * -(1 as c_int) as isize) = + iclip(p0 + f2, 0 as c_int, bd.bitdepth_max().as_::()) + .as_::(); + *dst.offset((strideb * 0) as isize) = + iclip(q0 - f1, 0 as c_int, bd.bitdepth_max().as_::()) + .as_::(); + } else { + let mut f_0 = iclip( + 3 * (q0 - p0), + -(128 as c_int) * ((1 as c_int) << bitdepth_min_8), + 128 * ((1 as c_int) << bitdepth_min_8) - 1, + ); + let f1_0; + let f2_0; + f1_0 = cmp::min(f_0 + 4, ((128 as c_int) << bitdepth_min_8) - 1) >> 3; + f2_0 = cmp::min(f_0 + 3, ((128 as c_int) << bitdepth_min_8) - 1) >> 3; + *dst.offset(strideb * -(1 as c_int) as isize) = + iclip(p0 + f2_0, 0 as c_int, bd.bitdepth_max().as_::()) + .as_::(); + *dst.offset((strideb * 0) as isize) = + iclip(q0 - f1_0, 0 as c_int, bd.bitdepth_max().as_::()) + .as_::(); + f_0 = f1_0 + 1 >> 1; + *dst.offset(strideb * -(2 as c_int) as isize) = + iclip(p1 + f_0, 0 as c_int, bd.bitdepth_max().as_::()) + .as_::(); + *dst.offset((strideb * 1) as isize) = + iclip(q1 - f_0, 0 as c_int, bd.bitdepth_max().as_::()) + .as_::(); + } + } + } + i += 1; + dst = dst.offset(stridea as isize); + } +}