Skip to content

Commit

Permalink
struct Rav1dFrameContext_lf::lr_lpf_line: Convert to offsets (#793)
Browse files Browse the repository at this point in the history
  • Loading branch information
randomPoison authored Mar 18, 2024
2 parents f5bee55 + b637376 commit 1e394d7
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 75 deletions.
23 changes: 14 additions & 9 deletions src/cdef_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
let uv_stride: ptrdiff_t = BD::pxstride(f.cur.stride[1]);

let cdef_line_buf = BD::cast_pixel_slice_mut(&mut f.lf.cdef_line_buf);
let lr_line_buf = BD::cast_pixel_slice(&f.lf.lr_line_buf);

let mut bit = false;
for by in (by_start..by_end).step_by(2) {
Expand Down Expand Up @@ -328,7 +329,7 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
} else {
offset = (sby * ((4 as c_int) << sb128) - 4) as isize * y_stride
+ (bx * 4) as isize;
top = f.lf.lr_lpf_line[0].cast::<BD::Pixel>().offset(offset);
top = lr_line_buf.as_ptr().add(f.lf.lr_lpf_line[0]).offset(offset);
}
bot = bptrs[0].offset(8 * y_stride as isize);
st_y = false;
Expand All @@ -347,7 +348,7 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
} else {
let line = sby * ((4 as c_int) << sb128) + 4 * sb128 + 2;
offset = line as isize * y_stride + (bx * 4) as isize;
bot = f.lf.lr_lpf_line[0].cast::<BD::Pixel>().offset(offset);
bot = lr_line_buf.as_ptr().add(f.lf.lr_lpf_line[0]).offset(offset);
}
st_y = false;
} else {
Expand Down Expand Up @@ -421,8 +422,10 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
let line_0 = sby * ((4 as c_int) << sb128) - 4;
offset = line_0 as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize;
top =
f.lf.lr_lpf_line[pl].cast::<BD::Pixel>().offset(offset);
top = lr_line_buf
.as_ptr()
.add(f.lf.lr_lpf_line[pl])
.offset(offset);
}
bot = bptrs[pl].offset(((8 >> ss_ver) * uv_stride) as isize);
st_uv = false;
Expand All @@ -441,11 +444,13 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
.add(f.lf.cdef_lpf_line[pl])
.offset(offset);
} else {
let line_1 = sby * ((4 as c_int) << sb128) + 4 * sb128 + 2;
offset = line_1 as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize;
bot =
f.lf.lr_lpf_line[pl].cast::<BD::Pixel>().offset(offset);
let line = sby * ((4 as c_int) << sb128) + 4 * sb128 + 2;
offset =
line as isize * uv_stride + (bx * 4 >> ss_hor) as isize;
bot = lr_line_buf
.as_ptr()
.add(f.lf.lr_lpf_line[pl])
.offset(offset);
}
st_uv = false;
} else {
Expand Down
54 changes: 24 additions & 30 deletions src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4439,38 +4439,32 @@ pub(crate) unsafe fn rav1d_decode_frame_init(
};
y_stride = f.sr_cur.p.stride[0];
uv_stride = f.sr_cur.p.stride[1];
if y_stride * num_lines as isize != f.lf.lr_buf_plane_sz[0] as isize
|| uv_stride * num_lines as isize * 2 != f.lf.lr_buf_plane_sz[1] as isize
{
// lr simd may overread the input, so slightly over-allocate the lpf buffer
let mut alloc_sz: usize = 128;
alloc_sz += y_stride.unsigned_abs() * num_lines as usize;
alloc_sz += uv_stride.unsigned_abs() * num_lines as usize * 2;
// TODO: Fallible allocation
// On allocation failure set `f.lf.lr_buf_plane_sz` to 0.
f.lf.lr_line_buf.resize(alloc_sz, 0);
let mut ptr = f.lf.lr_line_buf.as_mut_ptr();

ptr = ptr.offset(64);
if y_stride < 0 {
f.lf.lr_lpf_line[0] =
ptr.offset(-(y_stride * (num_lines as isize - 1))) as *mut DynPixel;
} else {
f.lf.lr_lpf_line[0] = ptr as *mut DynPixel;
}
ptr = ptr.offset(y_stride.abs() * num_lines as isize);
if uv_stride < 0 {
f.lf.lr_lpf_line[1] =
ptr.offset(-(uv_stride * (num_lines as isize * 1 - 1))) as *mut DynPixel;
f.lf.lr_lpf_line[2] =
ptr.offset(-(uv_stride * (num_lines as isize * 2 - 1))) as *mut DynPixel;
} else {
f.lf.lr_lpf_line[1] = ptr as *mut DynPixel;
f.lf.lr_lpf_line[2] = ptr.offset(uv_stride * num_lines as isize) as *mut DynPixel;
}
// lr simd may overread the input, so slightly over-allocate the lpf buffer
let mut alloc_sz: usize = 128;
alloc_sz += y_stride.unsigned_abs() * num_lines as usize;
alloc_sz += uv_stride.unsigned_abs() * num_lines as usize * 2;
// TODO: Fallible allocation
f.lf.lr_line_buf.resize(alloc_sz, 0);

let y_stride_px = bpc.pxstride(y_stride);
let uv_stride_px = bpc.pxstride(uv_stride);

f.lf.lr_buf_plane_sz[0] = y_stride as c_int * num_lines;
f.lf.lr_buf_plane_sz[1] = uv_stride as c_int * num_lines * 2;
let mut offset = bpc.pxstride(64usize);
if y_stride < 0 {
f.lf.lr_lpf_line[0] = offset.wrapping_add_signed(-(y_stride_px * (num_lines as isize - 1)));
} else {
f.lf.lr_lpf_line[0] = offset;
}
offset = offset.wrapping_add_signed(y_stride_px.abs() * num_lines as isize);
if uv_stride < 0 {
f.lf.lr_lpf_line[1] =
offset.wrapping_add_signed(-(uv_stride_px * (num_lines as isize * 1 - 1)));
f.lf.lr_lpf_line[2] =
offset.wrapping_add_signed(-(uv_stride_px * (num_lines as isize * 2 - 1)));
} else {
f.lf.lr_lpf_line[1] = offset;
f.lf.lr_lpf_line[2] = offset.wrapping_add_signed(uv_stride_px * num_lines as isize);
}

// update allocation for loopfilter masks
Expand Down
7 changes: 3 additions & 4 deletions src/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -457,16 +457,15 @@ pub struct Rav1dFrameContext_lf {
pub level: Vec<[u8; 4]>,
pub mask: Vec<Av1Filter>, /* len = w*h */
pub lr_mask: Vec<Av1Restoration>,
pub lr_buf_plane_sz: [c_int; 2], /* (stride*sbh*4) << sb128 if n_tc > 1, else stride*4 */
pub lim_lut: Align16<Av1FilterLUT>,
pub last_sharpness: c_int,
pub lvl: [[[[u8; 2]; 8]; 4]; 8], /* [8 seg_id][4 dir][8 ref][2 is_gmv] */
pub tx_lpf_right_edge: TxLpfRightEdge,
pub cdef_line_buf: AlignedVec32<u8>, /* AlignedVec32<DynPixel> */
pub lr_line_buf: AlignedVec64<u8>,
pub cdef_line: [[usize; 3]; 2], /* [2 pre/post][3 plane] */
pub cdef_lpf_line: [usize; 3], /* plane */
pub lr_lpf_line: [*mut DynPixel; 3], /* plane */
pub cdef_line: [[usize; 3]; 2], /* [2 pre/post][3 plane] */
pub cdef_lpf_line: [usize; 3], /* plane */
pub lr_lpf_line: [usize; 3], /* plane */

// in-loop filter per-frame state keeping
pub start_of_tile_row: Vec<u8>,
Expand Down
35 changes: 11 additions & 24 deletions src/lf_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,35 +155,22 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
let seq_hdr = &***f.seq_hdr.as_ref().unwrap();
let tt_off = have_tt * sby * ((4 as c_int) << seq_hdr.sb128);

let lr_plane_sz = &f.lf.lr_buf_plane_sz;
let y_stride = BD::pxstride(lr_stride[0]);
let uv_stride = BD::pxstride(lr_stride[1]);
let y_span = lr_plane_sz[0] as isize - y_stride;
let uv_span = lr_plane_sz[1] as isize / 2 - uv_stride;

let dst: [&mut [BD::Pixel]; 3] = [
slice::from_raw_parts_mut(
(f.lf.lr_lpf_line[0] as *mut BD::Pixel).offset(cmp::min(y_span, 0)),
lr_plane_sz[0] as usize,
),
slice::from_raw_parts_mut(
(f.lf.lr_lpf_line[1] as *mut BD::Pixel).offset(cmp::min(uv_span, 0)),
lr_plane_sz[1] as usize / 2,
),
slice::from_raw_parts_mut(
(f.lf.lr_lpf_line[2] as *mut BD::Pixel).offset(cmp::min(uv_span, 0)),
lr_plane_sz[1] as usize / 2,
),
];
let dst_offset: [usize; 2] = [
(tt_off as isize * y_stride - cmp::min(y_span, 0)) as usize,
(tt_off as isize * uv_stride - cmp::min(uv_span, 0)) as usize,
let y_offset = (tt_off as isize * y_stride) as usize;
let uv_offset = (tt_off as isize * uv_stride) as usize;
let dst_offset = [
f.lf.lr_lpf_line[0] + y_offset,
f.lf.lr_lpf_line[1] + uv_offset,
f.lf.lr_lpf_line[2] + uv_offset,
];

// TODO Also check block level restore type to reduce copying.
let restore_planes = f.lf.restore_planes;

let cdef_line_buf = BD::cast_pixel_slice_mut(&mut f.lf.cdef_line_buf);
let lr_line_buf = BD::cast_pixel_slice_mut(&mut f.lf.lr_line_buf);

if seq_hdr.cdef != 0 || restore_planes & LR_RESTORE_Y as c_int != 0 {
let h = f.cur.p.h;
Expand All @@ -193,7 +180,7 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
if restore_planes & LR_RESTORE_Y as c_int != 0 || resize == 0 {
backup_lpf::<BD>(
c,
dst[0],
lr_line_buf,
dst_offset[0],
lr_stride[0],
src[0],
Expand Down Expand Up @@ -265,7 +252,7 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
if restore_planes & LR_RESTORE_U as c_int != 0 || resize == 0 {
backup_lpf::<BD>(
c,
dst[1],
lr_line_buf,
dst_offset[1],
lr_stride[1],
src[1],
Expand Down Expand Up @@ -325,8 +312,8 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
if restore_planes & LR_RESTORE_V as c_int != 0 || resize == 0 {
backup_lpf::<BD>(
c,
dst[2],
dst_offset[1],
lr_line_buf,
dst_offset[2],
lr_stride[1],
src[2],
(src_offset[1] as isize - offset_uv as isize * BD::pxstride(src_stride[1]))
Expand Down
15 changes: 7 additions & 8 deletions src/lr_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ use libc::ptrdiff_t;
use std::cmp;
use std::ffi::c_int;
use std::ffi::c_uint;
use std::slice;

pub type LrRestorePlanes = c_uint;
pub const LR_RESTORE_V: LrRestorePlanes = 4;
Expand Down Expand Up @@ -47,12 +46,8 @@ unsafe fn lr_stripe<BD: BitDepth>(
let sby = y + (if y != 0 { 8 << ss_ver } else { 0 }) >> 6 - ss_ver + seq_hdr.sb128;
let have_tt = (c.tc.len() > 1) as c_int;
let lpf_stride = BD::pxstride(stride);
let lpf_plane_sz = BD::pxstride(f.lf.lr_buf_plane_sz[(plane != 0) as usize] as isize);
let mut lpf_offset = cmp::max(lpf_stride - lpf_plane_sz, 0);
let lpf = &slice::from_raw_parts(
(f.lf.lr_lpf_line[plane as usize] as *const BD::Pixel).offset(-lpf_offset),
lpf_plane_sz.unsigned_abs(),
);
let lr_line_buf = BD::cast_pixel_slice(&f.lf.lr_line_buf);
let mut lpf_offset = f.lf.lr_lpf_line[plane as usize] as isize;
lpf_offset += (have_tt * (sby * (4 << seq_hdr.sb128) - 4)) as isize * lpf_stride + x as isize;
// The first stripe of the frame is shorter by 8 luma pixel rows.
let mut stripe_h = cmp::min(64 - 8 * (y == 0) as c_int >> ss_ver, row_h - y);
Expand Down Expand Up @@ -102,7 +97,11 @@ unsafe fn lr_stripe<BD: BitDepth>(
p.as_mut_ptr().add(p_offset).cast(),
stride,
left.as_ptr().cast(),
lpf.as_ptr().offset(lpf_offset).cast(),
// NOTE: The calculated pointer may point to before the beginning of
// `lr_line_buf`, so we must use `.wrapping_offset` here.
// `.offset` requires the resulting pointer to be in bounds, whereas
// `.wrapping_offset` defers that requirement until the pointer is dereferenced.
lr_line_buf.as_ptr().wrapping_offset(lpf_offset).cast(),
unit_w,
stripe_h,
&mut params,
Expand Down

0 comments on commit 1e394d7

Please sign in to comment.