Skip to content

Commit

Permalink
feat(core): introduce hardware jpeg decoder
Browse files Browse the repository at this point in the history
[no changelog]
  • Loading branch information
cepetr committed Feb 4, 2025
1 parent 13f948a commit 852b6d6
Show file tree
Hide file tree
Showing 28 changed files with 12,122 additions and 68 deletions.
9 changes: 6 additions & 3 deletions core/embed/gfx/bitblt/dma2d_bitblt.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef TREZORHAL_DMA2D_BITBLT_H
#define TREZORHAL_DMA2D_BITBLT_H
#pragma once

#include <gfx/gfx_bitblt.h>

Expand Down Expand Up @@ -50,4 +49,8 @@ bool dma2d_rgba8888_copy_rgba8888(const gfx_bitblt_t* bb);
bool dma2d_rgba8888_blend_mono4(const gfx_bitblt_t* bb);
bool dma2d_rgba8888_blend_mono8(const gfx_bitblt_t* bb);

#endif // TREZORHAL_DMA2D_BITBLT_H
#ifdef USE_HW_JPEG_DECODER
// Copy operations that convert YCbCr source data to an RGBA8888 destination.
// One variant per chroma sub-sampling scheme (4:2:0, 4:2:2 and 4:4:4).
// These are declared only when `USE_HW_JPEG_DECODER` is defined.
//
// Each function returns `false` if the operation could not be performed
// by DMA2D, `true` otherwise.
bool dma2d_rgba8888_copy_ycbcr420(const gfx_bitblt_t* bb);
bool dma2d_rgba8888_copy_ycbcr422(const gfx_bitblt_t* bb);
bool dma2d_rgba8888_copy_ycbcr444(const gfx_bitblt_t* bb);
#endif
61 changes: 61 additions & 0 deletions core/embed/gfx/bitblt/stm32/dma2d_bitblt.c
Original file line number Diff line number Diff line change
Expand Up @@ -789,3 +789,64 @@ bool dma2d_rgba8888_copy_rgba8888(const gfx_bitblt_t* bb) {
bb->width, bb->height);
return true;
}

#ifdef USE_HW_JPEG_DECODER
// Configures DMA2D for a memory-to-memory transfer with pixel format
// conversion (YCbCr input, ARGB8888 output) and starts it.
//
// `bb` supplies the source and destination rows, the destination stride
// and x-offset, and the width/height of the transferred area.
// `css` is the chroma sub-sampling setting stored in the layer configuration.
//
// Returns `false` if the driver is not initialized or if either the
// destination or the source row is not accessible by DMA2D. Returns `true`
// after the transfer has been started (HAL return values are not checked).
static bool dma2d_rgba8888_copy_ycbcr(const gfx_bitblt_t* bb, uint32_t css) {
  dma2d_driver_t* drv = &g_dma2d_driver;

  if (!drv->initialized) {
    return false;
  }

  // NOTE(review): presumably blocks until a previously started DMA2D
  // operation has finished - confirm against dma2d_wait()
  dma2d_wait();

  // Both buffers have to be reachable by the DMA2D peripheral
  if (!(dma2d_accessible(bb->dst_row) && dma2d_accessible(bb->src_row))) {
    return false;
  }

  // Addresses handed over to the HAL; the destination is advanced by
  // `dst_x` 32-bit pixels within the row
  const uint32_t src_addr = (uint32_t)bb->src_row;
  const uint32_t dst_addr =
      (uint32_t)bb->dst_row + bb->dst_x * sizeof(uint32_t);

  // Output stage: ARGB8888, skipping the rest of the destination row
  // (stride expressed in pixels minus the copied width)
  drv->handle.Init.Mode = DMA2D_M2M_PFC;
  drv->handle.Init.ColorMode = DMA2D_OUTPUT_ARGB8888;
  drv->handle.Init.OutputOffset = bb->dst_stride / sizeof(uint32_t) - bb->width;
  HAL_DMA2D_Init(&drv->handle);

  // Input stage (layer 1): YCbCr data with the requested sub-sampling
  // and no input line offset
  drv->handle.LayerCfg[1].InputColorMode = DMA2D_INPUT_YCBCR;
  drv->handle.LayerCfg[1].ChromaSubSampling = css;
  drv->handle.LayerCfg[1].InputOffset = 0;
  drv->handle.LayerCfg[1].AlphaMode = 0;
  drv->handle.LayerCfg[1].InputAlpha = 0;
  HAL_DMA2D_ConfigLayer(&drv->handle, 1);

  HAL_DMA2D_Start(&drv->handle, src_addr, dst_addr, bb->width, bb->height);

  // DMA2D overwrites CLUT during YCbCr conversion
  // (seems to be a bug or an undocumented feature),
  // so the cached CLUT must not be considered valid anymore
  drv->clut_valid = false;

  return true;
}

// Copies YCbCr data to an RGBA8888 destination using DMA2D.
//
// Delegates to `dma2d_rgba8888_copy_ycbcr()` with `DMA2D_CSS_420` as the
// chroma sub-sampling setting and returns its result unchanged.
bool dma2d_rgba8888_copy_ycbcr420(const gfx_bitblt_t* bb) {
return dma2d_rgba8888_copy_ycbcr(bb, DMA2D_CSS_420);
}

// Copies YCbCr data to an RGBA8888 destination using DMA2D.
//
// Delegates to `dma2d_rgba8888_copy_ycbcr()` with `DMA2D_CSS_422` as the
// chroma sub-sampling setting and returns its result unchanged.
bool dma2d_rgba8888_copy_ycbcr422(const gfx_bitblt_t* bb) {
return dma2d_rgba8888_copy_ycbcr(bb, DMA2D_CSS_422);
}

// Copies YCbCr data to an RGBA8888 destination using DMA2D.
//
// Delegates to `dma2d_rgba8888_copy_ycbcr()` with `DMA2D_NO_CSS`
// (no chroma sub-sampling) and returns its result unchanged.
bool dma2d_rgba8888_copy_ycbcr444(const gfx_bitblt_t* bb) {
return dma2d_rgba8888_copy_ycbcr(bb, DMA2D_NO_CSS);
}

// Temporary hack: lets the jpeg decoder mark the driver's cached CLUT
// as invalid. Does nothing while the driver is not initialized.
//
// This function should be removed in the future with DMA2D syscalls.
void dma2d_invalidate_clut(void) {
  dma2d_driver_t* drv = &g_dma2d_driver;

  if (drv->initialized) {
    drv->clut_valid = false;
  }
}

#endif // USE_HW_JPEG_DECODER
127 changes: 127 additions & 0 deletions core/embed/gfx/inc/gfx/jpegdec.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* This file is part of the Trezor project, https://trezor.io/
*
* Copyright (c) SatoshiLabs
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#pragma once

#include <trezor_types.h>

// Maximum number of blocks (8x8) in a slice.
// The more blocks we use, the faster the decoder is.
// Minimum value is 4 to support 4:2:0 subsampling (MCU is 16x16).
#define JPEGDEC_MAX_SLICE_BLOCKS 16

// Size of Y/YCbCr data buffer
// The worst case is 192 bytes per block (8x8 pixels) for 4:4:4 subsampling
#define JPEGDEC_YCBCR_BUFFER_SIZE (JPEGDEC_MAX_SLICE_BLOCKS * 8 * 8 * 3)

// Maximum size of the RGBA8888 buffer for a slice.
#define JPEGDEC_RGBA8888_BUFFER_SIZE (JPEGDEC_MAX_SLICE_BLOCKS * 8 * 8 * 4)

typedef struct jpegdec jpegdec_t;

// Decoder state, as returned by `jpegdec_process()`.
// The state tells the caller what to do next.
typedef enum {
// Decoder needs more data
// (jpegdec_process should be called with more data)
JPEGDEC_STATE_NEED_DATA,
// Image info is ready
// (jpegdec_get_info can be called to get the image info)
JPEGDEC_STATE_INFO_READY,
// Decoded slice is ready
// (jpegdec_get_slice_rgba8888 can be called to get the slice data)
JPEGDEC_STATE_SLICE_READY,
// Decoding is finished
JPEGDEC_STATE_FINISHED,
// Error occurred, decoding is stopped
JPEGDEC_STATE_ERROR,
} jpegdec_state_t;

// Pixel format of the image, reported in `jpegdec_image_t.format`
typedef enum {
JPEGDEC_IMAGE_GRAYSCALE, // Gray scale image
JPEGDEC_IMAGE_YCBCR420, // Color image with 4:2:0 subsampling
JPEGDEC_IMAGE_YCBCR422, // Color image with 4:2:2 subsampling
JPEGDEC_IMAGE_YCBCR444, // Color image with 4:4:4 subsampling
} jpegdec_image_format_t;

// Input buffer descriptor passed to `jpegdec_process()`.
//
// The decoder advances `offset` as it consumes the data; the caller
// provides `data`, `size` and `last_chunk`.
typedef struct {
// Pointer to the data
const uint8_t* data;
// Size of the data in bytes
size_t size;
// Current offset in the data
size_t offset;
// Set to true when no more data is available
bool last_chunk;
} jpegdec_input_t;

// Image info filled in by `jpegdec_get_info()`
typedef struct {
// Image format
jpegdec_image_format_t format;
// Image width in pixels
int16_t width;
// Image height in pixels
int16_t height;
} jpegdec_image_t;

// Position and size of a decoded slice, passed to
// `jpegdec_get_slice_rgba8888()`.
// NOTE(review): coordinates and dimensions are presumably in pixels
// relative to the top-left corner of the image - confirm.
typedef struct {
// Slice x-coordinate
int16_t x;
// Slice y-coordinate
int16_t y;
// Slice width
int16_t width;
// Slice height
int16_t height;
} jpegdec_slice_t;

// Initialize and reset the decoder internal state
bool jpegdec_open(void);

// Release the decoder and free resources
void jpegdec_close(void);

// Processes all or part of the input buffer and advances `input->offset`.
//
// `input->offset` must be aligned to 4 bytes.
// `input->size` must be aligned to 4 bytes except for the last chunk.
// `input->last_chunk` must be set to true when no more data is available.
//
// Returns the current state of the decoder:
// - `JPEGDEC_STATE_NEED_DATA` - more data is needed
// - `JPEGDEC_STATE_INFO_READY` - the image info is ready
// - `JPEGDEC_STATE_SLICE_READY` - a decoded slice is ready
// - `JPEGDEC_STATE_FINISHED` - the decoding is finished
// - `JPEGDEC_STATE_ERROR` - an error occurred
jpegdec_state_t jpegdec_process(jpegdec_input_t* input);

// Get the decoded image info
//
// Can be called anytime once the decoder has gone through the
// `JPEGDEC_STATE_INFO_READY` state.
//
// Returns true if the info is available
bool jpegdec_get_info(jpegdec_image_t* info);

// Copy the last decoded slice to the buffer
//
// `rgba8888` must be a buffer of at least `JPEGDEC_RGBA8888_BUFFER_SIZE`
// bytes and must be aligned to 4 bytes.
//
// Can be called immediately after `jpegdec_process` returns
// `JPEGDEC_STATE_SLICE_READY`.
bool jpegdec_get_slice_rgba8888(uint32_t* rgba8888, jpegdec_slice_t* slice);
175 changes: 175 additions & 0 deletions core/embed/gfx/jpegdec/jpegdec_test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/*
* This file is part of the Trezor project, https://trezor.io/
*
* Copyright (c) SatoshiLabs
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <trezor_rtl.h>

#include <gfx/gfx_draw.h>
#include <gfx/jpegdec.h>
#include <io/display.h>
#include <rtl/cli.h>
#include <sys/systick.h>

#include "jpegdec_test_data.h"

// Buffer receiving one decoded slice in RGBA8888 format
// (filled by `jpegdec_get_slice_rgba8888()`).
// `JPEGDEC_RGBA8888_BUFFER_SIZE` is in bytes, hence the division
// by the size of a 32-bit pixel.
uint32_t jpeg_rgba8888_buffer[JPEGDEC_RGBA8888_BUFFER_SIZE / sizeof(uint32_t)];

// CLI command handler that decodes the built-in test image
// (`jpegdata_cham`) with the JPEG decoder and reports the image
// properties, the number of decoded slices and the elapsed time.
//
// Optional arguments:
//   --slice-info   trace position and size of every decoded slice
//   --decode-only  decode only, do not copy the slices to the frame buffer
//
// Any other argument is rejected with `cli_error_arg_count()`.
// On success the command ends with `cli_ok()`, on failure with `cli_error()`.
// The decoder is closed on every path after it was successfully opened.
void jpegdec_test(cli_t *cli) {
  bool show_slice_info = false;
  bool display_image = true;

  for (int i = 0; i < cli_arg_count(cli); i++) {
    const char *arg = cli_nth_arg(cli, i);
    if (strcmp(arg, "--slice-info") == 0) {
      show_slice_info = true;
    } else if (strcmp(arg, "--decode-only") == 0) {
      display_image = false;
    } else {
      cli_error_arg_count(cli);
      return;
    }
  }

  if (!jpegdec_open()) {
    cli_error(cli, CLI_ERROR, "Failed to open JPEG decoder");
    return;
  }

  // The whole image is passed to the decoder as a single (last) chunk
  jpegdec_input_t input = {
      .data = jpegdata_cham,
      .size = sizeof(jpegdata_cham),
      .offset = 0,
      .last_chunk = true,
  };

  // `input.size` is a `size_t`; cast it so the argument matches `%d`
  cli_trace(cli, "Decoding JPEG image (%d bytes)...", (int)input.size);

  gfx_clear();
  display_refresh();

  jpegdec_state_t state;
  jpegdec_image_t image = {0};
  // Zero-initialized so it never holds indeterminate values,
  // even in the `--decode-only` mode where it stays unused
  display_fb_info_t fb = {0};

  if (display_image) {
    if (!display_get_frame_buffer(&fb)) {
      cli_error(cli, CLI_ERROR, "Failed to get frame buffer");
      goto cleanup;
    }
  }

  uint32_t start_time = systick_us();
  int slice_count = 0;

  do {
    state = jpegdec_process(&input);
    switch (state) {
      case JPEGDEC_STATE_ERROR:
        cli_error(cli, CLI_ERROR, "Decoding failed");
        goto cleanup;

      case JPEGDEC_STATE_INFO_READY:
        if (!jpegdec_get_info(&image)) {
          cli_error(cli, CLI_ERROR, "Failed to parse headers");
          goto cleanup;
        }
        break;

      // The compound statement is required: before C23 a declaration
      // must not directly follow a `case` label
      case JPEGDEC_STATE_SLICE_READY: {
        jpegdec_slice_t slice;
        if (!jpegdec_get_slice_rgba8888(jpeg_rgba8888_buffer, &slice)) {
          cli_error(cli, CLI_ERROR, "Failed to parse slice");
          goto cleanup;
        }

        if (show_slice_info) {
          cli_trace(cli, "Slice: %d,%d %dx%d", slice.x, slice.y, slice.width,
                    slice.height);
        }

        if (display_image) {
          // Copy the slice to its position in the frame buffer
          gfx_bitblt_t bb = {
              .height = slice.height,
              .width = slice.width,
              .dst_row = (uint8_t *)fb.ptr + (fb.stride * slice.y),
              .dst_stride = fb.stride,
              .dst_x = slice.x,
              .dst_y = slice.y,
              .src_row = jpeg_rgba8888_buffer,
              .src_stride = slice.width * 4,  // ARGB8888
              .src_x = 0,
              .src_y = 0,
              .src_fg = 0,  // Unused
              .src_bg = 0,  // Unused
              .src_alpha = 255,
          };

          gfx_rgba8888_copy_rgba8888(&bb);
        }

        slice_count++;
        break;
      }

      case JPEGDEC_STATE_FINISHED:
      case JPEGDEC_STATE_NEED_DATA:
        break;
    }
  } while (state != JPEGDEC_STATE_FINISHED);

  uint32_t end_time = systick_us();

  if (display_image) {
    display_refresh();
  }

  const char *type = "Unknown";
  switch (image.format) {
    case JPEGDEC_IMAGE_GRAYSCALE:
      type = "Grayscale";
      break;
    case JPEGDEC_IMAGE_YCBCR420:
      type = "YCbCr 4:2:0";
      break;
    case JPEGDEC_IMAGE_YCBCR422:
      type = "YCbCr 4:2:2";
      break;
    case JPEGDEC_IMAGE_YCBCR444:
      type = "YCbCr 4:4:4";
      break;
  }

  cli_trace(cli, "Image: %dx%d (%s)", image.width, image.height, type);
  cli_trace(cli, "Decoded %d slices", slice_count);
  // The unsigned subtraction yields the elapsed time; the result is
  // cast so the argument matches `%d`
  cli_trace(cli, "Decoding took %d us", (int)(end_time - start_time));

  jpegdec_close();
  cli_ok(cli, "");
  return;

cleanup:
  jpegdec_close();
}

// clang-format off

// Command descriptor for `jpegdec-test`, handled by `jpegdec_test()`.
// The `.args` string lists the optional flags the handler accepts.
PRODTEST_CLI_CMD(
.name = "jpegdec-test",
.func = jpegdec_test,
.info = "JPEG decoder test",
.args = "[--slice-info] [--decode-only]"
);
Loading

0 comments on commit 852b6d6

Please sign in to comment.