Introduce sound API

Baekalfen · Feb 17, 2025 · 9e9da6c · 9e9da6c
1 parent 7174dd0
commit 9e9da6c
Show file tree

Hide file tree

Showing 9 changed files with 328 additions and 2 deletions.
diff --git a/pyboy/api/sound.pxd b/pyboy/api/sound.pxd
@@ -0,0 +1,17 @@
+#
+# License: See LICENSE.md file
+# GitHub: https://github.com/Baekalfen/PyBoy
+#
+
+from libc.stdint cimport uint64_t, int8_t
+from pyboy.core.mb cimport Motherboard
+
+cdef class Sound:
+    cdef Motherboard mb
+    cdef readonly uint64_t sample_rate
+
+    cdef readonly int8_t[:] raw_buffer
+    cdef readonly (int, int) raw_buffer_dims
+    cdef readonly str raw_buffer_format
+    cdef readonly int raw_buffer_length
+    cdef readonly object raw_ndarray
diff --git a/pyboy/api/sound.py b/pyboy/api/sound.py
@@ -0,0 +1,174 @@
+#
+# License: See LICENSE.md file
+# GitHub: https://github.com/Baekalfen/PyBoy
+#
+"""
+This class gives access to the sound buffer of PyBoy.
+"""
+
+import numpy as np
+
+from pyboy import utils
+from pyboy.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+class Sound:
+    """
+    As part of the emulation, we generate a sound buffer for each frame on the screen. This class has several helper
+    methods to make it possible to read this buffer out.
+
+    When the game enables/disables the LCD, the timing will be shorter than 70224 emulated cycles. Therefore the sound
+    buffer will also be shorter than 16.667ms (60 FPS).
+
+    Because the number of samples and the timing of frames don't match exactly, you can expect a little fluctuation in
+    the number of samples per frame. Normally at a sample rate of 24,000Hz, it'll be 400 samples/frame. But sometimes,
+    it might become 401. As described above, when the LCD enables/disables, it might be even less -- maybe 30, 143,
+    or 200 samples. This timespan represents what the real hardware would have shown.
+
+    If you're working with encoding the screen and sound in a video stream, you could drop these shorter frames, if they
+    cause problems. They usually only happen in transitions from menu to game or similar.
+    """
+
+    def __init__(self, mb):
+        self.mb = mb
+
+        self.sample_rate = self.mb.sound.sample_rate
+        """
+        Read-only. Changing this will not change the sample rate. See `PyBoy` constructor instead.
+
+        The sample rate is reported per second, while the frame rate of the Game Boy is ~60 frames per second.
+        So expect the sound buffer to hold about 1/60 of this value in samples after every frame, although it will
+        fluctuate. See top of the page.
+
+        ```python
+        >>> pyboy.sound.sample_rate # in Hz
+        48000
+        >>> pyboy.sound.sample_rate // 60 # Expected samples per frame
+        800
+        >>> (800+1) * 2 # Minimum buffer size for you to prepare (2 channels, +1 for fluctuating lengths)
+        1602
+        >>> 1602 == pyboy.sound.raw_buffer_length # This is how the length is calculated at the moment
+        True
+        ```
+
+        Returns
+        -------
+        int:
+            The sample rate in Hz (samples per second)
+        """
+
+        self.raw_buffer_format = self.mb.sound.buffer_format
+        """
+        Returns the data format of the raw sound buffer. **This format is subject to change.**
+
+        See how to interpret the format on: https://docs.python.org/3/library/struct.html#format-characters
+
+        Example:
+        ```python
+        >>> pyboy.sound.raw_buffer_format
+        'b'
+        ```
+
+        Returns
+        -------
+        str:
+            Struct format of the raw sound buffer. E.g. 'b' for signed 8-bit
+        """
+
+        self.raw_buffer_length = self.mb.sound.audiobuffer_length
+        """
+        Read-only. Changing this will not change the buffer length.
+
+        This is the total length of the allocated raw buffer. Use this only to allocate an appropriate buffer in your
+        script. The length of the valid data in the buffer is found using `Sound.raw_buffer_head`.
+
+        Returns
+        -------
+        int:
+            Total raw buffer length
+        """
+
+        self.raw_buffer = memoryview(self.mb.sound.audiobuffer).cast(
+            self.raw_buffer_format, shape=(self.mb.sound.audiobuffer_length,)
+        )
+        """
+        Provides a raw, unfiltered `memoryview` object with the data from the sound buffer. Check
+        `Sound.raw_buffer_format` to see which data format is used. **The returned type and data format are
+        subject to change.** The sound buffer is interleaved stereo, so the even indexes are the left channel,
+        and the odd indexes are the right channel.
+
+        Use this only if you need to bypass the overhead of `Sound.ndarray`.
+
+        Remember to use `Sound.raw_buffer_head`, as not all 'frames' are of equal length.
+
+        Example:
+        ```python
+        >>> from array import array
+        >>> sound_buffer = array(pyboy.sound.raw_buffer_format, pyboy.sound.raw_buffer[:pyboy.sound.raw_buffer_head])
+        >>> sound_buffer
+        array('b', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...])
+        ```
+
+        Returns
+        -------
+        memoryview:
+            memoryview of sound data.
+        """
+
+        self.raw_ndarray = None
+        """
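+        `numpy.ndarray` view of the whole raw buffer, shaped `(raw_buffer_length // 2, 2)`. `None` if sound is disabled.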
+        """
+        if self.mb.sound.enabled:
+            self.raw_ndarray = np.frombuffer(
+                self.mb.sound.audiobuffer,
+                dtype=np.int8,
+            ).reshape(self.mb.sound.audiobuffer_length // 2, 2)
+        else:
+            self.raw_ndarray = None
+
+    @property
+    def raw_buffer_head(self):
+        """
+        This returns the length of the valid data currently in the sound buffer.
+
+        See the explanation at the top of the page.
+        """
+        return self.mb.sound.audiobuffer_head
+
+    @property
+    def ndarray(self):
+        """
+        References the sound data in NumPy format. **Remember to copy this object** if you intend to store it.
+        The backing buffer will update, but it will be the same `ndarray` object.
+
+        The format is given by `pyboy.api.sound.Sound.raw_buffer_format`. The sound buffer is in stereo format,
+        so the first index is the left channel, and the second index is the right channel.
+
+        This property returns an `ndarray` that is already accounting for the changing length of the sound buffer.
+        See the explanation at the top of the page.
+
+        Example:
+        ```python
+        >>> pyboy.sound.ndarray.shape # 801 samples, 2 channels (stereo)
+        (801, 2)
+        >>> pyboy.sound.ndarray
+        array([[0, 0],
+               [0, 0],
+               ...
+               [0, 0],
+               [0, 0]], dtype=int8)
+
+        ```
+
+        Returns
+        -------
+        numpy.ndarray:
+            Sound data in `ndarray` of bytes with shape given by sample rate
+        """
+        if self.mb.sound.enabled:
+            return self.raw_ndarray[: self.mb.sound.audiobuffer_head]
+        else:
+            raise utils.PyBoyFeatureDisabledError("Sound is not enabled!")
diff --git a/pyboy/core/sound.pxd b/pyboy/core/sound.pxd
@@ -39,6 +39,7 @@ cdef class Sound:
     cdef uint64_t samples_per_frame
     cdef cython.double cycles_per_sample
     cdef int8_t[:] audiobuffer
+    cdef str buffer_format
 
     cdef uint8_t noise_left
     cdef uint8_t wave_left

diff --git a/pyboy/core/sound.py b/pyboy/core/sound.py
@@ -47,10 +47,11 @@ def __init__(self, volume, emulate, sample_rate, cgb):
         assert self.sample_rate % 60 == 0, "We do not want a sample rate that doesn't divide the frame rate"
         self.audiobuffer_head = 0
         self.samples_per_frame = self.sample_rate // 60
-        self.cycles_per_sample = FRAME_CYCLES // self.samples_per_frame  # Notice use of float
+        self.cycles_per_sample = float(FRAME_CYCLES) / self.samples_per_frame  # Notice use of float
+        self.buffer_format = "b"
         # Buffer for 1 frame of stereo 8-bit sound. +1 for rounding error
         self.audiobuffer_length = (self.samples_per_frame + 1) * 2
-        self.audiobuffer = array("b", [0] * self.audiobuffer_length)
+        self.audiobuffer = array(self.buffer_format, [0] * self.audiobuffer_length)
 
         self.speed_shift = 0
         if self.emulate and not self.enabled:

diff --git a/pyboy/pyboy.pxd b/pyboy/pyboy.pxd
@@ -11,6 +11,7 @@ from libc.stdint cimport int64_t, uint64_t
 from pyboy.api.gameshark cimport GameShark
 from pyboy.api.memory_scanner cimport MemoryScanner
 from pyboy.api.screen cimport Screen
+from pyboy.api.sound cimport Sound
 from pyboy.api.tilemap cimport TileMap
 from pyboy.core.cpu cimport CPU
 from pyboy.core.mb cimport Motherboard
@@ -56,6 +57,7 @@ cdef class PyBoy:
     cdef readonly PyBoyMemoryView memory
     cdef readonly PyBoyRegisterFile register_file
     cdef readonly Screen screen
+    cdef readonly Sound sound
     cdef readonly TileMap tilemap_background
     cdef readonly TileMap tilemap_window
     cdef readonly object game_wrapper

diff --git a/pyboy/pyboy.py b/pyboy/pyboy.py
@@ -16,6 +16,7 @@
 from pyboy.api.gameshark import GameShark
 from pyboy.api.memory_scanner import MemoryScanner
 from pyboy.api.screen import Screen
+from pyboy.api.sound import Sound
 from pyboy.api.tilemap import TileMap
 from pyboy.logging import get_logger
 from pyboy.logging import log_level as _log_level
@@ -248,11 +249,35 @@ def __init__(
 
         ```
 
+        NOTE: See `PyBoy.sound` to get the sound buffer.
+
         Returns
         -------
         `pyboy.api.screen.Screen`:
             A Screen object with helper functions for reading the screen buffer.
         """
+        self.sound = Sound(self.mb)
+        """
+        Use this attribute to get a `pyboy.api.sound.Sound` object. This can be used to get the sound buffer of the
+        latest screen frame (see `PyBoy.screen`).
+
+        Example:
+        ```python
+        >>> pyboy.sound.ndarray.shape # 801 samples, 2 channels (stereo)
+        (801, 2)
+        >>> pyboy.sound.ndarray
+        array([[0, 0],
+               [0, 0],
+               ...
+               [0, 0],
+               [0, 0]], dtype=int8)
+        ```
+
+        Returns
+        -------
+        `pyboy.api.sound.Sound`:
+            A Sound object with helper functions for accessing the sound buffer.
+        """
         self.memory = PyBoyMemoryView(self.mb)
         """
         Provides a `pyboy.PyBoyMemoryView` object for reading and writing the memory space of the Game Boy.

diff --git a/requirements_tests.txt b/requirements_tests.txt
@@ -8,3 +8,4 @@ pyopengl
 filelock
 cryptography
 GitPython
+matplotlib
diff --git a/tests/test_results/sound_swoosh.png b/tests/test_results/sound_swoosh.png
diff --git a/tests/test_sound.py b/tests/test_sound.py
@@ -0,0 +1,105 @@
+#
+# License: See LICENSE.md file
+# GitHub: https://github.com/Baekalfen/PyBoy
+#
+
+import io
+import os
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import PIL
+import pytest
+
+from pyboy import PyBoy
+from pyboy.utils import PyBoyFeatureDisabledError
+
+OVERWRITE_PNGS = False
+
+
+def test_swoosh(default_rom):
+    sample_rate = 24000
+    pyboy = PyBoy(default_rom, window="null", sound=True, sound_sample_rate=sample_rate)
+
+    frames = 60
+    pointer = 0
+    buffers = np.zeros((sample_rate // 60 * frames, 2))
+    # array("b", [0] * (sample_rate) * 2 * (frames//60))
+
+    for _ in range(frames):
+        pyboy.tick(1, sound=True)
+        audiobuffer = pyboy.sound.ndarray
+        length, _ = audiobuffer.shape
+        buffers[pointer : pointer + length] = audiobuffer[:]
+        pointer += len(audiobuffer)
+
+    left_channel = buffers[:, 0]
+    right_channel = buffers[:, 1]
+    time = np.linspace(0, len(left_channel) / sample_rate, num=len(left_channel))
+
+    # Plot the channels
+    plt.figure(figsize=(12, 6))
+    plt.subplot(2, 1, 1)
+    plt.plot(time, left_channel, label="Left Channel", color="blue")
+    plt.title("Left Channel")
+    plt.xlabel("Time (s)")
+    plt.ylabel("Amplitude")
+    plt.ylim(-0.4, 15.4)
+
+    plt.subplot(2, 1, 2)
+    plt.plot(time, right_channel, label="Right Channel", color="red")
+    plt.title("Right Channel")
+    plt.xlabel("Time (s)")
+    plt.ylabel("Amplitude")
+    plt.ylim(-0.4, 15.4)
+
+    plt.tight_layout()
+
+    png_path = Path("tests/test_results/sound_swoosh.png")
+    if OVERWRITE_PNGS:
+        png_path.parents[0].mkdir(parents=True, exist_ok=True)
+        plt.savefig(png_path)
+    else:
+        # Converting to RGB as ImageChops.difference cannot handle Alpha: https://github.com/python-pillow/Pillow/issues/4849
+        plt_data = io.BytesIO()
+        plt.savefig(plt_data, format="png")
+        plt_data.seek(0)
+        image = PIL.Image.open(plt_data).convert("RGB")
+        old_image = PIL.Image.open(png_path).convert("RGB")
+        diff = PIL.ImageChops.difference(image, old_image)
+        if diff.getbbox() and os.environ.get("TEST_VERBOSE_IMAGES"):
+            image.show()
+            old_image.show()
+            diff.show()
+            plt.show()
+        assert not diff.getbbox(), "Images are different!"
+
+
+def test_api_sound_enabled(default_rom):
+    pyboy = PyBoy(default_rom, window="null", sound=True)
+
+    pyboy.sound.raw_buffer[0]  # No exception
+    pyboy.sound.raw_ndarray[0]  # No exception
+    assert pyboy.sound.ndarray.shape == (0, 2), "Assumed empty sound buffer"
+
+
+def test_api_sound_disabled(default_rom):
+    pyboy = PyBoy(default_rom, window="null", sound=False)  # Sound emulation disabled
+
+    assert pyboy.sound.raw_buffer[0] == 0  # Always defined, but empty
+    with pytest.raises(PyBoyFeatureDisabledError):
+        pyboy.sound.raw_ndarray[0]
+    with pytest.raises(PyBoyFeatureDisabledError):
+        pyboy.sound.ndarray[0]
+
+
+@pytest.mark.parametrize("sample_rate", [3000, 6000, 12000, 24000, 44100, 48000, 88200, 96000])
+def test_buffer_overrun(default_rom, capsys, sample_rate):
+    pyboy = PyBoy(default_rom, window="null", sound_sample_rate=sample_rate)
+    for _ in range(200):
+        pyboy.tick(1, False, True)
+
+    # Watch out for critical "Buffer overrun" log from sound
+    captured = capsys.readouterr()
+    assert captured.out == ""