From c2fe2252798ab89ab7576dd1e34c5beb73e28abc Mon Sep 17 00:00:00 2001 From: dotX12 Date: Mon, 3 Feb 2025 06:14:56 +0300 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat(Cargo.lock):=20Add=20arc-swap,?= =?UTF-8?q?=20portable-atomic,=20pyo3-log=20packages=20=F0=9F=94=A7=20fix(?= =?UTF-8?q?Cargo.lock):=20Update=20pyo3,=20pyo3-build-config,=20pyo3-ffi,?= =?UTF-8?q?=20pyo3-macros,=20pyo3-macros-backend=20versions=20to=200.20.3?= =?UTF-8?q?=20=F0=9F=94=A7=20fix(pyproject.toml):=20Update=20shazamio=5Fco?= =?UTF-8?q?re=20version=20to=201.1.0-rc.2=20=F0=9F=94=A7=20fix(Cargo.toml)?= =?UTF-8?q?:=20Update=20shazamio-core=20version=20to=201.1.0-rc.2,=20add?= =?UTF-8?q?=20pyo3-log=20and=20log=20dependencies=20=F0=9F=94=A7=20fix(src?= =?UTF-8?q?/lib.rs):=20Add=20logging=20initialization=20and=20messages,=20?= =?UTF-8?q?update=20segment=20duration=20to=2010=20seconds=20=F0=9F=94=A7?= =?UTF-8?q?=20fix(src/fingerprinting/algorithm.rs):=20Update=20segment=20d?= =?UTF-8?q?uration=20to=2010=20seconds=20=F0=9F=94=A7=20fix(shazamio=5Fcor?= =?UTF-8?q?e/shazamio=5Fcore.py):=20Update=20segment=20duration=20to=2010?= =?UTF-8?q?=20seconds,=20add=20type=20hints=20and=20docstrings=20?= =?UTF-8?q?=F0=9F=94=A7=20fix(shazamio=5Fcore/shazamio=5Fcore.pyi):=20Upda?= =?UTF-8?q?te=20segment=20duration=20to=2010=20seconds,=20add=20type=20hin?= =?UTF-8?q?ts=20and=20docstrings=20=F0=9F=94=A7=20fix(.github/workflows/CI?= =?UTF-8?q?.yml):=20Update=20comments=20for=20macOS=20targets?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/CI.yml | 4 +-- Cargo.lock | 49 ++++++++++++++++++++++++------- Cargo.toml | 6 ++-- pyproject.toml | 2 +- shazamio_core/shazamio_core.py | 44 +++++++++++++++++++++++----- shazamio_core/shazamio_core.pyi | 52 ++++++++++++++++++++++++++++----- src/fingerprinting/algorithm.rs | 8 ++--- src/lib.rs | 29 ++++++++++++++++-- 8 files changed, 154 insertions(+), 40 deletions(-) diff --git a/.github/workflows/CI.yml 
b/.github/workflows/CI.yml index 7ce4fa6..b430224 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -44,7 +44,7 @@ jobs: fail-fast: false matrix: include: - # macOS 13 (x86_64) со всеми версиями Python + # macOS 13 (x86_64) - os: macos-13 target: x86_64-apple-darwin python-version: "3.9" @@ -58,7 +58,7 @@ jobs: target: x86_64-apple-darwin python-version: "3.12" - # macOS-latest (arm64) со всеми версиями Python + # macOS-latest (arm64) - os: macos-latest target: aarch64-apple-darwin python-version: "3.9" diff --git a/Cargo.lock b/Cargo.lock index 5bcd75b..4e66da2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -48,6 +48,12 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "arc-swap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" + [[package]] name = "arrayvec" version = "0.7.4" @@ -1220,6 +1226,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "portable-atomic" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" + [[package]] name = "proc-macro-crate" version = "1.3.1" @@ -1241,15 +1253,16 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.20.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a89dc7a5850d0e983be1ec2a463a171d20990487c3cfcd68b5363f1ee3d6fe0" +checksum = "53bdbb96d49157e65d45cc287af5f32ffadd5f4761438b527b055fb0d4bb8233" dependencies = [ "cfg-if", "indoc", "libc", "memoffset", "parking_lot", + "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", @@ -1272,9 +1285,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.20.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07426f0d8fe5a601f26293f300afd1a7b1ed5e78b2a705870c5f30893c5163be" +checksum 
= "deaa5745de3f5231ce10517a1f5dd97d53e5a2fd77aa6b5842292085831d48d7" dependencies = [ "once_cell", "target-lexicon", @@ -1282,19 +1295,30 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.20.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb7dec17e17766b46bca4f1a4215a85006b4c2ecde122076c562dd058da6cf1" +checksum = "62b42531d03e08d4ef1f6e85a2ed422eb678b8cd62b762e53891c05faf0d4afa" dependencies = [ "libc", "pyo3-build-config", ] +[[package]] +name = "pyo3-log" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c09c2b349b6538d8a73d436ca606dab6ce0aaab4dad9e6b7bdd57a4f556c3bc3" +dependencies = [ + "arc-swap", + "log", + "pyo3", +] + [[package]] name = "pyo3-macros" -version = "0.20.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f738b4e40d50b5711957f142878cfa0f28e054aa0ebdfc3fd137a843f74ed3" +checksum = "7305c720fa01b8055ec95e484a6eca7a83c841267f0dd5280f0c8b8551d2c158" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -1304,12 +1328,13 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.20.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fc910d4851847827daf9d6cdd4a823fbdaab5b8818325c5e97a86da79e8881f" +checksum = "7c7e9b68bb9c3149c5b0cade5d07f953d6d125eb4337723c4ccdb665f1f96185" dependencies = [ "heck", "proc-macro2", + "pyo3-build-config", "quote", "syn 2.0.50", ] @@ -1519,7 +1544,7 @@ dependencies = [ [[package]] name = "shazamio-core" -version = "1.1.0-rc.1" +version = "1.1.0-rc.2" dependencies = [ "base64", "blocking", @@ -1528,8 +1553,10 @@ dependencies = [ "chfft", "crc32fast", "futures", + "log", "pyo3", "pyo3-asyncio", + "pyo3-log", "rodio", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 5cce7de..1673225 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "shazamio-core" 
-version = "1.1.0-rc.1" +version = "1.1.0-rc.2" edition = "2021" rust-version = "1.62" @@ -23,8 +23,10 @@ futures = { version = "0.3.30", features = [] } serde = { version = "1.0.196", features = ["derive"] } bytes = "1.5.0" tempdir = "0.3.7" -pyo3 = "0.20.2" +pyo3 = "0.20.3" pyo3-asyncio = { version = "0.20.0", features = ["async-std-runtime", "async-std", "tokio", "tokio-runtime"] } +pyo3-log = "=0.8.4" +log = "0.4.20" [features] default = ["pyo3/extension-module"] diff --git a/pyproject.toml b/pyproject.toml index 087870e..294f012 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "shazamio_core" -version = "1.1.0-rc.1" +version = "1.1.0-rc.2" description = "" authors = ["dotX12 "] readme = "README.md" diff --git a/shazamio_core/shazamio_core.py b/shazamio_core/shazamio_core.py index 06d42a5..c9670b2 100644 --- a/shazamio_core/shazamio_core.py +++ b/shazamio_core/shazamio_core.py @@ -1,4 +1,6 @@ from dataclasses import dataclass +from typing import Union +from os import PathLike @dataclass @@ -35,21 +37,47 @@ def __repr__(self) -> str: class Recognizer: - def __init__(self, segment_duration_seconds: int = 12) -> None: + """ + Recognizer uses a Rust implementation under the hood. + + This class provides an interface for recognizing audio files, but the actual + processing logic is implemented in Rust and accessed via FFI. + """ + + def __init__(self, segment_duration_seconds: int = 10) -> None: + """ + :param segment_duration_seconds: The duration (in seconds) of the audio segment to analyze. + - **Default:** 10 seconds. + - **If the audio file is longer than this duration**, a centered segment of the specified duration is selected. + - Example: If the audio is **60 seconds** and `segment_duration_seconds = 10`, the extracted segment will be **from 25s to 35s**. + - **If the audio file is shorter than this duration**, the entire file is used.
+ - Example: If the audio is **8 seconds** and `segment_duration_seconds = 10`, the entire **8-second file** will be processed. + - **Audio is always converted to mono and downsampled to 16 kHz** before analysis. + - This parameter determines the number of samples used for frequency analysis and fingerprint generation. + """ + self.segment_duration_seconds = segment_duration_seconds raise NotImplemented - async def recognize_path(self, value: str) -> Signature: + async def recognize_path(self, value: Union[str, PathLike]) -> Signature: """ - :param value: path file - :return: Signature object - :raises SignatureError: if there is any error + Recognize audio from a file path. + + This method is a Python wrapper around a Rust implementation. + + :param value: Path to an audio file. + :return: Signature object. + :raises SignatureError: if an error occurs. """ raise NotImplemented async def recognize_bytes(self, value: bytes) -> Signature: """ - :param value: bytes file - :return: Signature object - :raises SignatureError: if there is any error + Recognize audio from raw bytes. + + This method is a Python wrapper around a Rust implementation. + + :param value: Raw audio file as bytes. + :return: Signature object. + :raises SignatureError: if an error occurs.
""" raise NotImplemented diff --git a/shazamio_core/shazamio_core.pyi b/shazamio_core/shazamio_core.pyi index 623a5df..c9670b2 100644 --- a/shazamio_core/shazamio_core.pyi +++ b/shazamio_core/shazamio_core.pyi @@ -1,4 +1,7 @@ from dataclasses import dataclass +from typing import Union +from os import PathLike + @dataclass class Geolocation: @@ -6,12 +9,14 @@ class Geolocation: latitude: int longitude: int + @dataclass class SignatureSong: samples: int timestamp: int uri: str + @dataclass class Signature: geolocation: Geolocation @@ -19,29 +24,60 @@ class Signature: timestamp: int timezone: str + class SignatureError(Exception): def __init__(self, message: str): self.message = message + def __str__(self) -> str: return self.message + def __repr__(self) -> str: return f"SignatureError({self.message})" + class Recognizer: - def __init__(self, segment_duration_seconds: int = 12) -> None: + """ + Recognizer uses a Rust implementation under the hood. + + This class provides an interface for recognizing audio files, but the actual + processing logic is implemented in Rust and accessed via FFI. + """ + + def __init__(self, segment_duration_seconds: int = 10) -> None: + """ + :param segment_duration_seconds: The duration (in seconds) of the audio segment to analyze. + - **Default:** 10 seconds. + - **If the audio file is longer than this duration**, a centered segment of the specified duration is selected. + - Example: If the audio is **60 seconds** and `segment_duration_seconds = 10`, the extracted segment will be **from 25s to 35s**. + - **If the audio file is shorter than this duration**, the entire file is used. + - Example: If the audio is **8 seconds** and `segment_duration_seconds = 10`, the entire **8-second file** will be processed. + - **Audio is always converted to mono and downsampled to 16 kHz** before analysis. + - This parameter determines the number of samples used for frequency analysis and fingerprint generation.
+ """ + self.segment_duration_seconds = segment_duration_seconds raise NotImplemented - async def recognize_path(self, value: str) -> Signature: + async def recognize_path(self, value: Union[str, PathLike]) -> Signature: """ - :param value: path file - :return: Signature object - :raises SignatureError: if there is any error + Recognize audio from a file path. + + This method is a Python wrapper around a Rust implementation. + + :param value: Path to an audio file. + :return: Signature object. + :raises SignatureError: if an error occurs. """ raise NotImplemented + async def recognize_bytes(self, value: bytes) -> Signature: """ - :param value: bytes file - :return: Signature object - :raises SignatureError: if there is any error + Recognize audio from raw bytes. + + This method is a Python wrapper around a Rust implementation. + + :param value: Raw audio file as bytes. + :return: Signature object. + :raises SignatureError: if an error occurs. """ raise NotImplemented diff --git a/src/fingerprinting/algorithm.rs b/src/fingerprinting/algorithm.rs index 34cea9a..d0e382d 100644 --- a/src/fingerprinting/algorithm.rs +++ b/src/fingerprinting/algorithm.rs @@ -35,7 +35,7 @@ impl SignatureGenerator { let raw_pcm_samples: Vec = converted_file.collect(); // Process the PCM samples as in make_signature_from_buffer - let duration_seconds = segment_duration_seconds.unwrap_or(12); + let duration_seconds = segment_duration_seconds.unwrap_or(10); let sample_rate = 16000; let segment_samples = (duration_seconds * sample_rate) as usize; @@ -74,8 +74,8 @@ impl SignatureGenerator { } // Downsample the raw PCM samples to 16 KHz, and skip to the middle of the file - // in order to increase recognition odds. Take N (12 default) seconds of sample. - let duration_seconds = segment_duration_seconds.unwrap_or(12); + // in order to increase recognition odds. Take N (10 default) seconds of sample. 
+ let duration_seconds = segment_duration_seconds.unwrap_or(10); let sample_rate = 16000; let segment_samples = (duration_seconds * sample_rate) as usize; @@ -187,10 +187,8 @@ impl SignatureGenerator { .max(spread_fft_results[position + 2]); } - // Сначала скопируем данные, чтобы избежать одновременной мутации. let spread_fft_results_copy = spread_fft_results.clone(); - // Теперь, используя копию, мы можем обновить исходные данные без конфликта мутации. for position in 0..=1024 { for former_fft_number in &[1, 3, 6] { let former_fft_output = &mut self.spread_fft_outputs diff --git a/src/lib.rs b/src/lib.rs index 8e5e523..7e2e2c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,16 +9,22 @@ use crate::utils::convert_signature_to_py; use crate::utils::get_python_future; use crate::utils::unwrap_decoded_signature; use fingerprinting::algorithm::SignatureGenerator; -use pyo3::prelude::PyModule; +use pyo3::prelude::*; use pyo3::{pyclass, pymethods, pymodule, PyErr, PyObject, PyResult, Python, ToPyObject}; +use log::{info, debug, error}; #[pymodule] fn shazamio_core(_py: Python<'_>, m: &PyModule) -> PyResult<()> { + pyo3_log::init(); + info!("Initializing shazamio_core module"); + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; + + info!("shazamio_core module initialized successfully"); Ok(()) } @@ -33,47 +39,64 @@ struct Recognizer { impl Recognizer { #[new] pub fn new(segment_duration_seconds: Option) -> Self { - let duration = segment_duration_seconds.unwrap_or(12); + let duration = segment_duration_seconds.unwrap_or(10); + info!("Recognizer created with segment_duration_seconds = {}", duration); Recognizer { segment_duration_seconds: duration } } fn recognize_bytes(&self, py: Python, bytes: Vec) -> PyResult { + debug!("Recognize bytes method called"); + debug!("Segment duration: {}", self.segment_duration_seconds); + debug!("Received {} bytes for recognition", bytes.len()); + let segment_duration = self.segment_duration_seconds; 
let future = async move { + debug!("Starting async recognition from bytes"); let data = SignatureGenerator::make_signature_from_bytes( bytes, Some(segment_duration), ).map_err(|e| { + error!("Error in make_signature_from_bytes: {}", e); let error_message = format!("{}", e); PyErr::new::(SignatureError::new(error_message)) })?; + debug!("Successfully generated signature from bytes"); let signature = unwrap_decoded_signature(data); convert_signature_to_py(signature?) }; let python_future = get_python_future(py, future); + debug!("Returning Python future for recognize_bytes"); python_future.map(|any| any.to_object(py)) } fn recognize_path(&self, py: Python, value: String) -> PyResult { + debug!("Recognize path method called"); + debug!("Segment duration: {}", self.segment_duration_seconds); + debug!("File path: {}", value); + let segment_duration = self.segment_duration_seconds; let future = async move { + debug!("Starting async recognition from file: {}", value); let data = SignatureGenerator::make_signature_from_file( &value, Some(segment_duration), ).map_err(|e| { + error!("Error in make_signature_from_file: {}", e); let error_message = format!("{}", e); PyErr::new::(SignatureError::new(error_message)) })?; + debug!("Successfully generated signature from file"); let signature = unwrap_decoded_signature(data); convert_signature_to_py(signature?) }; let python_future = get_python_future(py, future); + debug!("Returning Python future for recognize_path"); python_future.map(|any| any.to_object(py)) } -} \ No newline at end of file +}