remove filter_radius

IAHispano · Feb 11, 2025 · f9cb292 · f9cb292
1 parent 7b0ff3d
commit f9cb292
Show file tree

Hide file tree

Showing 9 changed files with 4 additions and 89 deletions.
diff --git a/assets/Applio_NoUI.ipynb b/assets/Applio_NoUI.ipynb
@@ -226,7 +226,6 @@
     "export_format = \"WAV\"  # @param ['WAV', 'MP3', 'FLAC', 'OGG', 'M4A'] {allow-input: false}\n",
     "f0_method = \"rmvpe\"  # @param [\"crepe\", \"crepe-tiny\", \"rmvpe\", \"fcpe\", \"hybrid[rmvpe+fcpe]\"] {allow-input: false}\n",
     "f0_up_key = 0  # @param {type:\"slider\", min:-24, max:24, step:0}\n",
-    "filter_radius = 3  # @param {type:\"slider\", min:0, max:10, step:0}\n",
     "rms_mix_rate = 0.8  # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n",
     "protect = 0.5  # @param {type:\"slider\", min:0.0, max:0.5, step:0.1}\n",
     "index_rate = 0.7  # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n",
@@ -326,7 +325,7 @@
     "if \"delay_mix\" not in globals():\n",
     "  delay_mix = 0.5 \n",
     "  \n",
-    "!python core.py infer --pitch \"{f0_up_key}\" --filter_radius \"{filter_radius}\" --volume_envelope \"{rms_mix_rate}\" --index_rate \"{index_rate}\" --hop_length \"{hop_length}\" --protect \"{protect}\" --f0_autotune \"{f0_autotune}\" --f0_method \"{f0_method}\" --input_path \"{input_path}\" --output_path \"{output_path}\" --pth_path \"{pth_file}\" --index_path \"{index_file}\" --split_audio \"{split_audio}\" --clean_audio \"{clean_audio}\" --clean_strength \"{clean_strength}\" --export_format \"{export_format}\" --embedder_model \"{embedder_model}\" --embedder_model_custom \"{embedder_model_custom}\" --formant_shifting \"{formant_shift}\" --formant_qfrency \"{formant_qfrency}\" --formant_timbre \"{formant_timbre}\" --post_process \"{post_process}\" --reverb \"{reverb}\" --pitch_shift \"{pitch_shift}\" --limiter \"{limiter}\" --gain \"{gain}\" --distortion \"{distortion}\" --chorus \"{chorus}\" --bitcrush \"{bitcrush}\" --clipping \"{clipping}\" --compressor \"{compressor}\" --delay \"{delay}\" --reverb_room_size \"{reverb_room_size}\" --reverb_damping \"{reverb_damping}\" --reverb_wet_gain \"{reverb_wet_gain}\" --reverb_dry_gain \"{reverb_dry_gain}\" --reverb_width \"{reverb_width}\" --reverb_freeze_mode \"{reverb_freeze_mode}\" --pitch_shift_semitones \"{pitch_shift_semitones}\" --limiter_threshold \"{limiter_threshold}\" --limiter_release_time \"{limiter_release_time}\" --gain_db \"{gain_db}\" --distortion_gain \"{distortion_gain}\" --chorus_rate \"{chorus_rate}\" --chorus_depth \"{chorus_depth}\" --chorus_center_delay \"{chorus_center_delay}\" --chorus_feedback \"{chorus_feedback}\" --chorus_mix \"{chorus_mix}\" --bitcrush_bit_depth \"{bitcrush_bit_depth}\" --clipping_threshold \"{clipping_threshold}\" --compressor_threshold \"{compressor_threshold}\" --compressor_ratio \"{compressor_ratio}\" --compressor_attack \"{compressor_attack}\" --compressor_release \"{compressor_release}\" --delay_seconds \"{delay_seconds}\" --delay_feedback \"{delay_feedback}\" --delay_mix \"{delay_mix}\"\n",
+    "!python core.py infer --pitch \"{f0_up_key}\" --volume_envelope \"{rms_mix_rate}\" --index_rate \"{index_rate}\" --hop_length \"{hop_length}\" --protect \"{protect}\" --f0_autotune \"{f0_autotune}\" --f0_method \"{f0_method}\" --input_path \"{input_path}\" --output_path \"{output_path}\" --pth_path \"{pth_file}\" --index_path \"{index_file}\" --split_audio \"{split_audio}\" --clean_audio \"{clean_audio}\" --clean_strength \"{clean_strength}\" --export_format \"{export_format}\" --embedder_model \"{embedder_model}\" --embedder_model_custom \"{embedder_model_custom}\" --formant_shifting \"{formant_shift}\" --formant_qfrency \"{formant_qfrency}\" --formant_timbre \"{formant_timbre}\" --post_process \"{post_process}\" --reverb \"{reverb}\" --pitch_shift \"{pitch_shift}\" --limiter \"{limiter}\" --gain \"{gain}\" --distortion \"{distortion}\" --chorus \"{chorus}\" --bitcrush \"{bitcrush}\" --clipping \"{clipping}\" --compressor \"{compressor}\" --delay \"{delay}\" --reverb_room_size \"{reverb_room_size}\" --reverb_damping \"{reverb_damping}\" --reverb_wet_gain \"{reverb_wet_gain}\" --reverb_dry_gain \"{reverb_dry_gain}\" --reverb_width \"{reverb_width}\" --reverb_freeze_mode \"{reverb_freeze_mode}\" --pitch_shift_semitones \"{pitch_shift_semitones}\" --limiter_threshold \"{limiter_threshold}\" --limiter_release_time \"{limiter_release_time}\" --gain_db \"{gain_db}\" --distortion_gain \"{distortion_gain}\" --chorus_rate \"{chorus_rate}\" --chorus_depth \"{chorus_depth}\" --chorus_center_delay \"{chorus_center_delay}\" --chorus_feedback \"{chorus_feedback}\" --chorus_mix \"{chorus_mix}\" --bitcrush_bit_depth \"{bitcrush_bit_depth}\" --clipping_threshold \"{clipping_threshold}\" --compressor_threshold \"{compressor_threshold}\" --compressor_ratio \"{compressor_ratio}\" --compressor_attack \"{compressor_attack}\" --compressor_release \"{compressor_release}\" --delay_seconds \"{delay_seconds}\" --delay_feedback \"{delay_feedback}\" --delay_mix \"{delay_mix}\"\n",
     "\n",
     "from IPython.display import Audio, display, clear_output\n",
     "\n",

diff --git a/assets/presets/Default.json b/assets/presets/Default.json
@@ -1,6 +1,5 @@
 {
     "pitch": 0,
-    "filter_radius": 3,
     "index_rate": 0.75,
     "rms_mix_rate": 1,
     "protect": 0.5

diff --git a/assets/presets/Good for Anything.json b/assets/presets/Good for Anything.json
@@ -1,6 +1,5 @@
 {
     "pitch": 0,
-    "filter_radius": 3,
     "index_rate": 0.75,
     "rms_mix_rate": 0.3,
     "protect": 0.33

diff --git a/assets/presets/Music.json b/assets/presets/Music.json
@@ -1,6 +1,5 @@
 {
     "pitch": 0,
-    "filter_radius": 3,
     "index_rate": 0.75,
     "rms_mix_rate": 0.25,
     "protect": 0.33

diff --git a/core.py b/core.py
@@ -52,7 +52,6 @@ def get_config():
 # Infer
 def run_infer_script(
     pitch: int,
-    filter_radius: int,
     index_rate: float,
     volume_envelope: int,
     protect: float,
@@ -118,7 +117,6 @@ def run_infer_script(
         "model_path": pth_path,
         "index_path": index_path,
         "pitch": pitch,
-        "filter_radius": filter_radius,
         "index_rate": index_rate,
         "volume_envelope": volume_envelope,
         "protect": protect,
@@ -188,7 +186,6 @@ def run_infer_script(
 # Batch infer
 def run_batch_infer_script(
     pitch: int,
-    filter_radius: int,
     index_rate: float,
     volume_envelope: int,
     protect: float,
@@ -254,7 +251,6 @@ def run_batch_infer_script(
         "model_path": pth_path,
         "index_path": index_path,
         "pitch": pitch,
-        "filter_radius": filter_radius,
         "index_rate": index_rate,
         "volume_envelope": volume_envelope,
         "protect": protect,
@@ -327,7 +323,6 @@ def run_tts_script(
     tts_voice: str,
     tts_rate: int,
     pitch: int,
-    filter_radius: int,
     index_rate: float,
     volume_envelope: int,
     protect: float,
@@ -374,7 +369,6 @@ def run_tts_script(
     infer_pipeline = import_voice_converter()
     infer_pipeline.convert_audio(
         pitch=pitch,
-        filter_radius=filter_radius,
         index_rate=index_rate,
         volume_envelope=volume_envelope,
         protect=protect,
@@ -647,14 +641,6 @@ def parse_arguments():
         choices=range(-24, 25),
         default=0,
     )
-    filter_radius_description = "Apply median filtering to the extracted pitch values if this value is greater than or equal to three. This can help reduce breathiness in the output audio."
-    infer_parser.add_argument(
-        "--filter_radius",
-        type=int,
-        help=filter_radius_description,
-        choices=range(11),
-        default=3,
-    )
     index_rate_description = "Control the influence of the index file on the output. Higher values mean stronger influence. Lower values can help reduce artifacts but may result in less accurate voice cloning."
     infer_parser.add_argument(
         "--index_rate",
@@ -1179,13 +1165,6 @@ def parse_arguments():
         choices=range(-24, 25),
         default=0,
     )
-    batch_infer_parser.add_argument(
-        "--filter_radius",
-        type=int,
-        help=filter_radius_description,
-        choices=range(11),
-        default=3,
-    )
     batch_infer_parser.add_argument(
         "--index_rate",
         type=float,
@@ -1666,13 +1645,6 @@ def parse_arguments():
         choices=range(-24, 25),
         default=0,
     )
-    tts_parser.add_argument(
-        "--filter_radius",
-        type=int,
-        help=filter_radius_description,
-        choices=range(11),
-        default=3,
-    )
     tts_parser.add_argument(
         "--index_rate",
         type=float,
@@ -2193,7 +2165,6 @@ def main():
         if args.mode == "infer":
             run_infer_script(
                 pitch=args.pitch,
-                filter_radius=args.filter_radius,
                 index_rate=args.index_rate,
                 volume_envelope=args.volume_envelope,
                 protect=args.protect,
@@ -2256,7 +2227,6 @@ def main():
         elif args.mode == "batch_infer":
             run_batch_infer_script(
                 pitch=args.pitch,
-                filter_radius=args.filter_radius,
                 index_rate=args.index_rate,
                 volume_envelope=args.volume_envelope,
                 protect=args.protect,
@@ -2323,7 +2293,6 @@ def main():
                 tts_voice=args.tts_voice,
                 tts_rate=args.tts_rate,
                 pitch=args.pitch,
-                filter_radius=args.filter_radius,
                 index_rate=args.index_rate,
                 volume_envelope=args.volume_envelope,
                 protect=args.protect,

diff --git a/rvc/infer/infer.py b/rvc/infer/infer.py
@@ -206,7 +206,6 @@ def convert_audio(
         split_audio: bool = False,
         f0_autotune: bool = False,
         f0_autotune_strength: float = 1,
-        filter_radius: int = 3,
         embedder_model: str = "contentvec",
         embedder_model_custom: str = None,
         clean_audio: bool = False,
@@ -222,7 +221,6 @@ def convert_audio(
 
         Args:
             pitch (int): Key for F0 up-sampling.
-            filter_radius (int): Radius for filtering.
             index_rate (float): Rate for index matching.
             volume_envelope (int): RMS mix rate.
             protect (float): Protection rate for certain audio segments.
@@ -299,7 +297,6 @@ def convert_audio(
                     file_index=file_index,
                     index_rate=index_rate,
                     pitch_guidance=self.use_f0,
-                    filter_radius=filter_radius,
                     volume_envelope=volume_envelope,
                     version=self.version,
                     protect=protect,

diff --git a/rvc/infer/pipeline.py b/rvc/infer/pipeline.py
@@ -325,7 +325,6 @@ def get_f0(
         p_len,
         pitch,
         f0_method,
-        filter_radius,
         hop_length,
         f0_autotune,
         f0_autotune_strength,
@@ -340,7 +339,6 @@ def get_f0(
             p_len: Desired length of the F0 output.
             pitch: Key to adjust the pitch of the F0 contour.
             f0_method: Method to use for F0 estimation (e.g., "crepe").
-            filter_radius: Radius for median filtering the F0 contour.
             hop_length: Hop length for F0 estimation methods.
             f0_autotune: Whether to apply autotune to the F0 contour.
             inp_f0: Optional input F0 contour to use instead of estimating.
@@ -513,7 +511,6 @@ def pipeline(
         file_index,
         index_rate,
         pitch_guidance,
-        filter_radius,
         volume_envelope,
         version,
         protect,
@@ -536,7 +533,6 @@ def pipeline(
             file_index: Path to the FAISS index file for speaker embedding retrieval.
             index_rate: Blending rate for speaker embedding retrieval.
             pitch_guidance: Whether to use pitch guidance during voice conversion.
-            filter_radius: Radius for median filtering the F0 contour.
             tgt_sr: Target sampling rate for the output audio.
             resample_sr: Resampling rate for the output audio.
             volume_envelope: Blending rate for adjusting the RMS level of the output audio.
@@ -595,7 +591,6 @@ def pipeline(
                 p_len,
                 pitch,
                 f0_method,
-                filter_radius,
                 hop_length,
                 f0_autotune,
                 f0_autotune_strength,

diff --git a/tabs/inference/inference.py b/tabs/inference/inference.py
@@ -94,7 +94,6 @@ def update_sliders(preset):
         values = json.load(json_file)
     return (
         values["pitch"],
-        values["filter_radius"],
         values["index_rate"],
         values["rms_mix_rate"],
         values["protect"],
@@ -123,23 +122,22 @@ def import_presets(file_path):
     return presets
 
 
-def get_presets_data(pitch, filter_radius, index_rate, rms_mix_rate, protect):
+def get_presets_data(pitch, index_rate, rms_mix_rate, protect):
     return {
         "pitch": pitch,
-        "filter_radius": filter_radius,
         "index_rate": index_rate,
         "rms_mix_rate": rms_mix_rate,
         "protect": protect,
     }
 
 
 def export_presets_button(
-    preset_name, pitch, filter_radius, index_rate, rms_mix_rate, protect
+    preset_name, pitch, index_rate, rms_mix_rate, protect
 ):
     if preset_name:
         file_path = os.path.join(PRESETS_DIR, f"{preset_name}.json")
         presets_data = get_presets_data(
-            pitch, filter_radius, index_rate, rms_mix_rate, protect
+            pitch, index_rate, rms_mix_rate, protect
         )
         with open(file_path, "w", encoding="utf-8") as json_file:
             json.dump(presets_data, json_file, ensure_ascii=False, indent=4)
@@ -874,17 +872,6 @@ def inference_tab():
                     value=0,
                     interactive=True,
                 )
-                filter_radius = gr.Slider(
-                    minimum=0,
-                    maximum=7,
-                    label=i18n("Filter Radius"),
-                    info=i18n(
-                        "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration."
-                    ),
-                    value=3,
-                    step=1,
-                    interactive=True,
-                )
                 index_rate = gr.Slider(
                     minimum=0,
                     maximum=1,
@@ -920,7 +907,6 @@ def inference_tab():
                     inputs=preset_dropdown,
                     outputs=[
                         pitch,
-                        filter_radius,
                         index_rate,
                         rms_mix_rate,
                         protect,
@@ -931,7 +917,6 @@ def inference_tab():
                     inputs=[
                         preset_name_input,
                         pitch,
-                        filter_radius,
                         index_rate,
                         rms_mix_rate,
                         protect,
@@ -1518,17 +1503,6 @@ def enforce_terms_batch(terms_accepted, *args):
                     value=0,
                     interactive=True,
                 )
-                filter_radius_batch = gr.Slider(
-                    minimum=0,
-                    maximum=7,
-                    label=i18n("Filter Radius"),
-                    info=i18n(
-                        "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration."
-                    ),
-                    value=3,
-                    step=1,
-                    interactive=True,
-                )
                 index_rate_batch = gr.Slider(
                     minimum=0,
                     maximum=1,
@@ -1564,7 +1538,6 @@ def enforce_terms_batch(terms_accepted, *args):
                     inputs=preset_dropdown,
                     outputs=[
                         pitch_batch,
-                        filter_radius_batch,
                         index_rate_batch,
                         rms_mix_rate_batch,
                         protect_batch,
@@ -1575,7 +1548,6 @@ def enforce_terms_batch(terms_accepted, *args):
                     inputs=[
                         preset_name_input,
                         pitch,
-                        filter_radius,
                         index_rate,
                         rms_mix_rate,
                         protect,
@@ -2053,7 +2025,6 @@ def delay_visible(checkbox):
         inputs=[
             terms_checkbox,
             pitch,
-            filter_radius,
             index_rate,
             rms_mix_rate,
             protect,
@@ -2120,7 +2091,6 @@ def delay_visible(checkbox):
         inputs=[
             terms_checkbox_batch,
             pitch_batch,
-            filter_radius_batch,
             index_rate_batch,
             rms_mix_rate_batch,
             protect_batch,

diff --git a/tabs/tts/tts.py b/tabs/tts/tts.py
@@ -217,17 +217,6 @@ def tts_tab():
                 value=0,
                 interactive=True,
             )
-            filter_radius = gr.Slider(
-                minimum=0,
-                maximum=7,
-                label=i18n("Filter Radius"),
-                info=i18n(
-                    "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration."
-                ),
-                value=3,
-                step=1,
-                interactive=True,
-            )
             index_rate = gr.Slider(
                 minimum=0,
                 maximum=1,
@@ -407,7 +396,6 @@ def toggle_visible_embedder_custom(embedder_model):
             tts_voice,
             tts_rate,
             pitch,
-            filter_radius,
             index_rate,
             rms_mix_rate,
             protect,