Added PyDub

2025-12-10 22:35:45 -05:00 · 2024-06-04 03:19:24 +02:00 · 2024-06-04 03:19:24 +02:00 · 53479d4700
commit 53479d4700
parent 446181aa58
11 changed files with 3344 additions and 0 deletions
--- a/sbapp/pydub/silence.py
+++ b/sbapp/pydub/silence.py
@ -0,0 +1,182 @@
+"""
+Various functions for finding/manipulating silence in AudioSegments
+"""
+import itertools
+
+from .utils import db_to_float
+
+
+def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
+    """
+    Returns a list of all silent sections [start, end] in milliseconds of audio_segment.
+    Inverse of detect_nonsilent()
+
+    audio_segment - the segment to find silence in
+    min_silence_len - the minimum length for any silent section
+    silence_thresh - the upper bound for how quiet is silent in dFBS
+    seek_step - step size for interating over the segment in ms
+    """
+    seg_len = len(audio_segment)
+
+    # you can't have a silent portion of a sound that is longer than the sound
+    if seg_len < min_silence_len:
+        return []
+
+    # convert silence threshold to a float value (so we can compare it to rms)
+    silence_thresh = db_to_float(silence_thresh) * audio_segment.max_possible_amplitude
+
+    # find silence and add start and end indicies to the to_cut list
+    silence_starts = []
+
+    # check successive (1 sec by default) chunk of sound for silence
+    # try a chunk at every "seek step" (or every chunk for a seek step == 1)
+    last_slice_start = seg_len - min_silence_len
+    slice_starts = range(0, last_slice_start + 1, seek_step)
+
+    # guarantee last_slice_start is included in the range
+    # to make sure the last portion of the audio is searched
+    if last_slice_start % seek_step:
+        slice_starts = itertools.chain(slice_starts, [last_slice_start])
+
+    for i in slice_starts:
+        audio_slice = audio_segment[i:i + min_silence_len]
+        if audio_slice.rms <= silence_thresh:
+            silence_starts.append(i)
+
+    # short circuit when there is no silence
+    if not silence_starts:
+        return []
+
+    # combine the silence we detected into ranges (start ms - end ms)
+    silent_ranges = []
+
+    prev_i = silence_starts.pop(0)
+    current_range_start = prev_i
+
+    for silence_start_i in silence_starts:
+        continuous = (silence_start_i == prev_i + seek_step)
+
+        # sometimes two small blips are enough for one particular slice to be
+        # non-silent, despite the silence all running together. Just combine
+        # the two overlapping silent ranges.
+        silence_has_gap = silence_start_i > (prev_i + min_silence_len)
+
+        if not continuous and silence_has_gap:
+            silent_ranges.append([current_range_start,
+                                  prev_i + min_silence_len])
+            current_range_start = silence_start_i
+        prev_i = silence_start_i
+
+    silent_ranges.append([current_range_start,
+                          prev_i + min_silence_len])
+
+    return silent_ranges
+
+
+def detect_nonsilent(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
+    """
+    Returns a list of all nonsilent sections [start, end] in milliseconds of audio_segment.
+    Inverse of detect_silent()
+
+    audio_segment - the segment to find silence in
+    min_silence_len - the minimum length for any silent section
+    silence_thresh - the upper bound for how quiet is silent in dFBS
+    seek_step - step size for interating over the segment in ms
+    """
+    silent_ranges = detect_silence(audio_segment, min_silence_len, silence_thresh, seek_step)
+    len_seg = len(audio_segment)
+
+    # if there is no silence, the whole thing is nonsilent
+    if not silent_ranges:
+        return [[0, len_seg]]
+
+    # short circuit when the whole audio segment is silent
+    if silent_ranges[0][0] == 0 and silent_ranges[0][1] == len_seg:
+        return []
+
+    prev_end_i = 0
+    nonsilent_ranges = []
+    for start_i, end_i in silent_ranges:
+        nonsilent_ranges.append([prev_end_i, start_i])
+        prev_end_i = end_i
+
+    if end_i != len_seg:
+        nonsilent_ranges.append([prev_end_i, len_seg])
+
+    if nonsilent_ranges[0] == [0, 0]:
+        nonsilent_ranges.pop(0)
+
+    return nonsilent_ranges
+
+
+def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, keep_silence=100,
+                     seek_step=1):
+    """
+    Returns list of audio segments from splitting audio_segment on silent sections
+
+    audio_segment - original pydub.AudioSegment() object
+
+    min_silence_len - (in ms) minimum length of a silence to be used for
+        a split. default: 1000ms
+
+    silence_thresh - (in dBFS) anything quieter than this will be
+        considered silence. default: -16dBFS
+
+    keep_silence - (in ms or True/False) leave some silence at the beginning
+        and end of the chunks. Keeps the sound from sounding like it
+        is abruptly cut off.
+        When the length of the silence is less than the keep_silence duration
+        it is split evenly between the preceding and following non-silent
+        segments.
+        If True is specified, all the silence is kept, if False none is kept.
+        default: 100ms
+
+    seek_step - step size for interating over the segment in ms
+    """
+
+    # from the itertools documentation
+    def pairwise(iterable):
+        "s -> (s0,s1), (s1,s2), (s2, s3), ..."
+        a, b = itertools.tee(iterable)
+        next(b, None)
+        return zip(a, b)
+
+    if isinstance(keep_silence, bool):
+        keep_silence = len(audio_segment) if keep_silence else 0
+
+    output_ranges = [
+        [ start - keep_silence, end + keep_silence ]
+        for (start,end)
+            in detect_nonsilent(audio_segment, min_silence_len, silence_thresh, seek_step)
+    ]
+
+    for range_i, range_ii in pairwise(output_ranges):
+        last_end = range_i[1]
+        next_start = range_ii[0]
+        if next_start < last_end:
+            range_i[1] = (last_end+next_start)//2
+            range_ii[0] = range_i[1]
+
+    return [
+        audio_segment[ max(start,0) : min(end,len(audio_segment)) ]
+        for start,end in output_ranges
+    ]
+
+
+def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=10):
+    """
+    Returns the millisecond/index that the leading silence ends.
+
+    audio_segment - the segment to find silence in
+    silence_threshold - the upper bound for how quiet is silent in dFBS
+    chunk_size - chunk size for interating over the segment in ms
+    """
+    trim_ms = 0 # ms
+    assert chunk_size > 0 # to avoid infinite loop
+    while sound[trim_ms:trim_ms+chunk_size].dBFS < silence_threshold and trim_ms < len(sound):
+        trim_ms += chunk_size
+
+    # if there is no end it should return the length of the segment
+    return min(trim_ms, len(sound))
+
+