mirror of
https://github.com/markqvist/Sideband.git
synced 2025-08-02 03:26:25 -04:00
Added PyDub
This commit is contained in:
parent
446181aa58
commit
53479d4700
11 changed files with 3344 additions and 0 deletions
182
sbapp/pydub/silence.py
Normal file
182
sbapp/pydub/silence.py
Normal file
|
@ -0,0 +1,182 @@
|
|||
"""
|
||||
Various functions for finding/manipulating silence in AudioSegments
|
||||
"""
|
||||
import itertools
|
||||
|
||||
from .utils import db_to_float
|
||||
|
||||
|
||||
def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
    """
    Locates the silent stretches of audio_segment and returns them as a list
    of [start, end] pairs, in milliseconds.

    Complement of detect_nonsilent()

    audio_segment - the segment to scan for silence
    min_silence_len - shortest stretch (in ms) that counts as a silent section
    silence_thresh - loudest level (in dBFS) that is still treated as silence
    seek_step - distance (in ms) between the starts of successive windows
    """
    total_ms = len(audio_segment)

    # a silent stretch can never outlast the audio that contains it
    if total_ms < min_silence_len:
        return []

    # turn the dB threshold into an amplitude so it can be compared against
    # the rms of each window
    rms_limit = db_to_float(silence_thresh) * audio_segment.max_possible_amplitude

    # windows are min_silence_len long and start every seek_step ms; the
    # final possible start is appended when the stepping would skip it, so
    # the tail of the audio is always examined
    final_start = total_ms - min_silence_len
    window_starts = range(0, final_start + 1, seek_step)
    if final_start % seek_step:
        window_starts = itertools.chain(window_starts, [final_start])

    quiet_starts = [
        start
        for start in window_starts
        if audio_segment[start:start + min_silence_len].rms <= rms_limit
    ]

    # nothing was quiet enough
    if not quiet_starts:
        return []

    # fold the quiet window starts into [start ms, end ms] ranges
    merged = []
    range_start = previous = quiet_starts[0]

    for current in quiet_starts[1:]:
        adjacent = current == previous + seek_step

        # a couple of small blips can make one window non-silent even though
        # the silence around it runs together; windows that still overlap
        # (or touch) are kept in the same range
        overlapping = current <= previous + min_silence_len

        if not (adjacent or overlapping):
            merged.append([range_start, previous + min_silence_len])
            range_start = current
        previous = current

    merged.append([range_start, previous + min_silence_len])

    return merged
|
||||
|
||||
|
||||
def detect_nonsilent(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
    """
    Locates the non-silent stretches of audio_segment and returns them as a
    list of [start, end] pairs, in milliseconds.

    Complement of detect_silence()

    audio_segment - the segment to scan
    min_silence_len - shortest stretch (in ms) that counts as a silent section
    silence_thresh - loudest level (in dBFS) that is still treated as silence
    seek_step - distance (in ms) between the starts of successive windows
    """
    quiet = detect_silence(audio_segment, min_silence_len, silence_thresh, seek_step)
    total_ms = len(audio_segment)

    # no silence at all: the entire segment is non-silent
    if not quiet:
        return [[0, total_ms]]

    # a single silent range spanning everything leaves nothing to report
    first_start, first_end = quiet[0]
    if first_start == 0 and first_end == total_ms:
        return []

    # the non-silent ranges are the gaps before, between and after the
    # silent ones
    audible = []
    cursor = 0
    for quiet_start, quiet_end in quiet:
        audible.append([cursor, quiet_start])
        cursor = quiet_end

    # audio remaining after the last silent range
    if cursor != total_ms:
        audible.append([cursor, total_ms])

    # silence starting at 0 produces an empty leading gap; discard it
    if audible[0] == [0, 0]:
        del audible[0]

    return audible
|
||||
|
||||
|
||||
def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, keep_silence=100,
                     seek_step=1):
    """
    Splits audio_segment on its silent sections and returns the resulting
    list of audio segments.

    audio_segment - original pydub.AudioSegment() object

    min_silence_len - (in ms) minimum length of a silence to be used for
        a split. default: 1000ms

    silence_thresh - (in dBFS) anything quieter than this will be
        considered silence. default: -16dBFS

    keep_silence - (in ms or True/False) amount of silence to leave at the
        beginning and end of each chunk, so the sound does not seem to be
        cut off abruptly.
        When a silence is too short to give both neighbouring chunks the
        full keep_silence duration, it is divided evenly between the
        preceding and the following chunk.
        True keeps all the silence, False keeps none.
        default: 100ms

    seek_step - step size for iterating over the segment in ms
    """
    # True means "as much as the whole segment", False means none
    if isinstance(keep_silence, bool):
        padding = len(audio_segment) if keep_silence else 0
    else:
        padding = keep_silence

    # widen every non-silent range by the requested padding on both sides
    padded = []
    for start, end in detect_nonsilent(audio_segment, min_silence_len, silence_thresh, seek_step):
        padded.append([start - padding, end + padding])

    # where two widened neighbours overlap, let them meet at the midpoint
    for earlier, later in zip(padded, padded[1:]):
        if later[0] < earlier[1]:
            midpoint = (earlier[1] + later[0]) // 2
            earlier[1] = midpoint
            later[0] = midpoint

    # clamp each range to the bounds of the audio before slicing
    chunks = []
    for start, end in padded:
        chunks.append(audio_segment[max(start, 0):min(end, len(audio_segment))])
    return chunks
|
||||
|
||||
|
||||
def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=10):
    """
    Returns the millisecond/index that the leading silence ends.

    sound - the segment to find silence in
    silence_threshold - the upper bound for how quiet is silent in dBFS
    chunk_size - chunk size for iterating over the segment in ms; must be
        greater than 0

    If the whole segment is silent (or empty), the length of the segment is
    returned.

    Raises ValueError if chunk_size is not greater than 0.
    """
    # explicit raise rather than assert: asserts are stripped under
    # `python -O`, and a non-positive step never advances, so the loop
    # below would spin forever on a segment that starts with silence
    if not chunk_size > 0:
        raise ValueError("chunk_size must be greater than 0")

    sound_len = len(sound)
    trim_ms = 0  # ms

    # the bounds check comes first so that no chunk past the end of the
    # segment is sliced and measured
    while trim_ms < sound_len and sound[trim_ms:trim_ms + chunk_size].dBFS < silence_threshold:
        trim_ms += chunk_size

    # the last step may overshoot the end; never report more than the length
    return min(trim_ms, sound_len)
|
||||
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue