mirror of
https://github.com/markqvist/Sideband.git
synced 2025-07-23 23:10:59 -04:00
Added PyDub
This commit is contained in:
parent
446181aa58
commit
53479d4700
11 changed files with 3344 additions and 0 deletions
341
sbapp/pydub/effects.py
Normal file
341
sbapp/pydub/effects.py
Normal file
|
@ -0,0 +1,341 @@
|
|||
import sys
|
||||
import math
|
||||
import array
|
||||
from .utils import (
|
||||
db_to_float,
|
||||
ratio_to_db,
|
||||
register_pydub_effect,
|
||||
make_chunks,
|
||||
audioop,
|
||||
get_min_max_value
|
||||
)
|
||||
from .silence import split_on_silence
|
||||
from .exceptions import TooManyMissingFrames, InvalidDuration
|
||||
|
||||
# Python 3 removed xrange; alias it so the py2-style loops below keep working.
if sys.version_info >= (3, 0):
    xrange = range
|
||||
|
||||
|
||||
@register_pydub_effect
def apply_mono_filter_to_each_channel(seg, filter_fn):
    """
    Run a mono effect (filter_fn) independently on every channel of seg,
    then re-interleave the processed channels into a new segment.

    filter_fn - callable taking a mono AudioSegment and returning one of
        the same length.
    """
    n_channels = seg.channels

    # Process each channel in isolation.
    processed = [filter_fn(mono) for mono in seg.split_to_mono()]

    # Re-interleave: sample i of channel c lands at index i * n_channels + c.
    out_data = seg.get_array_of_samples()
    for chan_idx, mono_seg in enumerate(processed):
        for frame_idx, sample_val in enumerate(mono_seg.get_array_of_samples()):
            out_data[frame_idx * n_channels + chan_idx] = sample_val

    return seg._spawn(out_data)
|
||||
|
||||
|
||||
@register_pydub_effect
def normalize(seg, headroom=0.1):
    """
    Boost the segment so its peak sits `headroom` dB below full scale.

    headroom is how close to the maximum volume to boost the signal up to
    (specified in dB)
    """
    peak = seg.max

    # A zero peak means the segment is pure silence; nothing to normalize.
    if not peak:
        return seg

    target_peak = seg.max_possible_amplitude * db_to_float(-headroom)
    return seg.apply_gain(ratio_to_db(target_peak / peak))
|
||||
|
||||
|
||||
@register_pydub_effect
def speedup(seg, playback_speed=1.5, chunk_size=150, crossfade=25):
    """
    Speed up playback by cutting small slices out of the audio and
    crossfading the remaining chunks back together.

    playback_speed - target speed multiplier (e.g. 1.5 = 50% faster)
    chunk_size - chunk length in ms; default 150ms since one 20 Hz waveform
        (the lowest frequency audible to humans) is 50ms long
    crossfade - crossfade length in ms between kept chunks

    Raises Exception when the segment is too short to yield two chunks.
    """
    # Portion of audio to KEEP: at 1.25x speed we keep 80% and discard 20%.
    keep_ratio = 1.0 / playback_speed

    if playback_speed < 2.0:
        # Removing less than half the audio: shrink the removal span.
        ms_to_remove_per_chunk = int(chunk_size * (1 - keep_ratio) / keep_ratio)
    else:
        # Removing at least half the audio: remove whole chunks instead.
        ms_to_remove_per_chunk = int(chunk_size)
        chunk_size = int(keep_ratio * chunk_size / (1 - keep_ratio))

    # The crossfade cannot be longer than the span of audio being removed.
    crossfade = min(crossfade, ms_to_remove_per_chunk - 1)

    chunks = make_chunks(seg, chunk_size + ms_to_remove_per_chunk)
    if len(chunks) < 2:
        raise Exception("Could not speed up AudioSegment, it was too short {2:0.2f}s for the current settings:\n{0}ms chunks at {1:0.1f}x speedup".format(
            chunk_size, playback_speed, seg.duration_seconds))

    # Truncate a bit less than calculated; the crossfade overlap makes up
    # the difference.
    ms_to_remove_per_chunk -= crossfade

    # Keep the final chunk untouched — it is not guaranteed to be full length.
    last_chunk = chunks[-1]
    shortened = [chunk[:-ms_to_remove_per_chunk] for chunk in chunks[:-1]]

    out = shortened[0]
    for chunk in shortened[1:]:
        out = out.append(chunk, crossfade=crossfade)

    return out + last_chunk
|
||||
|
||||
|
||||
@register_pydub_effect
def strip_silence(seg, silence_len=1000, silence_thresh=-16, padding=100):
    """
    Remove stretches of silence from the segment.

    silence_len - minimum length (ms) of a silent run for it to be removed
    silence_thresh - loudness threshold (in dBFS) below which audio counts
        as silence
    padding - ms of audio kept around each non-silent chunk

    Raises InvalidDuration when padding exceeds silence_len.
    """
    if padding > silence_len:
        raise InvalidDuration("padding cannot be longer than silence_len")

    chunks = split_on_silence(seg, silence_len, silence_thresh, padding)

    if not chunks:
        # Entirely silent input: return an empty segment with seg's metadata.
        return seg[0:0]

    # Overlap adjacent padded chunks by half the padding.
    crossfade = padding / 2
    out = chunks[0]
    for chunk in chunks[1:]:
        out = out.append(chunk, crossfade=crossfade)

    return out
|
||||
|
||||
|
||||
@register_pydub_effect
def compress_dynamic_range(seg, threshold=-20.0, ratio=4.0, attack=5.0, release=50.0):
    """
    Apply dynamic range compression: attenuate audio louder than `threshold`.

    Keyword Arguments:

        threshold - default: -20.0
            Threshold in dBFS. default of -20.0 means -20dB relative to the
            maximum possible volume. 0dBFS is the maximum possible value so
            all values for this argument should be negative.

        ratio - default: 4.0
            Compression ratio. Audio louder than the threshold will be
            reduced to 1/ratio the volume. A ratio of 4.0 is equivalent to
            a setting of 4:1 in a pro-audio compressor like the Waves C1.

        attack - default: 5.0
            Attack in milliseconds. How long it should take for the compressor
            to kick in once the audio has exceeded the threshold.

        release - default: 50.0
            Release in milliseconds. How long it should take for the compressor
            to stop compressing after the audio has fallen below the threshold.

    For an overview of Dynamic Range Compression, and more detailed explanation
    of the related terminology, see:

        http://en.wikipedia.org/wiki/Dynamic_range_compression
    """
    thresh_rms = seg.max_possible_amplitude * db_to_float(threshold)

    lookback_frames = int(seg.frame_count(ms=attack))

    def _rms_at(frame_i):
        # RMS over the attack-length window ending at frame_i.
        return seg.get_sample_slice(frame_i - lookback_frames, frame_i).rms

    def _db_over_threshold(rms):
        if rms == 0:
            return 0.0
        return max(ratio_to_db(rms / thresh_rms), 0)

    attack_frames = seg.frame_count(ms=attack)
    release_frames = seg.frame_count(ms=release)

    # Current gain reduction being applied (in dB).
    attenuation = 0.0
    out_frames = []

    for i in xrange(int(seg.frame_count())):
        rms_now = _rms_at(i)

        # With a ratio of 4.0 the signal may only exceed the threshold by
        # 1/4 of the dB it otherwise would.
        max_attenuation = (1 - (1.0 / ratio)) * _db_over_threshold(rms_now)

        attenuation_inc = max_attenuation / attack_frames
        attenuation_dec = max_attenuation / release_frames

        if rms_now > thresh_rms and attenuation <= max_attenuation:
            # Attack phase: ramp the attenuation up toward the target.
            attenuation = min(attenuation + attenuation_inc, max_attenuation)
        else:
            # Release phase: ramp the attenuation back down toward zero.
            attenuation = max(attenuation - attenuation_dec, 0)

        frame = seg.get_frame(i)
        if attenuation != 0.0:
            frame = audioop.mul(frame, seg.sample_width, db_to_float(-attenuation))

        out_frames.append(frame)

    return seg._spawn(data=b''.join(out_frames))
|
||||
|
||||
|
||||
# Invert the phase of the signal.

@register_pydub_effect
def invert_phase(seg, channels=(1, 1)):
    """
    channels - specifies which channel (left or right) to reverse the phase of.
    Note that mono AudioSegments will become stereo.
    """
    if channels == (1, 1):
        # Invert every sample across all channels.
        return seg._spawn(data=audioop.mul(seg._data, seg.sample_width, -1.0))

    # Single-channel inversion requires an explicit stereo segment.
    if seg.channels != 2:
        raise Exception("Can't implicitly convert an AudioSegment with " + str(seg.channels) + " channels to stereo.")

    left, right = seg.split_to_mono()
    if channels == (1, 0):
        left = left.invert_phase()
    else:
        right = right.invert_phase()

    return seg.from_mono_audiosegments(left, right)
|
||||
|
||||
|
||||
|
||||
# High and low pass filters based on implementation found on Stack Overflow:
# http://stackoverflow.com/questions/13882038/implementing-simple-high-and-low-pass-filters-in-c

@register_pydub_effect
def low_pass_filter(seg, cutoff):
    """
    cutoff - Frequency (in Hz) where higher frequency signal will begin to
        be reduced by 6dB per octave (doubling in frequency) above this point
    """
    rc = 1.0 / (cutoff * 2 * math.pi)   # RC time constant for the cutoff
    dt = 1.0 / seg.frame_rate           # seconds per frame
    alpha = dt / (rc + dt)              # one-pole smoothing factor

    original = seg.get_array_of_samples()
    filtered = array.array(seg.array_type, original)

    n_channels = seg.channels
    frame_count = int(seg.frame_count())

    # Seed each channel's filter state with its first sample.
    state = [0] * n_channels
    for c in range(n_channels):
        state[c] = filtered[c] = original[c]

    for frame_i in range(1, frame_count):
        for c in range(n_channels):
            idx = frame_i * n_channels + c
            state[c] = state[c] + (alpha * (original[idx] - state[c]))
            filtered[idx] = int(state[c])

    return seg._spawn(data=filtered)
|
||||
|
||||
|
||||
@register_pydub_effect
def high_pass_filter(seg, cutoff):
    """
    cutoff - Frequency (in Hz) where lower frequency signal will begin to
        be reduced by 6dB per octave (doubling in frequency) below this point
    """
    rc = 1.0 / (cutoff * 2 * math.pi)   # RC time constant for the cutoff
    dt = 1.0 / seg.frame_rate           # seconds per frame
    alpha = rc / (rc + dt)              # one-pole smoothing factor

    # Clamp output to the representable range of the sample type.
    minval, maxval = get_min_max_value(seg.sample_width * 8)

    original = seg.get_array_of_samples()
    filtered = array.array(seg.array_type, original)

    n_channels = seg.channels
    frame_count = int(seg.frame_count())

    # Seed each channel's filter state with its first sample.
    state = [0] * n_channels
    for c in range(n_channels):
        state[c] = filtered[c] = original[c]

    for frame_i in range(1, frame_count):
        for c in range(n_channels):
            idx = frame_i * n_channels + c
            prev_idx = (frame_i - 1) * n_channels + c

            state[c] = alpha * (state[c] + original[idx] - original[prev_idx])
            filtered[idx] = int(min(max(state[c], minval), maxval))

    return seg._spawn(data=filtered)
|
||||
|
||||
|
||||
@register_pydub_effect
def pan(seg, pan_amount):
    """
    pan_amount should be between -1.0 (100% left) and +1.0 (100% right)

    When pan_amount == 0.0 the left/right balance is not changed.

    Panning does not alter the *perceived* loudness, but since loudness
    is decreasing on one side, the other side needs to get louder to
    compensate. When panned hard left, the left channel will be 3dB louder.
    """
    if not -1.0 <= pan_amount <= 1.0:
        raise ValueError("pan_amount should be between -1.0 (100% left) and +1.0 (100% right)")

    max_boost_db = ratio_to_db(2.0)
    boost_db = abs(pan_amount) * max_boost_db

    # The quiet side loses what the loud side gains, in linear terms.
    reduce_db = ratio_to_db(db_to_float(max_boost_db) - db_to_float(boost_db))

    # Cut boost in half (max boost == 3dB) - in reality 2 speakers
    # do not sum to a full 6 dB.
    boost_db = boost_db / 2.0

    if pan_amount < 0:
        return seg.apply_gain_stereo(boost_db, reduce_db)
    return seg.apply_gain_stereo(reduce_db, boost_db)
|
||||
|
||||
|
||||
@register_pydub_effect
def apply_gain_stereo(seg, left_gain=0.0, right_gain=0.0):
    """
    Apply independent gains to the left and right channels.

    left_gain - amount of gain to apply to the left channel (in dB)
    right_gain - amount of gain to apply to the right channel (in dB)

    note: mono audio segments will be converted to stereo

    Raises ValueError for segments with more than 2 channels.
    """
    if seg.channels == 1:
        left = right = seg
    elif seg.channels == 2:
        left, right = seg.split_to_mono()
    else:
        # Fix: previously >2-channel input fell through both branches and
        # crashed below with an unhelpful NameError on `left`.
        raise ValueError(
            "apply_gain_stereo only supports mono or stereo segments "
            "(got {} channels)".format(seg.channels))

    l_mult_factor = db_to_float(left_gain)
    r_mult_factor = db_to_float(right_gain)

    # Scale each channel, place it into its stereo slot, then mix the two.
    left_data = audioop.mul(left._data, left.sample_width, l_mult_factor)
    left_data = audioop.tostereo(left_data, left.sample_width, 1, 0)

    right_data = audioop.mul(right._data, right.sample_width, r_mult_factor)
    right_data = audioop.tostereo(right_data, right.sample_width, 0, 1)

    output = audioop.add(left_data, right_data, seg.sample_width)

    return seg._spawn(data=output,
                      overrides={'channels': 2,
                                 'frame_width': 2 * seg.sample_width})
|
Loading…
Add table
Add a link
Reference in a new issue