# Source: Sideband/sbapp/pyogg/opus_buffered_encoder.py
# Web-view metadata: 408 lines, 15 KiB, Python.
# Last change: 2024-06-03 01:54:58 +02:00
import copy
import ctypes
from typing import Optional, ByteString, List, Tuple, Callable
import warnings
from . import opus
from .opus_encoder import OpusEncoder
from .pyogg_error import PyOggError
class OpusBufferedEncoder(OpusEncoder):
    """Opus encoder that accepts PCM in chunks of arbitrary length.

    PCM handed to `buffered_encode()` is cut into frames of the
    duration chosen with `set_frame_size()`.  Bytes that do not yet
    fill a frame are kept in an internal buffer that is exactly one
    frame long, until more PCM arrives or a flush is requested.

    The PCM is treated as interleaved 16-bit samples: the frame size
    in bytes is computed from `ctypes.sizeof(opus.opus_int16)` and the
    number of channels.
    """

    def __init__(self) -> None:
        super().__init__()

        # Frame duration in milliseconds, as given to set_frame_size()
        self._frame_size_ms: Optional[float] = None

        # Number of PCM bytes (all channels) in one frame
        self._frame_size_bytes: Optional[int] = None

        # Buffer contains the bytes required for the next
        # frame.
        self._buffer: Optional[ctypes.Array] = None

        # Location of the next free byte in the buffer
        self._buffer_index = 0

    def set_frame_size(self, frame_size: float) -> None:
        """Set the desired frame duration (in milliseconds).

        Valid options are 2.5, 5, 10, 20, 40, or 60ms.

        Raises PyOggError if `frame_size` is not one of those values.
        """
        # Ensure the frame size is valid.  Compare frame size in
        # units of 0.1ms to avoid floating point comparison
        if int(frame_size*10) not in [25, 50, 100, 200, 400, 600]:
            raise PyOggError(
                "Frame size ({:f}) not one of ".format(frame_size)+
                "the acceptable values"
            )

        self._frame_size_ms = frame_size
        self._calc_frame_size()

    def set_sampling_frequency(self, samples_per_second: int) -> None:
        """Set the sampling frequency and update the frame size in bytes.

        The value is passed unchanged to the parent class.
        """
        super().set_sampling_frequency(samples_per_second)
        self._calc_frame_size()

    def buffered_encode(self,
                        pcm_bytes: memoryview,
                        flush: bool = False,
                        callback: Optional[
                            Callable[[memoryview, int, bool], None]
                        ] = None
                        ) -> List[Tuple[memoryview, int, bool]]:
        """Gets encoded packets and their number of samples.

        This method returns a list, where each item in the list is
        a tuple of three items: the Opus-encoded frame (a
        memoryview), the number of samples encoded per channel
        (excluding any silence added when flushing), and a flag that
        is True only for the packet produced by a flush.

        If `callback` is supplied then this method will instead
        return an empty list but call the callback for every
        Opus-encoded frame that would have been returned as a list.
        This option has the desirable property of eliminating the
        copying of the encoded packets, which is required in order
        to form a list.  The callback takes the same three values as
        positional arguments.  The user must either process or copy
        the data as the data may be overwritten once the callback
        terminates.

        A frame is only encoded immediately when strictly more PCM
        than one frame is available; the final (possibly complete)
        frame is held back in the buffer so that a later flush can
        mark it as the end of the stream.  When `flush` is true, the
        buffered data is padded with silence and encoded.  Flushing
        an empty buffer produces no packet.

        Raises PyOggError if the frame size, sampling frequency or
        number of channels has not been set.
        """
        pcm_len = len(pcm_bytes)

        # If there's no work to do return immediately
        if pcm_len == 0 and not flush:
            return []

        # Sanity checks.  Exceptions are raised (rather than using
        # assert) so that the checks survive `python -O`.
        if self._frame_size_ms is None:
            raise PyOggError("Frame size must be set before encoding")
        if self._frame_size_bytes is None or self._buffer is None:
            # The parameters may have been set in an order that did
            # not allow the frame size to be calculated earlier.
            self._calc_frame_size()
        if (self._frame_size_bytes is None
                or self._buffer is None
                or self._channels is None):
            raise PyOggError(
                "Sampling frequency and number of channels must be "+
                "set before encoding"
            )

        # Local variable initialisation
        buffer = self._buffer
        frame_size_bytes = self._frame_size_bytes
        bytes_per_sample = (
            ctypes.sizeof(opus.opus_int16) * self._channels
        )
        results: List[Tuple[memoryview, int, bool]] = []
        pcm_index = 0

        # 'Cast' memoryview of PCM to ctypes Array
        Buffer = ctypes.c_ubyte * pcm_len
        try:
            pcm_ctypes = Buffer.from_buffer(pcm_bytes)
        except TypeError:
            warnings.warn(
                "Because PCM was read-only, an extra memory "+
                "copy was required; consider storing PCM in "+
                "writable memory (for example, bytearray "+
                "rather than bytes)."
            )
            # from_buffer() requires a writable buffer; make the
            # copy that the warning above describes.
            pcm_ctypes = Buffer.from_buffer_copy(pcm_bytes)

        # Either store the encoded packet to return at the end of the
        # method or immediately call the callback with the encoded
        # packet.
        def store_or_callback(encoded_packet: memoryview,
                              samples: int,
                              end_of_stream: bool = False) -> None:
            if callback is None:
                # Store a copy of the packet, as the encoded data
                # may be overwritten by the next call to encode().
                results.append((
                    memoryview(bytes(encoded_packet)),
                    samples,
                    end_of_stream
                ))
            else:
                # Call the callback
                callback(
                    encoded_packet,
                    samples,
                    end_of_stream
                )

        # Fill the remainder of the buffer with silence and encode it.
        # The associated number of samples are only that of actual
        # data, not the added silence.
        def flush_buffer() -> None:
            # If the buffer is already empty, we have no work to do
            if self._buffer_index == 0:
                return

            # Store the number of samples currently in the buffer
            samples = self._buffer_index // bytes_per_sample

            # Fill the buffer with silence
            ctypes.memset(
                # destination
                ctypes.byref(buffer, self._buffer_index),
                # value
                0,
                # count
                frame_size_bytes - self._buffer_index
            )

            # Encode the PCM
            # As at 2020-11-05, mypy is unaware that ctype Arrays
            # support the buffer protocol.
            encoded_packet = self.encode(memoryview(buffer)) # type: ignore

            # The buffer has been consumed; without this reset a
            # second flush would emit the same packet again.
            self._buffer_index = 0

            # Either store the encoded packet or call the
            # callback
            store_or_callback(encoded_packet, samples, True)

        # Step 1: if data is left over from a previous call, try to
        # complete that frame first.
        if self._buffer_index != 0:
            data_required = frame_size_bytes - self._buffer_index
            if pcm_len > data_required:
                # We have sufficient data to fill the buffer and
                # have data left over.  Copy data into the buffer.
                ctypes.memmove(
                    # destination
                    ctypes.byref(buffer, self._buffer_index),
                    # source
                    pcm_ctypes,
                    # count
                    data_required
                )
                pcm_index = data_required

                # Encode the PCM
                encoded_packet = super().encode(
                    # Memoryviews of ctypes do work, even though
                    # mypy complains.
                    memoryview(buffer) # type: ignore
                )

                # We've now processed the buffer
                self._buffer_index = 0

                # The buffer was full, so the packet holds a whole
                # frame of actual data.
                store_or_callback(
                    encoded_packet,
                    frame_size_bytes // bytes_per_sample
                )

        # Step 2: with an empty buffer we can encode straight from
        # the provided PCM without copying.  Slicing a ctypes array
        # copies it, so a memoryview is sliced instead; see
        # https://mattgwwalker.wordpress.com/2020/12/12/python-ctypes-slicing/
        if self._buffer_index == 0:
            pcm_view = memoryview(pcm_bytes)
            # Strictly greater than: the last frame's worth of data
            # is always left for the buffer (see docstring).
            while pcm_len - pcm_index > frame_size_bytes:
                frame_data = pcm_view[
                    pcm_index:pcm_index+frame_size_bytes
                ]
                pcm_index += frame_size_bytes

                # Encode the PCM
                encoded_packet = super().encode(frame_data)

                # Either store the encoded packet or call the
                # callback
                store_or_callback(
                    encoded_packet,
                    len(frame_data) // bytes_per_sample
                )

        # Step 3: whatever PCM remains fits in the free part of the
        # buffer.  Copy it there for the next call (or the flush).
        remaining_data = pcm_len - pcm_index
        ctypes.memmove(
            # destination
            ctypes.byref(buffer, self._buffer_index),
            # source
            ctypes.byref(pcm_ctypes, pcm_index),
            # count
            remaining_data
        )
        self._buffer_index += remaining_data

        # If we've been asked to flush the buffer then do so
        if flush:
            flush_buffer()

        return results

    def _calc_frame_size(self) -> None:
        """Calculates the number of bytes in a frame.

        If the frame size (in milliseconds), the number of samples
        per second and the number of channels have already been
        specified, then the frame size in bytes is set and an empty
        buffer of that size is allocated.  Otherwise, this method
        does nothing.

        The frame size is measured in bytes required to store the
        samples of all channels.
        """
        if (self._frame_size_ms is None
                or self._samples_per_second is None
                or self._channels is None):
            return

        # Work in integer units of 0.1ms so that fractional frame
        # sizes (2.5ms) still give an integer number of bytes; a
        # float cannot be used as a ctypes array length.
        frame_size_tenths_ms = int(self._frame_size_ms * 10)
        samples_per_channel = (
            frame_size_tenths_ms
            * self._samples_per_second
            // 10000
        )
        self._frame_size_bytes = (
            samples_per_channel
            * ctypes.sizeof(opus.opus_int16)
            * self._channels
        )

        # Allocate space for the buffer.  Any previously buffered
        # data is discarded along with the old buffer, so the index
        # must not keep pointing into it.
        Buffer = ctypes.c_ubyte * self._frame_size_bytes
        self._buffer = Buffer()
        self._buffer_index = 0

    def _get_next_frame(self, add_silence=False):
        """Gets the PCM for the next frame from the internal buffer.

        Returns a tuple where the first item is one frame of PCM (a
        bytes object) and the second item is the number of samples
        (per channel) of actual data in that frame.  The buffer is
        emptied when a frame is returned.

        If the buffer holds less than a full frame, the frame is
        padded with silence when `add_silence` is true.

        Returns None if insufficient data is available.

        Raises PyOggError if the frame size has not been calculated.
        """
        # Ensure frame size has been specified
        if self._frame_size_bytes is None or self._buffer is None:
            raise PyOggError(
                "Desired frame size hasn't been set.  Perhaps "+
                "encode() was called before set_frame_size() "+
                "and set_sampling_frequency()?"
            )

        buffered = self._buffer_index

        # No data at all in buffer
        if buffered == 0:
            return None

        # Insufficient data to fill a frame and we're not adding
        # silence
        if buffered < self._frame_size_bytes and not add_silence:
            return None

        # Number of samples (per channel) of actual data
        samples = (
            buffered
            // self._channels
            // ctypes.sizeof(opus.opus_int16)
        )

        # Take the buffered data and fill the remainder of the frame
        # (if any) with silence
        next_frame = bytes(self._buffer)[:buffered]
        next_frame += b'\x00' * (self._frame_size_bytes - buffered)

        # The buffered data has been handed out
        self._buffer_index = 0

        return (next_frame, samples)