mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-11 16:00:38 -04:00
use a thread-local callback in monkey-patched
finish_frag_download, instead of locking around monkey-patching, to allow different threads to youtube-dl concurrently, but still not interfere with each other
This commit is contained in:
parent
82cf5c6dbb
commit
e5536182dc
1 changed files with 31 additions and 19 deletions
|
@ -30,7 +30,21 @@ import doublethink
|
||||||
import datetime
|
import datetime
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
global_ydl_lock = threading.Lock()
|
thread_local = threading.local()
|
||||||
|
_orig__finish_frag_download = youtube_dl.downloader.fragment.FragmentFD._finish_frag_download
|
||||||
|
def _finish_frag_download(ffd_self, ctx):
|
||||||
|
'''
|
||||||
|
We monkey-patch this youtube-dl internal method `_finish_frag_download()`
|
||||||
|
because it gets called after downloading the last segment of a segmented
|
||||||
|
video, which is a good time to upload the stitched-up video that youtube-dl
|
||||||
|
creates for us to warcprox. We have it call a thread-local callback
|
||||||
|
since different threads may be youtube-dl'ing at the same time.
|
||||||
|
'''
|
||||||
|
result = _orig__finish_frag_download(ffd_self, ctx)
|
||||||
|
if hasattr(thread_local, 'finish_frag_download_callback'):
|
||||||
|
thread_local.finish_frag_download_callback(ffd_self, ctx)
|
||||||
|
return result
|
||||||
|
youtube_dl.downloader.fragment.FragmentFD._finish_frag_download = _finish_frag_download
|
||||||
|
|
||||||
_orig_webpage_read_content = youtube_dl.extractor.generic.GenericIE._webpage_read_content
|
_orig_webpage_read_content = youtube_dl.extractor.generic.GenericIE._webpage_read_content
|
||||||
def _webpage_read_content(self, *args, **kwargs):
|
def _webpage_read_content(self, *args, **kwargs):
|
||||||
|
@ -111,9 +125,10 @@ def _build_youtube_dl(worker, destdir, site):
|
||||||
|
|
||||||
- keeps track of urls fetched using a `YoutubeDLSpy`
|
- keeps track of urls fetched using a `YoutubeDLSpy`
|
||||||
- periodically updates `site.last_claimed` in rethinkdb
|
- periodically updates `site.last_claimed` in rethinkdb
|
||||||
- if brozzling through warcprox and downloading fragmented (DASH) videos,
|
- if brozzling through warcprox and downloading segmented videos (e.g.
|
||||||
pushes the stitched together video to warcprox using a
|
HLS), pushes the stitched-up video created by youtube-dl to warcprox
|
||||||
WARCPROX_WRITE_RECORD request
|
using a WARCPROX_WRITE_RECORD request
|
||||||
|
- adds some logging
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
worker (brozzler.BrozzlerWorker): the calling brozzler worker
|
worker (brozzler.BrozzlerWorker): the calling brozzler worker
|
||||||
|
@ -199,21 +214,18 @@ def _build_youtube_dl(worker, destdir, site):
|
||||||
})
|
})
|
||||||
|
|
||||||
def process_info(self, info_dict):
|
def process_info(self, info_dict):
|
||||||
# lock this section to prevent race condition between threads that
|
'''
|
||||||
# want to monkey patch _finish_frag_download() at the same time
|
See comment above on `_finish_frag_download()`
|
||||||
with global_ydl_lock:
|
'''
|
||||||
_orig__finish_frag_download = youtube_dl.downloader.fragment.FragmentFD._finish_frag_download
|
def ffd_callback(ffd_self, ctx):
|
||||||
|
logging.info('%s')
|
||||||
def _finish_frag_download(ffd_self, ctx):
|
if worker._using_warcprox(site):
|
||||||
_orig__finish_frag_download(ffd_self, ctx)
|
self._push_stitched_up_vid_to_warcprox(site, info_dict, ctx)
|
||||||
if worker._using_warcprox(site):
|
try:
|
||||||
self._push_stitched_up_vid_to_warcprox(site, info_dict, ctx)
|
thread_local.finish_frag_download_callback = ffd_callback
|
||||||
|
return super().process_info(info_dict)
|
||||||
try:
|
finally:
|
||||||
youtube_dl.downloader.fragment.FragmentFD._finish_frag_download = _finish_frag_download
|
delattr(thread_local, 'finish_frag_download_callback')
|
||||||
return super().process_info(info_dict)
|
|
||||||
finally:
|
|
||||||
youtube_dl.downloader.fragment.FragmentFD._finish_frag_download = _orig__finish_frag_download
|
|
||||||
|
|
||||||
def maybe_heartbeat_site_last_claimed(*args, **kwargs):
|
def maybe_heartbeat_site_last_claimed(*args, **kwargs):
|
||||||
# in case youtube-dl takes a long time, heartbeat site.last_claimed
|
# in case youtube-dl takes a long time, heartbeat site.last_claimed
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue