Merge pull request #307 from galgeek/ytdlp_tmpdir

brozzler yt-dlp should be able to specify a separate tempdir
This commit is contained in:
Barbara Miller 2024-12-12 13:59:51 -08:00 committed by GitHub
commit bfc4aac76a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 22 additions and 1 deletions

View File

@ -483,6 +483,7 @@ class Browser:
skip_extract_outlinks=False,
skip_visit_hashtags=False,
skip_youtube_dl=False,
ytdlp_tmpdir="/tmp",
simpler404=False,
page_timeout=300,
behavior_timeout=900,

View File

@ -265,6 +265,12 @@ def brozzle_page(argv=None):
arg_parser.add_argument(
"--skip-youtube-dl", dest="skip_youtube_dl", action="store_true"
)
arg_parser.add_argument(
"--ytdlp_tmpdir",
dest="ytdlp_tmpdir",
default="/tmp",
help="specify a temp dir for ytdlp; defaults to /tmp",
)
arg_parser.add_argument("--simpler404", dest="simpler404", action="store_true")
add_common_options(arg_parser, argv)
@ -292,6 +298,7 @@ def brozzle_page(argv=None):
skip_extract_outlinks=args.skip_extract_outlinks,
skip_visit_hashtags=args.skip_visit_hashtags,
skip_youtube_dl=args.skip_youtube_dl,
ytdlp_tmpdir=args.ytdlp_tmpdir,
simpler404=args.simpler404,
screenshot_full_page=args.screenshot_full_page,
download_throughput=args.download_throughput,
@ -533,6 +540,12 @@ def brozzler_worker(argv=None):
action="store_true",
help=argparse.SUPPRESS,
)
# Hidden option: directory in which the yt-dlp temporary directory is created
# (the parsed value is forwarded as ytdlp_tmpdir). The help value must be the
# argparse.SUPPRESS sentinel itself, not the string "argparse.SUPPRESS";
# otherwise the option is listed in --help with that literal text.
arg_parser.add_argument(
    "--ytdlp_tmpdir",
    dest="ytdlp_tmpdir",
    default="/tmp",
    help=argparse.SUPPRESS,
)
arg_parser.add_argument(
"--stealth",
dest="stealth",
@ -613,6 +626,7 @@ def brozzler_worker(argv=None):
skip_extract_outlinks=args.skip_extract_outlinks,
skip_visit_hashtags=args.skip_visit_hashtags,
skip_youtube_dl=args.skip_youtube_dl,
ytdlp_tmpdir=args.ytdlp_tmpdir,
stealth=args.stealth,
metrics_port=args.metrics_port,
registry_url=args.registry_url,

View File

@ -64,6 +64,7 @@ class BrozzlerWorker:
skip_extract_outlinks=False,
skip_visit_hashtags=False,
skip_youtube_dl=False,
ytdlp_tmpdir="/tmp",
simpler404=False,
screenshot_full_page=False,
page_timeout=300,
@ -89,6 +90,7 @@ class BrozzlerWorker:
self._skip_extract_outlinks = skip_extract_outlinks
self._skip_visit_hashtags = skip_visit_hashtags
self._skip_youtube_dl = skip_youtube_dl
self._ytdlp_tmpdir = ytdlp_tmpdir
self._simpler404 = simpler404
self._screenshot_full_page = screenshot_full_page
self._page_timeout = page_timeout
@ -445,6 +447,7 @@ class BrozzlerWorker:
skip_extract_outlinks=self._skip_extract_outlinks,
skip_visit_hashtags=self._skip_visit_hashtags,
skip_youtube_dl=self._skip_youtube_dl,
ytdlp_tmpdir=self._ytdlp_tmpdir,
simpler404=self._simpler404,
screenshot_full_page=self._screenshot_full_page,
page_timeout=self._page_timeout,

View File

@ -422,7 +422,10 @@ def do_youtube_dl(worker, site, page):
Returns:
`list` of `str`: outlink urls
"""
with tempfile.TemporaryDirectory(prefix="brzl-ydl-") as tempdir:
with tempfile.TemporaryDirectory(
prefix="brzl-ydl-", dir=worker._ytdlp_tmpdir
) as tempdir:
logging.info("tempdir for yt-dlp: %s", tempdir)
ydl = _build_youtube_dl(worker, tempdir, site, page)
ie_result = _try_youtube_dl(worker, ydl, site, page)
outlinks = set()