mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-22 23:59:47 -05:00
Merge pull request #307 from galgeek/ytdlp_tmpdir
brozzler yt-dlp should be able to specify a separate tempdir
This commit is contained in:
commit
bfc4aac76a
@ -483,6 +483,7 @@ class Browser:
|
||||
skip_extract_outlinks=False,
|
||||
skip_visit_hashtags=False,
|
||||
skip_youtube_dl=False,
|
||||
ytdlp_tmpdir="/tmp",
|
||||
simpler404=False,
|
||||
page_timeout=300,
|
||||
behavior_timeout=900,
|
||||
|
@ -265,6 +265,12 @@ def brozzle_page(argv=None):
|
||||
arg_parser.add_argument(
|
||||
"--skip-youtube-dl", dest="skip_youtube_dl", action="store_true"
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
"--ytdlp_tmpdir",
|
||||
dest="ytdlp_tmpdir",
|
||||
default="/tmp",
|
||||
help="specify a temp dir for ytdlp; defaults to /tmp",
|
||||
)
|
||||
arg_parser.add_argument("--simpler404", dest="simpler404", action="store_true")
|
||||
add_common_options(arg_parser, argv)
|
||||
|
||||
@ -292,6 +298,7 @@ def brozzle_page(argv=None):
|
||||
skip_extract_outlinks=args.skip_extract_outlinks,
|
||||
skip_visit_hashtags=args.skip_visit_hashtags,
|
||||
skip_youtube_dl=args.skip_youtube_dl,
|
||||
ytdlp_tmpdir=args.ytdlp_tmpdir,
|
||||
simpler404=args.simpler404,
|
||||
screenshot_full_page=args.screenshot_full_page,
|
||||
download_throughput=args.download_throughput,
|
||||
@ -533,6 +540,12 @@ def brozzler_worker(argv=None):
|
||||
action="store_true",
|
||||
help=argparse.SUPPRESS,
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
"--ytdlp_tmpdir",
|
||||
dest="ytdlp_tmpdir",
|
||||
default="/tmp",
|
||||
help=argparse.SUPPRESS,
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
"--stealth",
|
||||
dest="stealth",
|
||||
@ -613,6 +626,7 @@ def brozzler_worker(argv=None):
|
||||
skip_extract_outlinks=args.skip_extract_outlinks,
|
||||
skip_visit_hashtags=args.skip_visit_hashtags,
|
||||
skip_youtube_dl=args.skip_youtube_dl,
|
||||
ytdlp_tmpdir=args.ytdlp_tmpdir,
|
||||
stealth=args.stealth,
|
||||
metrics_port=args.metrics_port,
|
||||
registry_url=args.registry_url,
|
||||
|
@ -64,6 +64,7 @@ class BrozzlerWorker:
|
||||
skip_extract_outlinks=False,
|
||||
skip_visit_hashtags=False,
|
||||
skip_youtube_dl=False,
|
||||
ytdlp_tmpdir="/tmp",
|
||||
simpler404=False,
|
||||
screenshot_full_page=False,
|
||||
page_timeout=300,
|
||||
@ -89,6 +90,7 @@ class BrozzlerWorker:
|
||||
self._skip_extract_outlinks = skip_extract_outlinks
|
||||
self._skip_visit_hashtags = skip_visit_hashtags
|
||||
self._skip_youtube_dl = skip_youtube_dl
|
||||
self._ytdlp_tmpdir = ytdlp_tmpdir
|
||||
self._simpler404 = simpler404
|
||||
self._screenshot_full_page = screenshot_full_page
|
||||
self._page_timeout = page_timeout
|
||||
@ -445,6 +447,7 @@ class BrozzlerWorker:
|
||||
skip_extract_outlinks=self._skip_extract_outlinks,
|
||||
skip_visit_hashtags=self._skip_visit_hashtags,
|
||||
skip_youtube_dl=self._skip_youtube_dl,
|
||||
ytdlp_tmpdir=self._ytdlp_tmpdir,
|
||||
simpler404=self._simpler404,
|
||||
screenshot_full_page=self._screenshot_full_page,
|
||||
page_timeout=self._page_timeout,
|
||||
|
@ -422,7 +422,10 @@ def do_youtube_dl(worker, site, page):
|
||||
Returns:
|
||||
`list` of `str`: outlink urls
|
||||
"""
|
||||
with tempfile.TemporaryDirectory(prefix="brzl-ydl-") as tempdir:
|
||||
with tempfile.TemporaryDirectory(
|
||||
prefix="brzl-ydl-", dir=worker._ytdlp_tmpdir
|
||||
) as tempdir:
|
||||
logging.info("tempdir for yt-dlp: %s", tempdir)
|
||||
ydl = _build_youtube_dl(worker, tempdir, site, page)
|
||||
ie_result = _try_youtube_dl(worker, ydl, site, page)
|
||||
outlinks = set()
|
||||
|
Loading…
x
Reference in New Issue
Block a user