mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-23 16:19:49 -05:00
Merge pull request #307 from galgeek/ytdlp_tmpdir
brozzler yt-dlp should be able to specify a separate tempdir
This commit is contained in:
commit
bfc4aac76a
@ -483,6 +483,7 @@ class Browser:
|
|||||||
skip_extract_outlinks=False,
|
skip_extract_outlinks=False,
|
||||||
skip_visit_hashtags=False,
|
skip_visit_hashtags=False,
|
||||||
skip_youtube_dl=False,
|
skip_youtube_dl=False,
|
||||||
|
ytdlp_tmpdir="/tmp",
|
||||||
simpler404=False,
|
simpler404=False,
|
||||||
page_timeout=300,
|
page_timeout=300,
|
||||||
behavior_timeout=900,
|
behavior_timeout=900,
|
||||||
|
@ -265,6 +265,12 @@ def brozzle_page(argv=None):
|
|||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
"--skip-youtube-dl", dest="skip_youtube_dl", action="store_true"
|
"--skip-youtube-dl", dest="skip_youtube_dl", action="store_true"
|
||||||
)
|
)
|
||||||
|
arg_parser.add_argument(
|
||||||
|
"--ytdlp_tmpdir",
|
||||||
|
dest="ytdlp_tmpdir",
|
||||||
|
default="/tmp",
|
||||||
|
help="specify a temp dir for ytdlp; defaults to /tmp",
|
||||||
|
)
|
||||||
arg_parser.add_argument("--simpler404", dest="simpler404", action="store_true")
|
arg_parser.add_argument("--simpler404", dest="simpler404", action="store_true")
|
||||||
add_common_options(arg_parser, argv)
|
add_common_options(arg_parser, argv)
|
||||||
|
|
||||||
@ -292,6 +298,7 @@ def brozzle_page(argv=None):
|
|||||||
skip_extract_outlinks=args.skip_extract_outlinks,
|
skip_extract_outlinks=args.skip_extract_outlinks,
|
||||||
skip_visit_hashtags=args.skip_visit_hashtags,
|
skip_visit_hashtags=args.skip_visit_hashtags,
|
||||||
skip_youtube_dl=args.skip_youtube_dl,
|
skip_youtube_dl=args.skip_youtube_dl,
|
||||||
|
ytdlp_tmpdir=args.ytdlp_tmpdir,
|
||||||
simpler404=args.simpler404,
|
simpler404=args.simpler404,
|
||||||
screenshot_full_page=args.screenshot_full_page,
|
screenshot_full_page=args.screenshot_full_page,
|
||||||
download_throughput=args.download_throughput,
|
download_throughput=args.download_throughput,
|
||||||
@ -533,6 +540,12 @@ def brozzler_worker(argv=None):
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help=argparse.SUPPRESS,
|
help=argparse.SUPPRESS,
|
||||||
)
|
)
|
||||||
|
arg_parser.add_argument(
|
||||||
|
"--ytdlp_tmpdir",
|
||||||
|
dest="ytdlp_tmpdir",
|
||||||
|
default="/tmp",
|
||||||
|
help=argparse.SUPPRESS,
|
||||||
|
)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
"--stealth",
|
"--stealth",
|
||||||
dest="stealth",
|
dest="stealth",
|
||||||
@ -613,6 +626,7 @@ def brozzler_worker(argv=None):
|
|||||||
skip_extract_outlinks=args.skip_extract_outlinks,
|
skip_extract_outlinks=args.skip_extract_outlinks,
|
||||||
skip_visit_hashtags=args.skip_visit_hashtags,
|
skip_visit_hashtags=args.skip_visit_hashtags,
|
||||||
skip_youtube_dl=args.skip_youtube_dl,
|
skip_youtube_dl=args.skip_youtube_dl,
|
||||||
|
ytdlp_tmpdir=args.ytdlp_tmpdir,
|
||||||
stealth=args.stealth,
|
stealth=args.stealth,
|
||||||
metrics_port=args.metrics_port,
|
metrics_port=args.metrics_port,
|
||||||
registry_url=args.registry_url,
|
registry_url=args.registry_url,
|
||||||
|
@ -64,6 +64,7 @@ class BrozzlerWorker:
|
|||||||
skip_extract_outlinks=False,
|
skip_extract_outlinks=False,
|
||||||
skip_visit_hashtags=False,
|
skip_visit_hashtags=False,
|
||||||
skip_youtube_dl=False,
|
skip_youtube_dl=False,
|
||||||
|
ytdlp_tmpdir="/tmp",
|
||||||
simpler404=False,
|
simpler404=False,
|
||||||
screenshot_full_page=False,
|
screenshot_full_page=False,
|
||||||
page_timeout=300,
|
page_timeout=300,
|
||||||
@ -89,6 +90,7 @@ class BrozzlerWorker:
|
|||||||
self._skip_extract_outlinks = skip_extract_outlinks
|
self._skip_extract_outlinks = skip_extract_outlinks
|
||||||
self._skip_visit_hashtags = skip_visit_hashtags
|
self._skip_visit_hashtags = skip_visit_hashtags
|
||||||
self._skip_youtube_dl = skip_youtube_dl
|
self._skip_youtube_dl = skip_youtube_dl
|
||||||
|
self._ytdlp_tmpdir = ytdlp_tmpdir
|
||||||
self._simpler404 = simpler404
|
self._simpler404 = simpler404
|
||||||
self._screenshot_full_page = screenshot_full_page
|
self._screenshot_full_page = screenshot_full_page
|
||||||
self._page_timeout = page_timeout
|
self._page_timeout = page_timeout
|
||||||
@ -445,6 +447,7 @@ class BrozzlerWorker:
|
|||||||
skip_extract_outlinks=self._skip_extract_outlinks,
|
skip_extract_outlinks=self._skip_extract_outlinks,
|
||||||
skip_visit_hashtags=self._skip_visit_hashtags,
|
skip_visit_hashtags=self._skip_visit_hashtags,
|
||||||
skip_youtube_dl=self._skip_youtube_dl,
|
skip_youtube_dl=self._skip_youtube_dl,
|
||||||
|
ytdlp_tmpdir=self._ytdlp_tmpdir,
|
||||||
simpler404=self._simpler404,
|
simpler404=self._simpler404,
|
||||||
screenshot_full_page=self._screenshot_full_page,
|
screenshot_full_page=self._screenshot_full_page,
|
||||||
page_timeout=self._page_timeout,
|
page_timeout=self._page_timeout,
|
||||||
|
@ -422,7 +422,10 @@ def do_youtube_dl(worker, site, page):
|
|||||||
Returns:
|
Returns:
|
||||||
`list` of `str`: outlink urls
|
`list` of `str`: outlink urls
|
||||||
"""
|
"""
|
||||||
with tempfile.TemporaryDirectory(prefix="brzl-ydl-") as tempdir:
|
with tempfile.TemporaryDirectory(
|
||||||
|
prefix="brzl-ydl-", dir=worker._ytdlp_tmpdir
|
||||||
|
) as tempdir:
|
||||||
|
logging.info("tempdir for yt-dlp: %s", tempdir)
|
||||||
ydl = _build_youtube_dl(worker, tempdir, site, page)
|
ydl = _build_youtube_dl(worker, tempdir, site, page)
|
||||||
ie_result = _try_youtube_dl(worker, ydl, site, page)
|
ie_result = _try_youtube_dl(worker, ydl, site, page)
|
||||||
outlinks = set()
|
outlinks = set()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user