Add a configurable temporary directory for yt-dlp downloads.

A new `ytdlp_tmpdir` setting (default "/tmp") is accepted by the
`brozzle_page` and `brozzler_worker` CLI entry points, stored on
BrozzlerWorker as `_ytdlp_tmpdir`, added to a Browser method signature,
and used by `do_youtube_dl` as the parent directory of its "brzl-ydl-"
temporary directory. In `brozzler_worker` the option is hidden from
--help with the argparse.SUPPRESS sentinel, matching --skip-youtube-dl.

diff --git a/brozzler/browser.py b/brozzler/browser.py
index 4b602f1..aa8893e 100644
--- a/brozzler/browser.py
+++ b/brozzler/browser.py
@@ -483,6 +483,7 @@ class Browser:
         skip_extract_outlinks=False,
         skip_visit_hashtags=False,
         skip_youtube_dl=False,
+        ytdlp_tmpdir="/tmp",
         simpler404=False,
         page_timeout=300,
         behavior_timeout=900,
diff --git a/brozzler/cli.py b/brozzler/cli.py
index 3927bfe..43bdd7a 100755
--- a/brozzler/cli.py
+++ b/brozzler/cli.py
@@ -265,6 +265,12 @@ def brozzle_page(argv=None):
     arg_parser.add_argument(
         "--skip-youtube-dl", dest="skip_youtube_dl", action="store_true"
     )
+    arg_parser.add_argument(
+        "--ytdlp_tmpdir",
+        dest="ytdlp_tmpdir",
+        default="/tmp",
+        help="specify a temp dir for ytdlp; defaults to /tmp",
+    )
     arg_parser.add_argument("--simpler404", dest="simpler404", action="store_true")
     add_common_options(arg_parser, argv)
 
@@ -292,6 +298,7 @@ def brozzle_page(argv=None):
         skip_extract_outlinks=args.skip_extract_outlinks,
         skip_visit_hashtags=args.skip_visit_hashtags,
         skip_youtube_dl=args.skip_youtube_dl,
+        ytdlp_tmpdir=args.ytdlp_tmpdir,
         simpler404=args.simpler404,
         screenshot_full_page=args.screenshot_full_page,
         download_throughput=args.download_throughput,
@@ -533,6 +540,12 @@ def brozzler_worker(argv=None):
         action="store_true",
         help=argparse.SUPPRESS,
     )
+    arg_parser.add_argument(
+        "--ytdlp_tmpdir",
+        dest="ytdlp_tmpdir",
+        default="/tmp",
+        help=argparse.SUPPRESS,
+    )
     arg_parser.add_argument(
         "--stealth",
         dest="stealth",
@@ -598,6 +611,7 @@ def brozzler_worker(argv=None):
         skip_extract_outlinks=args.skip_extract_outlinks,
         skip_visit_hashtags=args.skip_visit_hashtags,
         skip_youtube_dl=args.skip_youtube_dl,
+        ytdlp_tmpdir=args.ytdlp_tmpdir,
         stealth=args.stealth,
         metrics_port=args.metrics_port,
         registry_url=args.registry_url,
diff --git a/brozzler/worker.py b/brozzler/worker.py
index 6829170..b919a61 100644
--- a/brozzler/worker.py
+++ b/brozzler/worker.py
@@ -64,6 +64,7 @@ class BrozzlerWorker:
         skip_extract_outlinks=False,
         skip_visit_hashtags=False,
         skip_youtube_dl=False,
+        ytdlp_tmpdir="/tmp",
         simpler404=False,
         screenshot_full_page=False,
         page_timeout=300,
@@ -88,6 +89,7 @@ class BrozzlerWorker:
         self._skip_extract_outlinks = skip_extract_outlinks
         self._skip_visit_hashtags = skip_visit_hashtags
         self._skip_youtube_dl = skip_youtube_dl
+        self._ytdlp_tmpdir = ytdlp_tmpdir
         self._simpler404 = simpler404
         self._screenshot_full_page = screenshot_full_page
         self._page_timeout = page_timeout
@@ -471,6 +473,7 @@ class BrozzlerWorker:
             skip_extract_outlinks=self._skip_extract_outlinks,
             skip_visit_hashtags=self._skip_visit_hashtags,
             skip_youtube_dl=self._skip_youtube_dl,
+            ytdlp_tmpdir=self._ytdlp_tmpdir,
             simpler404=self._simpler404,
             screenshot_full_page=self._screenshot_full_page,
             page_timeout=self._page_timeout,
diff --git a/brozzler/ydl.py b/brozzler/ydl.py
index 88b49c6..89a9fec 100644
--- a/brozzler/ydl.py
+++ b/brozzler/ydl.py
@@ -429,7 +429,10 @@ def do_youtube_dl(worker, site, page):
     Returns:
         `list` of `str`: outlink urls
     """
-    with tempfile.TemporaryDirectory(prefix="brzl-ydl-", dir="/tmp") as tempdir:
+    with tempfile.TemporaryDirectory(
+        prefix="brzl-ydl-", dir=worker._ytdlp_tmpdir
+    ) as tempdir:
+        logging.info("tempdir for yt-dlp: %s", tempdir)
         ydl = _build_youtube_dl(worker, tempdir, site, page)
         ie_result = _try_youtube_dl(worker, ydl, site, page)
         outlinks = set()