diff --git a/brozzler/cli.py b/brozzler/cli.py index bea5153..d896f7b 100755 --- a/brozzler/cli.py +++ b/brozzler/cli.py @@ -544,27 +544,12 @@ def brozzler_worker(argv=None): finally: signal.signal(signal.SIGQUIT, dump_state) - def get_skip_av_seeds(): - # TODO: develop UI and refactor - SKIP_AV_SEEDS_FILE = "/opt/local/brozzler/skip_av_seeds.txt" - try: - # make set from seed IDs in SKIP_AV_SEEDS_FILE - with open(SKIP_AV_SEEDS_FILE) as skips: - skip_av_seeds = {int(l) for l in skips.readlines()} - logging.info("running with skip_av_seeds file %s" % SKIP_AV_SEEDS_FILE) - except Exception as e: - skip_av_seeds = set() - logging.info("running with empty skip_av_seeds") - return skip_av_seeds - rr = rethinker(args) frontier = brozzler.RethinkDbFrontier(rr) service_registry = doublethink.ServiceRegistry(rr) - skip_av_seeds_from_file = get_skip_av_seeds() worker = brozzler.worker.BrozzlerWorker( frontier, service_registry, - skip_av_seeds=skip_av_seeds_from_file, max_browsers=int(args.max_browsers), chrome_exe=args.chrome_exe, proxy=args.proxy, diff --git a/brozzler/worker.py b/brozzler/worker.py index 479dfa7..5f6fbd0 100644 --- a/brozzler/worker.py +++ b/brozzler/worker.py @@ -54,7 +54,6 @@ class BrozzlerWorker: self, frontier, service_registry=None, - skip_av_seeds=None, max_browsers=1, chrome_exe="chromium-browser", warcprox_auto=False, @@ -74,7 +73,6 @@ class BrozzlerWorker: ): self._frontier = frontier self._service_registry = service_registry - self._skip_av_seeds = skip_av_seeds self._max_browsers = max_browsers self._warcprox_auto = warcprox_auto @@ -262,7 +260,7 @@ class BrozzlerWorker: self.logger.info("page interstitial shown (http auth): %s", page) if enable_youtube_dl and ydl.should_ytdlp( - site, page, browser.websock_thread.page_status, self._skip_av_seeds + site, page, browser.websock_thread.page_status ): try: ydl_outlinks = ydl.do_youtube_dl(self, site, page) diff --git a/brozzler/ydl.py b/brozzler/ydl.py index b292129..7161294 100644 --- a/brozzler/ydl.py +++ b/brozzler/ydl.py @@ -32,7 +32,7 @@ import threading thread_local = threading.local() -def should_ytdlp(site, page, page_status, skip_av_seeds): +def should_ytdlp(site, page, page_status): # called only after we've passed needs_browsing() check if page_status != 200: @@ -47,21 +47,6 @@ def should_ytdlp(site, page, page_status, skip_av_seeds): if "chrome-error:" in ytdlp_url: return False - ytdlp_seed = ( - site["metadata"]["ait_seed_id"] - if "metadata" in site and "ait_seed_id" in site["metadata"] - else None - ) - - # TODO: develop UI and refactor - if ytdlp_seed: - if site.skip_ytdlp is None and ytdlp_seed in skip_av_seeds: - logging.info("skipping ytdlp: site in skip_av_seeds") - site.skip_ytdlp = True - return False - else: - site.skip_ytdlp = False - return True