Merge branch 'rotary_skip_ytdlp' into qa

This commit is contained in:
Barbara Miller 2024-05-30 10:15:37 -07:00
commit cf654bf07e
3 changed files with 9 additions and 6 deletions

View File

@@ -545,10 +545,12 @@ def brozzler_worker(argv=None):
signal.signal(signal.SIGQUIT, dump_state) signal.signal(signal.SIGQUIT, dump_state)
def get_skip_av_seeds(): def get_skip_av_seeds():
# TODO: develop UI and refactor
SKIP_AV_SEEDS_FILE = "/opt/local/brozzler/skip_av_seeds.txt" SKIP_AV_SEEDS_FILE = "/opt/local/brozzler/skip_av_seeds.txt"
try: try:
with open(skip_av_seeds_file) as skips: # make set from seed IDs in SKIP_AV_SEEDS_FILE
skip_av_seeds = set(skips.readlines()) with open(SKIP_AV_SEEDS_FILE) as skips:
skip_av_seeds = {int(l) for l in skips.readlines()}
logging.info("running with skip_av_seeds file %s" % SKIP_AV_SEEDS_FILE) logging.info("running with skip_av_seeds file %s" % SKIP_AV_SEEDS_FILE)
except Exception as e: except Exception as e:
skip_av_seeds = set() skip_av_seeds = set()
@@ -562,7 +564,7 @@ def brozzler_worker(argv=None):
worker = brozzler.worker.BrozzlerWorker( worker = brozzler.worker.BrozzlerWorker(
frontier, frontier,
service_registry, service_registry,
skip_av_seeds, skip_av_seeds=skip_av_seeds,
max_browsers=int(args.max_browsers), max_browsers=int(args.max_browsers),
chrome_exe=args.chrome_exe, chrome_exe=args.chrome_exe,
proxy=args.proxy, proxy=args.proxy,

View File

@@ -263,7 +263,7 @@ class BrozzlerWorker:
except brozzler.PageInterstitialShown: except brozzler.PageInterstitialShown:
self.logger.info("page interstitial shown (http auth): %s", page) self.logger.info("page interstitial shown (http auth): %s", page)
if enable_youtube_dl and ydl.should_ytdlp(self, site, page): if enable_youtube_dl and ydl.should_ytdlp(site, page, self.skip_av_seeds):
try: try:
ydl_outlinks = ydl.do_youtube_dl(self, site, page) ydl_outlinks = ydl.do_youtube_dl(self, site, page)
outlinks.update(ydl_outlinks) outlinks.update(ydl_outlinks)

View File

@@ -51,7 +51,7 @@ def _timestamp4datetime(timestamp):
int(timestamp[-2:]) int(timestamp[-2:])
) )
def should_ytdlp(worker, site, page): def should_ytdlp(site, page, skip_av_seeds):
# called only after we've passed needs_browsing() check # called only after we've passed needs_browsing() check
if page.status_code != 200: if page.status_code != 200:
logging.info("skipping ytdlp: non-200 page status") logging.info("skipping ytdlp: non-200 page status")
@@ -71,7 +71,8 @@ def should_ytdlp(worker, site, page):
else None else None
) )
if ytdlp_seed and ytdlp_seed in worker.skip_av_seeds: # TODO: develop UI and refactor
if ytdlp_seed and ytdlp_seed in skip_av_seeds:
logging.info("skipping ytdlp: site in skip_av_seeds") logging.info("skipping ytdlp: site in skip_av_seeds")
site.skip_ytdlp = True site.skip_ytdlp = True
return False return False