mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-25 00:59:52 -05:00
Merge branch 'rotary_skip_ytdlp' into qa
This commit is contained in:
commit
cf654bf07e
@@ -545,10 +545,12 @@ def brozzler_worker(argv=None):
|
|||||||
signal.signal(signal.SIGQUIT, dump_state)
|
signal.signal(signal.SIGQUIT, dump_state)
|
||||||
|
|
||||||
def get_skip_av_seeds():
|
def get_skip_av_seeds():
|
||||||
|
# TODO: develop UI and refactor
|
||||||
SKIP_AV_SEEDS_FILE = "/opt/local/brozzler/skip_av_seeds.txt"
|
SKIP_AV_SEEDS_FILE = "/opt/local/brozzler/skip_av_seeds.txt"
|
||||||
try:
|
try:
|
||||||
with open(skip_av_seeds_file) as skips:
|
# make set from seed IDs in SKIP_AV_SEEDS_FILE
|
||||||
skip_av_seeds = set(skips.readlines())
|
with open(SKIP_AV_SEEDS_FILE) as skips:
|
||||||
|
skip_av_seeds = {int(l) for l in skips.readlines()}
|
||||||
logging.info("running with skip_av_seeds file %s" % SKIP_AV_SEEDS_FILE)
|
logging.info("running with skip_av_seeds file %s" % SKIP_AV_SEEDS_FILE)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
skip_av_seeds = set()
|
skip_av_seeds = set()
|
||||||
@@ -562,7 +564,7 @@ def brozzler_worker(argv=None):
|
|||||||
worker = brozzler.worker.BrozzlerWorker(
|
worker = brozzler.worker.BrozzlerWorker(
|
||||||
frontier,
|
frontier,
|
||||||
service_registry,
|
service_registry,
|
||||||
skip_av_seeds,
|
skip_av_seeds=skip_av_seeds,
|
||||||
max_browsers=int(args.max_browsers),
|
max_browsers=int(args.max_browsers),
|
||||||
chrome_exe=args.chrome_exe,
|
chrome_exe=args.chrome_exe,
|
||||||
proxy=args.proxy,
|
proxy=args.proxy,
|
||||||
|
@@ -263,7 +263,7 @@ class BrozzlerWorker:
|
|||||||
except brozzler.PageInterstitialShown:
|
except brozzler.PageInterstitialShown:
|
||||||
self.logger.info("page interstitial shown (http auth): %s", page)
|
self.logger.info("page interstitial shown (http auth): %s", page)
|
||||||
|
|
||||||
if enable_youtube_dl and ydl.should_ytdlp(self, site, page):
|
if enable_youtube_dl and ydl.should_ytdlp(site, page, self.skip_av_seeds):
|
||||||
try:
|
try:
|
||||||
ydl_outlinks = ydl.do_youtube_dl(self, site, page)
|
ydl_outlinks = ydl.do_youtube_dl(self, site, page)
|
||||||
outlinks.update(ydl_outlinks)
|
outlinks.update(ydl_outlinks)
|
||||||
|
@@ -51,7 +51,7 @@ def _timestamp4datetime(timestamp):
|
|||||||
int(timestamp[-2:])
|
int(timestamp[-2:])
|
||||||
)
|
)
|
||||||
|
|
||||||
def should_ytdlp(worker, site, page):
|
def should_ytdlp(site, page, skip_av_seeds):
|
||||||
# called only after we've passed needs_browsing() check
|
# called only after we've passed needs_browsing() check
|
||||||
if page.status_code != 200:
|
if page.status_code != 200:
|
||||||
logging.info("skipping ytdlp: non-200 page status")
|
logging.info("skipping ytdlp: non-200 page status")
|
||||||
@@ -71,7 +71,8 @@ def should_ytdlp(worker, site, page):
|
|||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
|
||||||
if ytdlp_seed and ytdlp_seed in worker.skip_av_seeds:
|
# TODO: develop UI and refactor
|
||||||
|
if ytdlp_seed and ytdlp_seed in skip_av_seeds:
|
||||||
logging.info("skipping ytdlp: site in skip_av_seeds")
|
logging.info("skipping ytdlp: site in skip_av_seeds")
|
||||||
site.skip_ytdlp = True
|
site.skip_ytdlp = True
|
||||||
return False
|
return False
|
||||||
|
Loading…
x
Reference in New Issue
Block a user