mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 00:29:53 -05:00
skip ytdlp for Rotary seeds
This commit is contained in:
parent
a7c2d469ca
commit
ff02480a13
121
brozzler/ydl.py
121
brozzler/ydl.py
@@ -31,10 +31,131 @@ import threading
|
||||
|
||||
thread_local = threading.local()
|
||||
|
||||
# Seed ids for which yt-dlp is skipped.  should_ytdlp() looks up
# site["metadata"]["ait_seed_id"] in this set and, on a match, marks the
# site skip_ytdlp and returns False.
# NOTE(review): the introducing commit is titled "skip ytdlp for Rotary
# seeds" -- confirm that every id below belongs to that group before adding
# unrelated seeds here.
# Kept as a plain (mutable) set, one id per line in ascending order, so that
# additions and removals produce minimal diffs.
skip_ytdlp_seeds = {
    166569,
    166570,
    166571,
    166572,
    166573,
    171054,
    577504,
    577505,
    577506,
    577507,
    577508,
    579556,
    588597,
    588599,
    588604,
    657452,
    680067,
    931642,
    1020763,
    1020845,
    1102795,
    1126155,
    1355999,
    1356000,
    1356001,
    1356002,
    1356003,
    1356004,
    1381601,
    1400183,
    1407124,
    1430611,
    1561452,
    2181615,
    2277187,
    2287692,
    2293805,
    2315198,
    2320887,
    2320889,
    2320890,
    2320891,
    2320892,
    2451964,
    2451965,
    2517850,
    2517851,
    2517852,
    2518225,
    2518226,
    2518227,
    2518228,
    2528222,
    2528223,
    2528224,
    2528225,
    2528227,
    2528800,
    2528801,
    2528802,
    2528803,
    2528847,
    2528848,
    2528849,
    2528850,
    2528851,
    2528852,
    2528853,
    2528854,
    2530393,
    2530394,
    2530395,
    2530396,
    2530397,
    2530398,
    2530399,
    2530400,
    2530401,
    2530402,
    2530403,
    2530404,
    2530408,
    2530409,
    2530410,
    2530411,
    2530412,
    2530413,
    2530414,
    2530415,
    2530416,
    2530417,
    2530418,
    2530419,
    2553200,
    2553201,
    2553202,
    2553203,
    2553204,
    2634329,
    2826641,
    2894571,
    2895333,
    3062930,
    3084847,
    3085989,
    3223637,
    3223656,
}
|
||||
|
||||
|
||||
def should_ytdlp(page, site):
|
||||
# called only after we've passed needs_browsing() check
|
||||
if page.status_code != 200:
|
||||
logging.info("skipping ytdlp: non-200 page status")
|
||||
return False
|
||||
if site.skip_ytdlp:
|
||||
logging.info("skipping ytdlp: site marked skip_ytdp")
|
||||
return False
|
||||
|
||||
ytdlp_seed = site["metadata"]["ait_seed_id"]
|
||||
|
||||
if ytdlp_seed in skip_ytdlp_seeds:
|
||||
logging.info("skipping ytdlp: site in skip_ytdlp_seeds")
|
||||
site.skip_ytdlp = True
|
||||
return False
|
||||
|
||||
ytdlp_url = page.redirect_url if page.redirect_url else page.url
|
||||
|
Loading…
x
Reference in New Issue
Block a user