mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-25 00:59:52 -05:00
Merge branch 'rotary_skip_ytdlp' into qa
This commit is contained in:
commit
74808a2d10
@ -34,6 +34,7 @@ import urllib
|
||||
import uuid
|
||||
import yaml
|
||||
import zlib
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@ -191,6 +192,12 @@ class ElapsedMixIn(object):
|
||||
return dt
|
||||
|
||||
|
||||
class YTDLPStatus(Enum):
    """Tri-state marker describing whether yt-dlp should run for a site.

    Members:
        UNKNOWN: no decision has been recorded yet.
        SKIP: yt-dlp is to be skipped.
        CAPTURE: yt-dlp is to be run.

    These are plain ``Enum`` members, so they never compare equal to their
    names as strings (``YTDLPStatus.SKIP != "SKIP"``) nor to their integer
    values, and every member -- including ``UNKNOWN`` -- is truthy. Code
    that inspects a status should compare against the members themselves,
    e.g. ``status is YTDLPStatus.SKIP``.
    """

    UNKNOWN = 0
    SKIP = 1
    CAPTURE = 2
|
||||
|
||||
|
||||
class Job(doublethink.Document, ElapsedMixIn):
|
||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||
table = "jobs"
|
||||
@ -236,7 +243,7 @@ class Site(doublethink.Document, ElapsedMixIn):
|
||||
if not "scope" in self:
|
||||
self.scope = {}
|
||||
if not "skip_ytdlp" in self:
|
||||
self.skip_ytdlp = False
|
||||
self.skip_ytdlp = YTDLPStatus.UNKNOWN
|
||||
|
||||
# backward compatibility
|
||||
if "surt" in self.scope:
|
||||
|
@ -53,10 +53,11 @@ def _timestamp4datetime(timestamp):
|
||||
|
||||
def should_ytdlp(site, page, skip_av_seeds):
|
||||
# called only after we've passed needs_browsing() check
|
||||
from .model import YTDLPStatus
|
||||
if page.status_code != 200:
|
||||
logging.info("skipping ytdlp: non-200 page status")
|
||||
return False
|
||||
if site.skip_ytdlp:
|
||||
if site.skip_ytdlp == "SKIP":
|
||||
logging.info("skipping ytdlp: site marked skip_ytdlp")
|
||||
return False
|
||||
|
||||
@ -72,10 +73,13 @@ def should_ytdlp(site, page, skip_av_seeds):
|
||||
)
|
||||
|
||||
# TODO: develop UI and refactor
|
||||
if ytdlp_seed and ytdlp_seed in skip_av_seeds:
|
||||
if ytdlp_seed
|
||||
if site.skip_ytdlp == "UNKNOWN" and ytdlp_seed in skip_av_seeds:
|
||||
logging.info("skipping ytdlp: site in skip_av_seeds")
|
||||
site.skip_ytdlp = True
|
||||
site.skip_ytdlp = YTDLPStatus.SKIP
|
||||
return False
|
||||
else:
|
||||
site.skip_ytdlp = YTDLPStatus.CAPTURE
|
||||
|
||||
logging.info("checking containing page %s for seed %s", ytdlp_url, ytdlp_seed)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user