mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-07-26 00:05:42 -04:00
WT-2950 video capture options enum
This commit is contained in:
parent
77e6b9ed52
commit
66263f0371
3 changed files with 10 additions and 3 deletions
|
@ -34,6 +34,7 @@ import urllib
|
||||||
import uuid
|
import uuid
|
||||||
import yaml
|
import yaml
|
||||||
import zlib
|
import zlib
|
||||||
|
from enum import Enum
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
@ -219,6 +220,10 @@ class Job(doublethink.Document, ElapsedMixIn):
|
||||||
self.status = "FINISHED"
|
self.status = "FINISHED"
|
||||||
self.starts_and_stops[-1]["stop"] = doublethink.utcnow()
|
self.starts_and_stops[-1]["stop"] = doublethink.utcnow()
|
||||||
|
|
||||||
|
class VideoCaptureOptions(str, Enum):
    """Allowed values for a site's ``video_capture`` setting.

    The setting is kept on the site document as a plain string, and
    ``Site`` defaults it to ``ENABLE_VIDEO_CAPTURE`` when it is absent.

    Members also subclass ``str`` so that each one compares equal to the
    string it is stored as.  With a plain ``Enum`` a comparison such as
    ``site.video_capture != VideoCaptureOptions.ENABLE_VIDEO_CAPTURE`` is
    always true unless the caller remembers ``.value``; the ``str`` mix-in
    makes both spellings behave the same.  Existing ``.value`` call sites
    keep working unchanged.
    """

    ENABLE_VIDEO_CAPTURE = "ENABLE_VIDEO_CAPTURE"
    LIMIT_VIDEO_CAPTURE = "LIMIT_VIDEO_CAPTURE"
    DISABLE_YTDLP_CAPTURE = "DISABLE_YTDLP_CAPTURE"
|
||||||
|
|
||||||
class Site(doublethink.Document, ElapsedMixIn):
|
class Site(doublethink.Document, ElapsedMixIn):
|
||||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||||
|
@ -236,7 +241,7 @@ class Site(doublethink.Document, ElapsedMixIn):
|
||||||
if "scope" not in self:
|
if "scope" not in self:
|
||||||
self.scope = {}
|
self.scope = {}
|
||||||
if "video_capture" not in self:
|
if "video_capture" not in self:
|
||||||
self.video_capture = "ENABLE_VIDEO_CAPTURE"
|
self.video_capture = VideoCaptureOptions.ENABLE_VIDEO_CAPTURE.value
|
||||||
|
|
||||||
# backward compatibility
|
# backward compatibility
|
||||||
if "surt" in self.scope:
|
if "surt" in self.scope:
|
||||||
|
|
|
@ -21,6 +21,7 @@ limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
import brozzler
|
import brozzler
|
||||||
import brozzler.browser
|
import brozzler.browser
|
||||||
|
from brozzler.model import VideoCaptureOptions
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
@ -250,7 +251,7 @@ class BrozzlerWorker:
|
||||||
self.logger.info("needs fetch: %s", page)
|
self.logger.info("needs fetch: %s", page)
|
||||||
if (
|
if (
|
||||||
self._is_video_type(page_headers)
|
self._is_video_type(page_headers)
|
||||||
and site.video_capture != "ENABLE_VIDEO_CAPTURE"
|
and site.video_capture != VideoCaptureOptions.ENABLE_VIDEO_CAPTURE.value
|
||||||
):
|
):
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
"skipping video content: video capture disabled for site"
|
"skipping video content: video capture disabled for site"
|
||||||
|
|
|
@ -20,6 +20,7 @@ import logging
|
||||||
import yt_dlp
|
import yt_dlp
|
||||||
from yt_dlp.utils import match_filter_func
|
from yt_dlp.utils import match_filter_func
|
||||||
import brozzler
|
import brozzler
|
||||||
|
from brozzler.model import VideoCaptureOptions
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import tempfile
|
import tempfile
|
||||||
import urlcanon
|
import urlcanon
|
||||||
|
@ -38,7 +39,7 @@ def should_ytdlp(site, page, page_status):
|
||||||
if page_status != 200:
|
if page_status != 200:
|
||||||
logging.info("skipping ytdlp: non-200 page status %s", page_status)
|
logging.info("skipping ytdlp: non-200 page status %s", page_status)
|
||||||
return False
|
return False
|
||||||
if site.video_capture != "ENABLE_VIDEO_CAPTURE":
|
if site.video_capture != VideoCaptureOptions.ENABLE_VIDEO_CAPTURE.value:
|
||||||
logging.info("skipping ytdlp: site has video capture disabled")
|
logging.info("skipping ytdlp: site has video capture disabled")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue