mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-08 06:22:23 -04:00
Merge branch 'master' into qa
* master: monkey-patch youtube-dl to short-circuit
This commit is contained in:
commit
109d05c59a
2 changed files with 11 additions and 1 deletions
|
@ -39,6 +39,16 @@ import rethinkdb as r
|
|||
import datetime
|
||||
import urllib.parse
|
||||
|
||||
# Monkey-patch youtube-dl's GenericIE so that very large responses are not
# handed to the extractor. Keep a handle on the stock implementation first so
# the wrapper below can delegate to it.
_orig_webpage_read_content = youtube_dl.extractor.generic.GenericIE._webpage_read_content
def _webpage_read_content(self, *args, **kwargs):
    """Read webpage content via youtube-dl, short-circuiting on huge pages.

    Delegates to the original ``GenericIE._webpage_read_content`` with the
    same arguments. If the returned content is longer than 20000000
    characters, a warning is logged and an empty string is returned in its
    place; otherwise the content is returned unchanged.

    Args:
        *args: Positional arguments forwarded to the original method.
        **kwargs: Keyword arguments forwarded to the original method.

    Returns:
        The content produced by the original method, or ``''`` when that
        content exceeds the size limit.
    """
    content = _orig_webpage_read_content(self, *args, **kwargs)
    if len(content) > 20000000:
        logging.warning(
                'bypassing youtube-dl extraction because content is '
                'too large (%s characters)', len(content))
        return ''
    # Normal path: hand the content back to the caller. Without this return
    # the wrapper would yield None for every page under the size limit.
    return content
youtube_dl.extractor.generic.GenericIE._webpage_read_content = _webpage_read_content
|
||||
|
||||
class ExtraHeaderAdder(urllib.request.BaseHandler):
|
||||
def __init__(self, extra_headers):
|
||||
self.extra_headers = extra_headers
|
||||
|
|
2
setup.py
2
setup.py
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
|||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.1b13.dev290',
|
||||
version='1.1b13.dev291',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue