Merge branch 'master' into qa

* master:
  monkey-patch youtube-dl to short-circuit
This commit is contained in:
Noah Levitt 2018-06-11 11:11:09 -07:00
commit 109d05c59a
2 changed files with 11 additions and 1 deletions

View File

@ -39,6 +39,16 @@ import rethinkdb as r
import datetime
import urllib.parse
# Keep a reference to youtube-dl's original GenericIE._webpage_read_content so
# that the wrapper defined below can delegate to it.
_orig_webpage_read_content = youtube_dl.extractor.generic.GenericIE._webpage_read_content
def _webpage_read_content(self, *args, **kwargs):
    '''
    Wrapper for `GenericIE._webpage_read_content` that short-circuits
    youtube-dl extraction when the fetched content is very large.

    Delegates to `_orig_webpage_read_content` with the same arguments. If
    the content it returns is longer than 20000000 characters, a warning is
    logged and an empty string is returned in its place; otherwise the
    content is returned unchanged.
    '''
    content = _orig_webpage_read_content(self, *args, **kwargs)
    if len(content) > 20000000:
        # logging.warning rather than the deprecated logging.warn alias
        logging.warning(
                'bypassing youtube-dl extraction because content is '
                'too large (%s characters)', len(content))
        return ''
    # normal-sized content must be handed back to the caller untouched;
    # falling off the end here would make the wrapper return None
    return content
# Monkey-patch: install the wrapper in place of youtube-dl's
# GenericIE._webpage_read_content.
youtube_dl.extractor.generic.GenericIE._webpage_read_content = _webpage_read_content
class ExtraHeaderAdder(urllib.request.BaseHandler):
def __init__(self, extra_headers):
self.extra_headers = extra_headers

View File

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup(
name='brozzler',
version='1.1b13.dev290',
version='1.1b13.dev291',
description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler',
author='Noah Levitt',