diff --git a/brozzler/chrome.py b/brozzler/chrome.py index e9d3ef0..f72ff8d 100644 --- a/brozzler/chrome.py +++ b/brozzler/chrome.py @@ -117,7 +117,7 @@ class Chrome: # these can raise exceptions self._home_tmpdir = tempfile.TemporaryDirectory() self._chrome_user_data_dir = os.path.join( - self._home_tmpdir.name, 'chrome-user-data'), + self._home_tmpdir.name, 'chrome-user-data') self._init_cookie_db() new_env = os.environ.copy() @@ -279,7 +279,7 @@ class Chrome: self._home_tmpdir.cleanup() except: self.logger.error( - "exception deleting %s", self._home_tmpdir, + 'exception deleting %s', self._home_tmpdir, exc_info=True) finally: self._out_reader_thread.join() diff --git a/brozzler/pywb.py b/brozzler/pywb.py index 9b0a0f7..7b0b95a 100644 --- a/brozzler/pywb.py +++ b/brozzler/pywb.py @@ -70,7 +70,7 @@ class RethinkCDXSource(pywb.cdx.cdxsource.CDXSource): 'filename': record['filename'], } if record['warc_type'] != 'revisit': - blob['mime'] = record['content_type'] + blob['mime'] = record['content_type'] or '-' else: blob['mime'] = 'warc/revisit' # b'org,archive)/ 20160427215530 {"url": "https://archive.org/", "mime": "text/html", "status": "200", "digest": "VILUFXZD232SLUA6XROZQIMEVUPW6EIE", "length": "16001", "offset": "90144", "filename": "ARCHIVEIT-261-ONE_TIME-JOB209607-20160427215508135-00000.warc.gz"}' diff --git a/setup.py b/setup.py index 2fd868a..7fd25c6 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def find_package_data(package): setuptools.setup( name='brozzler', - version='1.1b8.dev135', + version='1.1b8.dev137', description='Distributed web crawling with browsers', url='https://github.com/internetarchive/brozzler', author='Noah Levitt', diff --git a/tests/test_units.py b/tests/test_units.py index acd5fd0..f8042f0 100644 --- a/tests/test_units.py +++ b/tests/test_units.py @@ -25,6 +25,7 @@ import brozzler import brozzler.chrome import socket import logging +import psutil @pytest.fixture(scope='module') def httpd(request):