little fixes

This commit is contained in:
Noah Levitt 2016-12-07 11:20:10 -08:00
parent 0b6c5346bd
commit eed8b9ec30
4 changed files with 5 additions and 4 deletions

View file

@ -117,7 +117,7 @@ class Chrome:
# these can raise exceptions # these can raise exceptions
self._home_tmpdir = tempfile.TemporaryDirectory() self._home_tmpdir = tempfile.TemporaryDirectory()
self._chrome_user_data_dir = os.path.join( self._chrome_user_data_dir = os.path.join(
self._home_tmpdir.name, 'chrome-user-data'), self._home_tmpdir.name, 'chrome-user-data')
self._init_cookie_db() self._init_cookie_db()
new_env = os.environ.copy() new_env = os.environ.copy()
@ -279,7 +279,7 @@ class Chrome:
self._home_tmpdir.cleanup() self._home_tmpdir.cleanup()
except: except:
self.logger.error( self.logger.error(
"exception deleting %s", self._home_tmpdir, 'exception deleting %s', self._home_tmpdir,
exc_info=True) exc_info=True)
finally: finally:
self._out_reader_thread.join() self._out_reader_thread.join()

View file

@ -70,7 +70,7 @@ class RethinkCDXSource(pywb.cdx.cdxsource.CDXSource):
'filename': record['filename'], 'filename': record['filename'],
} }
if record['warc_type'] != 'revisit': if record['warc_type'] != 'revisit':
blob['mime'] = record['content_type'] blob['mime'] = record['content_type'] or '-'
else: else:
blob['mime'] = 'warc/revisit' blob['mime'] = 'warc/revisit'
# b'org,archive)/ 20160427215530 {"url": "https://archive.org/", "mime": "text/html", "status": "200", "digest": "VILUFXZD232SLUA6XROZQIMEVUPW6EIE", "length": "16001", "offset": "90144", "filename": "ARCHIVEIT-261-ONE_TIME-JOB209607-20160427215508135-00000.warc.gz"}' # b'org,archive)/ 20160427215530 {"url": "https://archive.org/", "mime": "text/html", "status": "200", "digest": "VILUFXZD232SLUA6XROZQIMEVUPW6EIE", "length": "16001", "offset": "90144", "filename": "ARCHIVEIT-261-ONE_TIME-JOB209607-20160427215508135-00000.warc.gz"}'

View file

@ -32,7 +32,7 @@ def find_package_data(package):
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.1b8.dev135', version='1.1b8.dev137',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',

View file

@ -25,6 +25,7 @@ import brozzler
import brozzler.chrome import brozzler.chrome
import socket import socket
import logging import logging
import psutil
@pytest.fixture(scope='module') @pytest.fixture(scope='module')
def httpd(request): def httpd(request):