mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-06-20 12:54:23 -04:00
little fixes
This commit is contained in:
parent
0b6c5346bd
commit
eed8b9ec30
4 changed files with 5 additions and 4 deletions
|
@ -117,7 +117,7 @@ class Chrome:
|
||||||
# these can raise exceptions
|
# these can raise exceptions
|
||||||
self._home_tmpdir = tempfile.TemporaryDirectory()
|
self._home_tmpdir = tempfile.TemporaryDirectory()
|
||||||
self._chrome_user_data_dir = os.path.join(
|
self._chrome_user_data_dir = os.path.join(
|
||||||
self._home_tmpdir.name, 'chrome-user-data'),
|
self._home_tmpdir.name, 'chrome-user-data')
|
||||||
self._init_cookie_db()
|
self._init_cookie_db()
|
||||||
|
|
||||||
new_env = os.environ.copy()
|
new_env = os.environ.copy()
|
||||||
|
@ -279,7 +279,7 @@ class Chrome:
|
||||||
self._home_tmpdir.cleanup()
|
self._home_tmpdir.cleanup()
|
||||||
except:
|
except:
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
"exception deleting %s", self._home_tmpdir,
|
'exception deleting %s', self._home_tmpdir,
|
||||||
exc_info=True)
|
exc_info=True)
|
||||||
finally:
|
finally:
|
||||||
self._out_reader_thread.join()
|
self._out_reader_thread.join()
|
||||||
|
|
|
@ -70,7 +70,7 @@ class RethinkCDXSource(pywb.cdx.cdxsource.CDXSource):
|
||||||
'filename': record['filename'],
|
'filename': record['filename'],
|
||||||
}
|
}
|
||||||
if record['warc_type'] != 'revisit':
|
if record['warc_type'] != 'revisit':
|
||||||
blob['mime'] = record['content_type']
|
blob['mime'] = record['content_type'] or '-'
|
||||||
else:
|
else:
|
||||||
blob['mime'] = 'warc/revisit'
|
blob['mime'] = 'warc/revisit'
|
||||||
# b'org,archive)/ 20160427215530 {"url": "https://archive.org/", "mime": "text/html", "status": "200", "digest": "VILUFXZD232SLUA6XROZQIMEVUPW6EIE", "length": "16001", "offset": "90144", "filename": "ARCHIVEIT-261-ONE_TIME-JOB209607-20160427215508135-00000.warc.gz"}'
|
# b'org,archive)/ 20160427215530 {"url": "https://archive.org/", "mime": "text/html", "status": "200", "digest": "VILUFXZD232SLUA6XROZQIMEVUPW6EIE", "length": "16001", "offset": "90144", "filename": "ARCHIVEIT-261-ONE_TIME-JOB209607-20160427215508135-00000.warc.gz"}'
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -32,7 +32,7 @@ def find_package_data(package):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b8.dev135',
|
version='1.1b8.dev137',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
|
|
@ -25,6 +25,7 @@ import brozzler
|
||||||
import brozzler.chrome
|
import brozzler.chrome
|
||||||
import socket
|
import socket
|
||||||
import logging
|
import logging
|
||||||
|
import psutil
|
||||||
|
|
||||||
@pytest.fixture(scope='module')
|
@pytest.fixture(scope='module')
|
||||||
def httpd(request):
|
def httpd(request):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue