mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
Merge branch 'master' into qa
* master:
  fix exception happening now that we have binary data in rethinkdb (the cookie db) "TypeError: <binary, 7168 bytes, '53 51 4c 69 74 65...'> is not JSON serializable"
  dev version number again
  another version for pypi
  avoid "Uncaught RangeError: Maximum call stack size exceeded" compiling outlinks
  back to a dev version number
  bump version to 1.1b4 for pypi upload
  logging tweak
  install brozzler.webconsole package
This commit is contained in:
commit
10c90431e6
@ -301,17 +301,19 @@ class Browser:
|
||||
self._behavior = None
|
||||
|
||||
OUTLINKS_JS = """
|
||||
var compileOutlinks = function(frame) {
|
||||
var __brzl_framesDone = new Set();
|
||||
var __brzl_compileOutlinks = function(frame) {
|
||||
__brzl_framesDone.add(frame);
|
||||
var outlinks = Array.prototype.slice.call(
|
||||
frame.document.querySelectorAll('a[href]'));
|
||||
for (var i = 0; i < frame.frames.length; i++) {
|
||||
if (frame.frames[i]) { // sometimes undefined (why?)
|
||||
outlinks = outlinks.concat(compileOutlinks(frame.frames[i]));
|
||||
if (frame.frames[i] && !__brzl_framesDone.has(frame.frames[i])) {
|
||||
outlinks = outlinks.concat(__brzl_compileOutlinks(frame.frames[i]));
|
||||
}
|
||||
}
|
||||
return outlinks;
|
||||
}
|
||||
compileOutlinks(window).join(' ');
|
||||
__brzl_compileOutlinks(window).join(' ');
|
||||
"""
|
||||
|
||||
def _chain_chrome_messages(self, chain):
|
||||
@ -540,7 +542,7 @@ compileOutlinks(window).join(' ');
|
||||
elif "result" in message:
|
||||
if message["id"] in self._waiting_on_result_messages:
|
||||
callback = self._waiting_on_result_messages[message["id"]]
|
||||
self.logger.info(
|
||||
self.logger.debug(
|
||||
"received result for message id=%s, calling %s",
|
||||
message["id"], callback)
|
||||
callback(message)
|
||||
|
@ -34,6 +34,7 @@ import os
|
||||
import importlib
|
||||
import rethinkdb
|
||||
import yaml
|
||||
import base64
|
||||
|
||||
# flask does its own logging config
|
||||
# logging.basicConfig(
|
||||
@ -120,8 +121,10 @@ def page_yaml(page_id):
|
||||
@app.route("/api/sites/<site_id>")
|
||||
@app.route("/api/site/<site_id>")
|
||||
def site(site_id):
|
||||
site_ = r.table("sites").get(site_id).run()
|
||||
return flask.jsonify(site_)
|
||||
s = r.table("sites").get(site_id).run()
|
||||
if "cookie_db" in s:
|
||||
s["cookie_db"] = base64.b64encode(s["cookie_db"]).decode("ascii")
|
||||
return flask.jsonify(s)
|
||||
|
||||
@app.route("/api/sites/<site_id>/yaml")
|
||||
@app.route("/api/site/<site_id>/yaml")
|
||||
@ -139,8 +142,12 @@ def stats(bucket):
|
||||
@app.route("/api/jobs/<int:job_id>/sites")
|
||||
@app.route("/api/job/<int:job_id>/sites")
|
||||
def sites(job_id):
|
||||
sites_ = r.table("sites").get_all(job_id, index="job_id").run()
|
||||
return flask.jsonify(sites=list(sites_))
|
||||
sites_ = list(r.table("sites").get_all(job_id, index="job_id").run())
|
||||
# TypeError: <binary, 7168 bytes, '53 51 4c 69 74 65...'> is not JSON serializable
|
||||
for s in sites_:
|
||||
if "cookie_db" in s:
|
||||
s["cookie_db"] = base64.b64encode(s["cookie_db"]).decode("ascii")
|
||||
return flask.jsonify(sites=sites_)
|
||||
|
||||
@app.route("/api/jobs/<int:job_id>")
|
||||
@app.route("/api/job/<int:job_id>")
|
||||
|
20
setup.py
20
setup.py
@ -18,18 +18,32 @@ limitations under the License.
|
||||
'''
|
||||
|
||||
import setuptools
|
||||
import os
|
||||
|
||||
def find_package_data(package):
    '''
    Collect the non-python data files that live under a package directory.

    The dotted `package` name is turned into a relative filesystem path
    (e.g. 'brozzler.webconsole' -> 'brozzler/webconsole') and walked with
    os.walk(), so the result depends on the current working directory.

    Files located in a directory that contains an `__init__.py` are not
    reported; the walk still descends into that directory's children.

    Args:
        package: dotted package name, e.g. 'brozzler.webconsole'

    Returns:
        list of file paths relative to the package directory, suitable
        for use as a `package_data` value in setuptools.setup()
    '''
    parts = package.split('.')
    depth = len(parts)
    pkg_data = []
    for dirpath, _dirnames, filenames in os.walk(os.path.join(*parts)):
        if os.path.exists(os.path.join(dirpath, '__init__.py')):
            continue
        # strip the leading package components to get a package-relative dir
        rel_parts = dirpath.split(os.sep)[depth:]
        # rel_parts is empty when the package root itself has no
        # __init__.py; os.path.join() with no arguments raises TypeError,
        # so fall back to '' (files then keep their bare names)
        relpath = os.path.join(*rel_parts) if rel_parts else ''
        pkg_data.extend(os.path.join(relpath, f) for f in filenames)
    return pkg_data
|
||||
|
||||
setuptools.setup(
|
||||
name='brozzler',
|
||||
version='1.1b4.dev63',
|
||||
version='1.1b6.dev69',
|
||||
description='Distributed web crawling with browsers',
|
||||
url='https://github.com/internetarchive/brozzler',
|
||||
author='Noah Levitt',
|
||||
author_email='nlevitt@archive.org',
|
||||
long_description=open('README.rst', encoding='UTF-8').read(),
|
||||
license='Apache License 2.0',
|
||||
packages=['brozzler'],
|
||||
package_data={'brozzler': ['behaviors.d/*.js*', 'behaviors.yaml']},
|
||||
packages=['brozzler', 'brozzler.webconsole'],
|
||||
package_data={
|
||||
'brozzler': ['behaviors.d/*.js*', 'behaviors.yaml'],
|
||||
'brozzler.webconsole': find_package_data('brozzler.webconsole'),
|
||||
},
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'brozzle-page=brozzler.cli:brozzle_page',
|
||||
|
Loading…
x
Reference in New Issue
Block a user