mirror of https://github.com/internetarchive/brozzler.git
synced 2025-02-23 16:19:49 -05:00

commit 12fb9eaa15 (parent 479f0f7e09)
use urlcanon library for canonicalization, surtification, scope match rules
@@ -9,7 +9,7 @@ install:
   - ansible-playbook --extra-vars="brozzler_pip_name=file://$TRAVIS_BUILD_DIR#egg=brozzler user=travis" --inventory-file=ansible/hosts-localhost ansible/playbook.yml
   - pip install $TRAVIS_BUILD_DIR pytest
 script:
-  - DISPLAY=:1 py.test -v -s tests
+  - DISPLAY=:1 py.test -v tests
 after_failure:
   - sudo cat /var/log/upstart/warcprox.log
   - sudo cat /var/log/upstart/brozzler-worker.log
@@ -50,31 +50,18 @@ class ReachedLimit(Exception):
     def __str__(self):
         return self.__repr__()
 
-def fixup(url, hash_strip=False):
-    '''
-    Does rudimentary canonicalization, such as converting IDN to punycode.
-    '''
-    import surt
-    hurl = surt.handyurl.parse(url)
-    if hash_strip:
-        hurl.hash = None
-    # handyurl.parse() already lowercases the scheme via urlsplit
-    if hurl.host:
-        hurl.host = hurl.host.encode('idna').decode('ascii').lower()
-    return hurl.getURLString()
-
 # monkey-patch log level TRACE
 TRACE = 5
-import logging as _logging
+import logging
 def _logging_trace(msg, *args, **kwargs):
-    _logging.root.trace(msg, *args, **kwargs)
+    logging.root.trace(msg, *args, **kwargs)
 def _logger_trace(self, msg, *args, **kwargs):
     if self.isEnabledFor(TRACE):
         self._log(TRACE, msg, args, **kwargs)
-_logging.trace = _logging_trace
-_logging.Logger.trace = _logger_trace
-_logging._levelToName[TRACE] = 'TRACE'
-_logging._nameToLevel['TRACE'] = TRACE
+logging.trace = _logging_trace
+logging.Logger.trace = _logger_trace
+logging._levelToName[TRACE] = 'TRACE'
+logging._nameToLevel['TRACE'] = TRACE
 
 _behaviors = None
 def behaviors():
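
Note: the only change in the TRACE monkey-patch above is dropping the
`import logging as _logging` alias in favor of plain `import logging`; the
patch itself is unchanged. It registers level 5 as TRACE and adds trace()
methods at import time. A minimal sketch of using the patched level
(assuming the brozzler package has been imported, which runs this
module-level code):

    import logging
    import brozzler  # module-level code registers level 5 as 'TRACE'

    logging.basicConfig(level=5)  # TRACE sits below DEBUG (10)
    logging.getLogger('example').trace('chatty message: %s', 'details')
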
@@ -158,6 +145,14 @@ def jinja2_environment():
     _jinja2_env.filters['json'] = json.dumps
     return _jinja2_env
 
+import urlcanon
+def _remove_query(url):
+    url.question_mark = b''
+    url.query = b''
+# XXX chop off path after last slash??
+site_surt_canon = urlcanon.Canonicalizer(
+        urlcanon.semantic.steps + [_remove_query])
+
 from brozzler.site import Page, Site
 from brozzler.worker import BrozzlerWorker
 from brozzler.robots import is_permitted_by_robots
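
Note: urlcanon lets callers compose their own canonicalizer from the
library's published steps; site_surt_canon above is urlcanon's semantic
ruleset plus a custom step that blanks the query string, and it is what
computes site scope surts later in this commit. A hedged sketch (the
example URL and output are illustrative, not from the source):

    import brozzler

    parsed = brozzler.site_surt_canon('https://example.com/path/?session=123')
    # _remove_query has dropped the query, so the scope surt comes out
    # something like 'https://(com,example,)/path/'
    scope_surt = parsed.surt().decode('ascii')
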
@@ -166,3 +161,6 @@ from brozzler.browser import Browser, BrowserPool, BrowsingException
 from brozzler.job import new_job, new_site, Job
 from brozzler.cli import suggest_default_chrome_exe
 
+__all__ = ['Page', 'Site', 'BrozzlerWorker', 'is_permitted_by_robots',
+        'RethinkDbFrontier', 'Browser', 'BrowserPool', 'BrowsingException',
+        'new_job', 'new_site', 'Job']
@@ -29,7 +29,6 @@ from requests.structures import CaseInsensitiveDict
 import datetime
 import base64
 from brozzler.chrome import Chrome
-import surt
 import socket
 
 class BrowsingException(Exception):
@@ -541,7 +541,7 @@ def brozzler_list_captures():
     Handy utility for looking up entries in the rethinkdb "captures" table by
     url or sha1.
     '''
-    import surt
+    import urlcanon
 
     arg_parser = argparse.ArgumentParser(
             prog=os.path.basename(sys.argv[0]),
@@ -579,9 +579,7 @@ def brozzler_list_captures():
         logging.debug('querying rethinkdb: %s', reql)
         results = reql.run()
     else:
-        key = surt.surt(
-                args.url_or_sha1, trailing_comma=True, host_massage=False,
-                with_scheme=True)
+        key = urlcanon.semantic(args.url_or_sha1).surt().decode('ascii')
         abbr_start_key = key[:150]
         if args.prefix:
             # surt is necessarily ascii and \x7f is the last ascii character
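
Note: the three-argument surt.surt() call collapses into a one-liner.
urlcanon.semantic() returns a parsed url whose surt() method yields bytes,
hence the decode('ascii') (surts are pure ascii, as the comment above
notes, since hosts are punycoded). An illustrative sketch with
hypothetical input and output:

    import urlcanon

    key = urlcanon.semantic('HTTP://EXAMPLE.com:80/a/../b').surt().decode('ascii')
    # semantic canonicalization lowercases the host, drops the default port
    # and resolves the path, giving something like 'http://(com,example,)/b'
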
@@ -23,6 +23,7 @@ import time
 import datetime
 import rethinkdb as r
 import doublethink
+import urlcanon
 
 class UnexpectedDbResult(Exception):
     pass
@@ -261,18 +262,21 @@ class RethinkDbFrontier:
 
     def scope_and_schedule_outlinks(self, site, parent_page, outlinks):
-        if site.remember_outlinks:
-            parent_page.outlinks = {"accepted":[],"blocked":[],"rejected":[]}
+        decisions = {"accepted":set(),"blocked":set(),"rejected":set()}
         counts = {"added":0,"updated":0,"rejected":0,"blocked":0}
         for url in outlinks or []:
-            u = brozzler.site.Url(url)
-            if site.is_in_scope(u, parent_page=parent_page):
+            url_for_scoping = urlcanon.semantic(url)
+            url_for_crawling = urlcanon.whatwg(url)
+            if site.is_in_scope(url_for_scoping, parent_page=parent_page):
                 if brozzler.is_permitted_by_robots(site, url):
-                    if not u.surt.startswith(site.scope["surt"]):
+                    if not url_for_scoping.surt().startswith(
+                            site.scope["surt"].encode("utf-8")):
                         hops_off_surt = parent_page.hops_off_surt + 1
                     else:
                         hops_off_surt = 0
                     new_child_page = brozzler.Page(self.rr, {
-                        'url': url, 'site_id': site.id, 'job_id': site.job_id,
+                        'url': str(url_for_crawling),
+                        'site_id': site.id, 'job_id': site.job_id,
                         'hops_from_seed': parent_page.hops_from_seed+1,
                         'via_page_id': parent_page.id,
                         'hops_off_surt': hops_off_surt})
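
Note: each outlink is now canonicalized twice, urlcanon.semantic() for
scope decisions (the aggressive, match-oriented ruleset) and
urlcanon.whatwg() for the url actually enqueued (the browser-equivalent
ruleset). A hedged sketch of the difference, with an illustrative URL:

    import urlcanon

    link = 'HTTP://Example.COM/page#frag'
    url_for_scoping = urlcanon.semantic(link)   # e.g. http://example.com/page
    url_for_crawling = urlcanon.whatwg(link)    # e.g. http://example.com/page#frag
    # scope comparison operates on the bytes surt form:
    in_scope = url_for_scoping.surt().startswith(b'http://(com,example,)/')
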
@@ -286,17 +290,20 @@ class RethinkDbFrontier:
                     new_child_page.save()
                     counts["added"] += 1
-                    if site.remember_outlinks:
-                        parent_page.outlinks["accepted"].append(url)
+                    decisions["accepted"].add(str(url_for_crawling))
                 else:
                     counts["blocked"] += 1
-                    if site.remember_outlinks:
-                        parent_page.outlinks["blocked"].append(url)
+                    decisions["blocked"].add(str(url_for_crawling))
             else:
                 counts["rejected"] += 1
-                if site.remember_outlinks:
-                    parent_page.outlinks["rejected"].append(url)
+                decisions["rejected"].add(str(url_for_crawling))
 
         if site.remember_outlinks:
+            parent_page.outlinks = {}
+            for k in decisions:
+                parent_page.outlinks[k] = list(decisions[k])
             parent_page.save()
 
         self.logger.info(
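
Note: collecting decisions into sets rather than appending to lists dedupes
repeated outlinks from the same page; the sets are converted back to lists
just before saving, presumably because python sets don't serialize into a
rethinkdb document. A tiny sketch of that shape:

    decisions = {"accepted": set(), "blocked": set(), "rejected": set()}
    decisions["accepted"].add('http://example.com/dupe')
    decisions["accepted"].add('http://example.com/dupe')   # no-op, already present
    outlinks = {k: list(v) for k, v in decisions.items()}  # serializable
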
@@ -36,7 +36,7 @@ except ImportError as e:
     sys.exit(1)
 import doublethink
 import rethinkdb as r
-import surt
+import urlcanon
 import json
 import brozzler
 import argparse
@@ -116,9 +116,7 @@ class TheGoodUrlCanonicalizer(object):
 
     def __call__(self, url):
         try:
-            key = surt.surt(
-                    url, trailing_comma=True, host_massage=False,
-                    with_scheme=True)
+            key = urlcanon.semantic(url).surt().decode('ascii')
             # logging.debug('%s -> %s', url, key)
             return key
         except Exception as e:
brozzler/site.py (213 lines changed)
@@ -16,7 +16,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 '''
 
-import surt
+import urlcanon
 import json
 import logging
 import brozzler
@@ -25,69 +25,10 @@ import time
 import doublethink
 import datetime
 import re
 import ipaddress
 
 _EPOCH_UTC = datetime.datetime.utcfromtimestamp(0.0).replace(
         tzinfo=doublethink.UTC)
 
-class Url:
-    def __init__(self, url):
-        self.url = url
-        self._surt = None
-        self._host = None
-
-    @property
-    def surt(self):
-        if not self._surt:
-            try:
-                hurl = surt.handyurl.parse(self.url)
-                surt.GoogleURLCanonicalizer.canonicalize(hurl)
-                hurl.query = None
-                hurl.hash = None
-                # XXX chop off path after last slash??
-                self._surt = hurl.getURLString(surt=True, trailing_comma=True)
-            except Exception as e:
-                logging.warn('problem surting %s - %s', repr(self.url), e)
-        return self._surt
-
-    @property
-    def host(self):
-        if not self._host:
-            self._host = surt.handyurl.parse(self.url).host
-        return self._host
-
-    def matches_ip_or_domain(self, ip_or_domain):
-        """
-        Returns true if
-        - ip_or_domain is an ip address and self.host is the same ip address
-        - ip_or_domain is a domain and self.host is the same domain
-        - ip_or_domain is a domain and self.host is a subdomain of it
-        """
-        if not self.host:
-            return False
-
-        if ip_or_domain == self.host:
-            return True
-
-        # if either ip_or_domain or self.host are ip addresses, and they're not
-        # identical (previous check), not a match
-        try:
-            ipaddress.ip_address(ip_or_domain)
-            return False
-        except:
-            pass
-        try:
-            ipaddress.ip_address(self.host)
-            return False
-        except:
-            pass
-
-        # if we get here, we're looking at two hostnames
-        domain_parts = ip_or_domain.encode("idna").decode("ascii").lower().split(".")
-        host_parts = self.host.encode("idna").decode("ascii").lower().split(".")
-
-        return host_parts[-len(domain_parts):] == domain_parts
-
 class Site(doublethink.Document):
     logger = logging.getLogger(__module__ + "." + __qualname__)
     table = 'sites'
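
Note: everything the deleted Url helper did by hand (lazy surt
computation, host extraction, ip/domain matching for scope rules) is
delegated to urlcanon from here on. A hedged sketch of the rough
equivalents (illustrative URL; the domain-rule semantics are shown as
expected to mirror the old matches_ip_or_domain(), not quoted from
urlcanon's docs):

    import urlcanon

    url = urlcanon.semantic('https://sub.EXAMPLE.com/x')
    host = url.host          # b'sub.example.com'
    surt = url.surt()        # something like b'https://(com,example,sub,)/x'
    # domain rules now go through urlcanon.MatchRule, which should accept
    # subdomains of the configured domain like the old helper did:
    rule = urlcanon.MatchRule(domain='example.com')
    matches = rule.applies(url, None)   # expected True; second arg is the parent url
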
@@ -105,8 +46,9 @@ class Site(doublethink.Document):
             self.last_claimed = _EPOCH_UTC
         if not "scope" in self:
             self.scope = {}
-        if not "surt" in self.scope:
-            self.scope["surt"] = Url(self.seed).surt
+        if not "surt" in self.scope and self.seed:
+            self.scope["surt"] = brozzler.site_surt_canon(
+                    self.seed).surt().decode('ascii')
 
         if not "starts_and_stops" in self:
             if self.get("start_time"):   # backward compatibility
@@ -135,7 +77,7 @@ class Site(doublethink.Document):
         return dt
 
     def note_seed_redirect(self, url):
-        new_scope_surt = Url(url).surt
+        new_scope_surt = brozzler.site_surt_canon(url).surt().decode("ascii")
         if not new_scope_surt.startswith(self.scope["surt"]):
             self.logger.info("changing site scope surt from {} to {}".format(
                 self.scope["surt"], new_scope_surt))
@@ -149,149 +91,50 @@ class Site(doublethink.Document):
         return hdrs
 
     def is_in_scope(self, url, parent_page=None):
-        if not isinstance(url, Url):
-            u = Url(url)
-        else:
-            u = url
+        if not isinstance(url, urlcanon.ParsedUrl):
+            url = urlcanon.semantic(url)
+        if parent_page:
+            parent_url = urlcanon.semantic(parent_page.url)
 
         might_accept = False
-        if not u.surt:
-            return False
-        elif not u.surt.startswith("http://") and not u.surt.startswith("https://"):
+        if not url.scheme in (b'http', b'https'):
             # XXX doesn't belong here maybe (where? worker ignores unknown
             # schemes?)
             return False
         elif (parent_page and "max_hops" in self.scope
                 and parent_page.hops_from_seed >= self.scope["max_hops"]):
             pass
-        elif u.surt.startswith(self.scope["surt"]):
+        elif url.surt().startswith(self.scope["surt"].encode("utf-8")):
             might_accept = True
         elif parent_page and parent_page.hops_off_surt < self.scope.get(
                 "max_hops_off_surt", 0):
             might_accept = True
         elif "accepts" in self.scope:
-            for rule in self.scope["accepts"]:
-                if self._scope_rule_applies(rule, u, parent_page):
-                    might_accept = True
-                    break
+            for accept_rule in self.scope["accepts"]:
+                rule = urlcanon.MatchRule(**accept_rule)
+                if rule.applies(url, parent_url):
+                    might_accept = True
+                    break
 
         if might_accept:
             if "blocks" in self.scope:
-                for rule in self.scope["blocks"]:
-                    if self._scope_rule_applies(rule, u, parent_page):
+                for block_rule in self.scope["blocks"]:
+                    rule = urlcanon.MatchRule(**block_rule)
+                    if rule.applies(url, parent_url):
                         return False
             return True
         else:
             return False
 
-    def _normalize_rule(self, rule):
-        """
-        Normalizes a scope rule.
-
-        A scope rule is considered deprecated if it contains a `url_match` and
-        `value`. This method converts such scope rules to the preferred style
-        and returns the new rule. If `rule` is not a deprecated-style rule,
-        returns it unchanged.
-        """
-        if "url_match" in rule and "value" in rule:
-            new_rule = dict(rule)
-            url_match = new_rule.pop("url_match")
-            if url_match == "REGEX_MATCH":
-                new_rule["regex"] = new_rule.pop("value")
-            elif url_match == "SURT_MATCH":
-                new_rule["surt"] = new_rule.pop("value")
-            elif url_match == "STRING_MATCH":
-                new_rule["substring"] = new_rule.pop("value")
-            else:
-                raise Exception("invalid scope rule")
-            return new_rule
-        else:
-            return rule
-
-    def _scope_rule_applies(self, rule, url, parent_page=None):
-        """
-        Examples of valid rules expressed as yaml.
-
-        - domain: bad.domain.com
-
-        # preferred:
-        - domain: monkey.org
-          substring: bar
-
-        # deprecated version of the same:
-        - domain: monkey.org
-          url_match: STRING_MATCH
-          value: bar
-
-        # preferred:
-        - surt: http://(com,woop,)/fuh/
-
-        # deprecated version of the same:
-        - url_match: SURT_MATCH
-          value: http://(com,woop,)/fuh/
-
-        # preferred:
-        - regex: ^https?://(www.)?youtube.com/watch?.*$
-          parent_url_regex: ^https?://(www.)?youtube.com/user/.*$
-
-        # deprecated version of the same:
-        - url_match: REGEX_MATCH
-          value: ^https?://(www.)?youtube.com/watch?.*$
-          parent_url_regex: ^https?://(www.)?youtube.com/user/.*$
-        """
-        if not isinstance(url, Url):
-            u = Url(url)
-        else:
-            u = url
-
-        try:
-            rewl = self._normalize_rule(rule)
-        except Exception as e:
-            self.logger.error(
-                    "problem normalizing scope rule %s - %s", rule, e)
-            return False
-
-        invalid_keys = rewl.keys() - {
-                "domain", "surt", "substring", "regex", "parent_url_regex"}
-        if invalid_keys:
-            self.logger.error(
-                    "invalid keys %s in scope rule %s", invalid_keys, rule)
-            return False
-
-        if "domain" in rewl and not u.matches_ip_or_domain(rewl["domain"]):
-            return False
-        if "surt" in rewl and not u.surt.startswith(rewl["surt"]):
-            return False
-        if "substring" in rewl and not u.url.find(rewl["substring"]) >= 0:
-            return False
-        if "regex" in rewl:
-            try:
-                if not re.fullmatch(rewl["regex"], u.url):
-                    return False
-            except Exception as e:
-                self.logger.error(
-                        "caught exception matching against regex %s - %s",
-                        rewl["regex"], e)
-                return False
-        if "parent_url_regex" in rewl:
-            if not parent_page:
-                return False
-            pu = Url(parent_page.url)
-            try:
-                if not re.fullmatch(rule["parent_url_regex"], pu.url):
-                    return False
-            except Exception as e:
-                self.logger.error(
-                        "caught exception matching against regex %s - %s",
-                        rule["parent_url_regex"], e)
-                return False
-
-        return True
-
 class Page(doublethink.Document):
     logger = logging.getLogger(__module__ + "." + __qualname__)
     table = "pages"
 
+    @staticmethod
+    def compute_id(site_id, url):
+        digest_this = "site_id:%s,url:%s" % (site_id, url)
+        return hashlib.sha1(digest_this.encode("utf-8")).hexdigest()
+
     def populate_defaults(self):
         if not "hops_from_seed" in self:
             self.hops_from_seed = 0
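
Note: scope rules formerly normalized by _normalize_rule() and evaluated
by _scope_rule_applies() are now handed directly to urlcanon.MatchRule as
keyword arguments, so the rule vocabulary from the deleted docstring
(domain, surt, substring, regex, parent_url_regex) carries over. A sketch
using examples lifted from that docstring (the sketch passes None for the
parent url explicitly, since is_in_scope() only binds parent_url when a
parent page exists):

    import urlcanon

    # preferred-style rule: domain plus substring
    rule = urlcanon.MatchRule(domain='monkey.org', substring='bar')
    url = urlcanon.semantic('http://foo.monkey.org/abc/bar/baz')
    accepted = rule.applies(url, None)   # expected True

    # block rules work the same way; is_in_scope() returns False as soon
    # as one applies:
    block = urlcanon.MatchRule(surt='http://(com,woop,)/fuh/')
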
@@ -306,8 +149,7 @@ class Page(doublethink.Document):
         if not "priority" in self:
             self.priority = self._calc_priority()
         if not "id" in self:
-            digest_this = "site_id:%s,url:%s" % (self.site_id, self.url)
-            self.id = hashlib.sha1(digest_this.encode("utf-8")).hexdigest()
+            self.id = self.compute_id(self.site_id, self.url)
 
     def __str__(self):
         return 'Page({"id":"%s","url":"%s",...})' % (self.id, self.url)
@@ -327,7 +169,6 @@ class Page(doublethink.Document):
         if not self.url:
             return None
         if self._canon_hurl is None:
-            self._canon_hurl = surt.handyurl.parse(self.url)
-            surt.GoogleURLCanonicalizer.canonicalize(self._canon_hurl)
-        return self._canon_hurl.geturl()
+            self._canon_hurl = urlcanon.semantic(self.url)
+        return str(self._canon_hurl)
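
Note: Page's cached canonical url is now a urlcanon parsed url; calling
str() on it reassembles the canonical string, replacing handyurl's
geturl(). An illustrative sketch:

    import urlcanon

    parsed = urlcanon.semantic('HTTP://EXAMPLE.com:80/a/../b')
    canon_url = str(parsed)   # something like 'http://example.com/b'
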
@@ -33,6 +33,7 @@ import collections
 import requests
 import doublethink
 import tempfile
+import urlcanon
 
 class ExtraHeaderAdder(urllib.request.BaseHandler):
     def __init__(self, extra_headers):
@@ -208,7 +209,7 @@ class BrozzlerWorker:
                     "with youtube-dl json for %s", page)
             self._warcprox_write_record(
                     warcprox_address=self._proxy(site),
-                    url="youtube-dl:%s" % brozzler.fixup(page.url),
+                    url="youtube-dl:%s" % str(urlcanon.semantic(page.url)),
                     warc_type="metadata",
                     content_type="application/vnd.youtube-dl_formats+json;charset=utf-8",
                     payload=info_json.encode("utf-8"),
@@ -245,7 +246,7 @@ class BrozzlerWorker:
         def _on_screenshot(screenshot_png):
             if on_screenshot:
                 on_screenshot(screenshot_png)
-            elif self._proxy(site) and self._enable_warcprox_features(site):
+            if self._proxy(site) and self._enable_warcprox_features(site):
                 self.logger.info(
                         "sending WARCPROX_WRITE_RECORD request to %s with "
                         "screenshot for %s", self._proxy(site), page)
@@ -253,13 +254,13 @@ class BrozzlerWorker:
                         screenshot_png)
                 self._warcprox_write_record(
                         warcprox_address=self._proxy(site),
-                        url="screenshot:%s" % brozzler.fixup(page.url, True),
+                        url="screenshot:%s" % str(urlcanon.semantic(page.url)),
                         warc_type="resource", content_type="image/jpeg",
                         payload=screenshot_jpeg,
                         extra_headers=site.extra_headers())
                 self._warcprox_write_record(
                         warcprox_address=self._proxy(site),
-                        url="thumbnail:%s" % brozzler.fixup(page.url, True),
+                        url="thumbnail:%s" % str(urlcanon.semantic(page.url)),
                         warc_type="resource", content_type="image/jpeg",
                         payload=thumbnail_jpeg,
                         extra_headers=site.extra_headers())
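
Note: the synthetic record urls ("screenshot:...", "thumbnail:...", and
"youtube-dl:..." above) previously went through the now-deleted
brozzler.fixup(), with hash_strip=True for the image records;
urlcanon.semantic() also drops the fragment, so records for the same page
should keep collapsing onto one canonical url. Illustrative sketch:

    import urlcanon

    page_url = 'http://example.com/page#section'
    record_url = 'screenshot:%s' % str(urlcanon.semantic(page_url))
    # something like 'screenshot:http://example.com/page', fragment gone,
    # matching the old fixup(url, hash_strip=True) behavior
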
setup.py (11 lines changed)
@@ -32,7 +32,7 @@ def find_package_data(package):
 
 setuptools.setup(
         name='brozzler',
-        version='1.1b9.dev202',
+        version='1.1b9.dev203',
         description='Distributed web crawling with browsers',
         url='https://github.com/internetarchive/brozzler',
         author='Noah Levitt',
@@ -68,7 +68,7 @@ setuptools.setup(
         'requests',
         'websocket-client!=0.39.0',
         'pillow==3.3.0',
-        'surt>=0.3.0',
+        'urlcanon>=0.1.dev16',
         'doublethink>=0.2.0.dev71',
         'rethinkdb>=2.3,<2.4',
         'cerberus==1.0.1',
@@ -76,7 +76,12 @@ setuptools.setup(
         ],
         extras_require={
             'dashboard': ['flask>=0.11', 'gunicorn'],
-            'easy': ['warcprox>=2.0b2', 'pywb', 'flask>=0.11', 'gunicorn'],
+            'easy': [
+                'warcprox>=2.1b1.dev57',
+                'pywb',
+                'flask>=0.11',
+                'gunicorn'
+            ],
         },
         zip_safe=False,
         classifiers=[
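
Note: with the 'easy' extra reformatted one dependency per line and
warcprox bumped to >=2.1b1.dev57, a single-machine install still pulls the
whole stack:

    pip install brozzler[easy]   # warcprox>=2.1b1.dev57, pywb, flask>=0.11, gunicorn
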
@@ -75,7 +75,6 @@ blocks:
     - domain: twitter.com
       url_match: REGEX_MATCH
       value: ^.*lang=(?!en).*$
-    - bad_thing: bad rule should be ignored
     ''')
 
     site = brozzler.Site(None, {
@@ -16,4 +16,4 @@ vagrant ssh -- 'status warcprox ;
         echo
 
 vagrant ssh -- 'source /opt/brozzler-ve34/bin/activate && pip install pytest'
-vagrant ssh -- "source /opt/brozzler-ve34/bin/activate && DISPLAY=:1 py.test -v -s /brozzler/tests $@"
+vagrant ssh -- "source /opt/brozzler-ve34/bin/activate && DISPLAY=:1 py.test -v /brozzler/tests $@"