renaming scope rule "host" to "domain" to make it less confusing, since rules apply to subdomains as well

This commit is contained in:
Noah Levitt 2016-06-28 15:13:48 -05:00
parent e64a4d6985
commit 77c800f6a2
2 changed files with 22 additions and 22 deletions

View file

@ -1,20 +1,20 @@
# '''
# brozzler/site.py - classes representing sites and pages brozzler/site.py - classes representing sites and pages
#
# Copyright (C) 2014-2016 Internet Archive Copyright (C) 2014-2016 Internet Archive
#
# Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
# You may obtain a copy of the License at You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and See the License for the specific language governing permissions and
# limitations under the License. limitations under the License.
# '''
import surt import surt
import json import json
@ -170,7 +170,7 @@ class Site(brozzler.BaseDictable):
Examples of valid rules: Examples of valid rules:
[ [
{ {
"host": "monkey.org", "domain": "monkey.org",
"url_match": "STRING_MATCH", "url_match": "STRING_MATCH",
"value": "bar", "value": "bar",
}, },
@ -179,7 +179,7 @@ class Site(brozzler.BaseDictable):
"value": "http://(com,woop,)/fuh/", "value": "http://(com,woop,)/fuh/",
}, },
{ {
"host": "badhost.com", "domain": "bad.domain.com",
}, },
] ]
""" """
@ -188,7 +188,7 @@ class Site(brozzler.BaseDictable):
else: else:
u = url u = url
if "host" in rule and not u.matches_ip_or_domain(rule["host"]): if "domain" in rule and not u.matches_ip_or_domain(rule["domain"]):
return False return False
if "url_match" in rule: if "url_match" in rule:
if rule["url_match"] == "STRING_MATCH": if rule["url_match"] == "STRING_MATCH":
@ -207,7 +207,7 @@ class Site(brozzler.BaseDictable):
self.logger.warn("invalid rule.url_match=%s", rule.url_match) self.logger.warn("invalid rule.url_match=%s", rule.url_match)
return False return False
else: else:
if "host" in rule: if "domain" in rule:
# we already know that it matches from earlier check # we already know that it matches from earlier check
return True return True
else: else:

View file

@ -21,7 +21,7 @@ import setuptools
setuptools.setup( setuptools.setup(
name='brozzler', name='brozzler',
version='1.1.dev27', version='1.1.dev28',
description='Distributed web crawling with browsers', description='Distributed web crawling with browsers',
url='https://github.com/internetarchive/brozzler', url='https://github.com/internetarchive/brozzler',
author='Noah Levitt', author='Noah Levitt',