little fix, tweak

This commit is contained in:
Noah Levitt 2015-08-05 00:17:43 +00:00
parent b6beac3807
commit 2a7a0b7c30
3 changed files with 5 additions and 3 deletions

View File

@@ -62,6 +62,7 @@ for seed_conf in seeds:
extra_headers = {"Warcprox-Meta":warcprox_meta} extra_headers = {"Warcprox-Meta":warcprox_meta}
site = brozzler.Site(seed=merged_conf["url"], site = brozzler.Site(seed=merged_conf["url"],
scope=merged_conf.get("scope"), scope=merged_conf.get("scope"),
time_limit=merged_conf.get("time_limit"),
proxy=merged_conf.get("proxy"), proxy=merged_conf.get("proxy"),
ignore_robots=merged_conf.get("ignore_robots"), ignore_robots=merged_conf.get("ignore_robots"),
enable_warcprox_features=merged_conf.get("enable_warcprox_features"), enable_warcprox_features=merged_conf.get("enable_warcprox_features"),

View File

@@ -9,7 +9,7 @@ class Site:
logger = logging.getLogger(__module__ + "." + __qualname__) logger = logging.getLogger(__module__ + "." + __qualname__)
def __init__(self, seed, id=None, scope=None, proxy=None, def __init__(self, seed, id=None, scope=None, proxy=None,
ignore_robots=False, extra_headers=None, ignore_robots=False, time_limit=None, extra_headers=None,
enable_warcprox_features=False, reached_limit=None): enable_warcprox_features=False, reached_limit=None):
self.seed = seed self.seed = seed
self.id = id self.id = id
@@ -17,6 +17,7 @@ class Site:
self.ignore_robots = ignore_robots self.ignore_robots = ignore_robots
self.enable_warcprox_features = bool(enable_warcprox_features) self.enable_warcprox_features = bool(enable_warcprox_features)
self.extra_headers = extra_headers self.extra_headers = extra_headers
self.time_limit = time_limit
self.reached_limit = reached_limit self.reached_limit = reached_limit
self.scope = scope or {} self.scope = scope or {}

View File

@@ -168,7 +168,7 @@ class BrozzlerWorker:
self.logger.info("brozzling site {}".format(site)) self.logger.info("brozzling site {}".format(site))
ydl = self._youtube_dl(site) ydl = self._youtube_dl(site)
th = threading.Thread(target=lambda: self._brozzle_site(browser, ydl, site), th = threading.Thread(target=lambda: self._brozzle_site(browser, ydl, site),
name="BrowsingThread-{}".format(site.scope_surt)) name="BrowsingThread-{}".format(site.seed))
th.start() th.start()
except: except:
self._browser_pool.release(browser) self._browser_pool.release(browser)
@@ -196,7 +196,7 @@ class BrozzlerWorker:
if q_empty: if q_empty:
if latest_state != "no-unclaimed-sites": if latest_state != "no-unclaimed-sites":
self.logger.info("no unclaimed sites to browse") # self.logger.info("no unclaimed sites to browse")
latest_state = "no-unclaimed-sites" latest_state = "no-unclaimed-sites"
time.sleep(0.5) time.sleep(0.5)
except OSError as e: except OSError as e: