Mirror of https://github.com/internetarchive/brozzler.git (synced 2025-02-24 08:39:59 -05:00)
little fix, tweak
This commit is contained in:
parent b6beac3807
commit 2a7a0b7c30
@ -62,6 +62,7 @@ for seed_conf in seeds:
|
||||
extra_headers = {"Warcprox-Meta":warcprox_meta}
|
||||
site = brozzler.Site(seed=merged_conf["url"],
|
||||
scope=merged_conf.get("scope"),
|
||||
time_limit=merged_conf.get("time_limit"),
|
||||
proxy=merged_conf.get("proxy"),
|
||||
ignore_robots=merged_conf.get("ignore_robots"),
|
||||
enable_warcprox_features=merged_conf.get("enable_warcprox_features"),
|
||||
|
@ -9,7 +9,7 @@ class Site:
|
||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
||||
|
||||
def __init__(self, seed, id=None, scope=None, proxy=None,
|
||||
ignore_robots=False, extra_headers=None,
|
||||
ignore_robots=False, time_limit=None, extra_headers=None,
|
||||
enable_warcprox_features=False, reached_limit=None):
|
||||
self.seed = seed
|
||||
self.id = id
|
||||
@ -17,6 +17,7 @@ class Site:
|
||||
self.ignore_robots = ignore_robots
|
||||
self.enable_warcprox_features = bool(enable_warcprox_features)
|
||||
self.extra_headers = extra_headers
|
||||
self.time_limit = time_limit
|
||||
self.reached_limit = reached_limit
|
||||
|
||||
self.scope = scope or {}
|
||||
|
@ -168,7 +168,7 @@ class BrozzlerWorker:
|
||||
self.logger.info("brozzling site {}".format(site))
|
||||
ydl = self._youtube_dl(site)
|
||||
th = threading.Thread(target=lambda: self._brozzle_site(browser, ydl, site),
|
||||
name="BrowsingThread-{}".format(site.scope_surt))
|
||||
name="BrowsingThread-{}".format(site.seed))
|
||||
th.start()
|
||||
except:
|
||||
self._browser_pool.release(browser)
|
||||
@ -196,7 +196,7 @@ class BrozzlerWorker:
|
||||
|
||||
if q_empty:
|
||||
if latest_state != "no-unclaimed-sites":
|
||||
self.logger.info("no unclaimed sites to browse")
|
||||
# self.logger.info("no unclaimed sites to browse")
|
||||
latest_state = "no-unclaimed-sites"
|
||||
time.sleep(0.5)
|
||||
except OSError as e:
|
||||
|
Loading…
x
Reference in New Issue
Block a user