mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
make it work again, and list discovered outlinks
This commit is contained in:
parent
8b45d7eb69
commit
3af1e10e13
@@ -38,13 +38,14 @@ site = brozzler.Site(id=-1, seed=args.url, proxy=args.proxy,
|
||||
enable_warcprox_features=args.enable_warcprox_features,
|
||||
extra_headers=extra_headers)
|
||||
page = brozzler.Page(url=args.url, site_id=site.id)
|
||||
worker = brozzler.BrozzlerWorker()
|
||||
worker = brozzler.BrozzlerWorker(frontier=None)
|
||||
ydl = worker._youtube_dl(site)
|
||||
|
||||
browser = brozzler.Browser(chrome_exe=args.chrome_exe)
|
||||
browser.start(proxy=site.proxy)
|
||||
try:
|
||||
worker.brozzle_page(browser, ydl, site, page)
|
||||
outlinks = worker.brozzle_page(browser, ydl, site, page)
|
||||
logging.info("outlinks: \n\t%s", "\n\t".join(sorted(outlinks)))
|
||||
except brozzler.ReachedLimit as e:
|
||||
logging.error("reached limit %s", e)
|
||||
finally:
|
||||
|
Loading…
x
Reference in New Issue
Block a user