mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-02-24 08:39:59 -05:00
look for a sensible default chromium/chrome executable
This commit is contained in:
parent
c902a70450
commit
37bff5328b
@ -35,6 +35,7 @@ import time
|
|||||||
import traceback
|
import traceback
|
||||||
import warnings
|
import warnings
|
||||||
import yaml
|
import yaml
|
||||||
|
import shutil
|
||||||
|
|
||||||
def _add_common_options(arg_parser):
|
def _add_common_options(arg_parser):
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
@ -47,8 +48,8 @@ def _add_common_options(arg_parser):
|
|||||||
'--trace', dest='log_level',
|
'--trace', dest='log_level',
|
||||||
action='store_const', default=logging.INFO, const=brozzler.TRACE)
|
action='store_const', default=logging.INFO, const=brozzler.TRACE)
|
||||||
# arg_parser.add_argument(
|
# arg_parser.add_argument(
|
||||||
# '-s', '--silent', dest='log_level',
|
# '-s', '--silent', dest='log_level', action='store_const',
|
||||||
# action='store_const', default=logging.INFO, const=logging.CRITICAL)
|
# default=logging.INFO, const=logging.CRITICAL)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--version', action='version',
|
'--version', action='version',
|
||||||
version='brozzler %s - %s' % (
|
version='brozzler %s - %s' % (
|
||||||
@ -85,6 +86,26 @@ def _configure_logging(args):
|
|||||||
warnings.simplefilter(
|
warnings.simplefilter(
|
||||||
'ignore', category=requests.packages.urllib3.exceptions.InsecurePlatformWarning)
|
'ignore', category=requests.packages.urllib3.exceptions.InsecurePlatformWarning)
|
||||||
|
|
||||||
|
def suggest_default_chome_exe():
|
||||||
|
# mac os x application executable paths
|
||||||
|
for path in [
|
||||||
|
'/Applications/Chromium.app/Contents/MacOS/Chromium',
|
||||||
|
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome']:
|
||||||
|
if os.path.exists(path):
|
||||||
|
return path
|
||||||
|
|
||||||
|
# "chromium-browser" is the executable on ubuntu trusty
|
||||||
|
# https://github.com/internetarchive/brozzler/pull/6/files uses "chromium"
|
||||||
|
# google chrome executable names taken from these packages:
|
||||||
|
# http://www.ubuntuupdates.org/ppa/google_chrome
|
||||||
|
for exe in [
|
||||||
|
'chromium-browser', 'chromium', 'google-chrome',
|
||||||
|
'google-chrome-stable', 'google-chrome-beta',
|
||||||
|
'google-chrome-unstable']:
|
||||||
|
if shutil.which(exe):
|
||||||
|
return exe
|
||||||
|
return 'chromium-browser'
|
||||||
|
|
||||||
def brozzle_page():
|
def brozzle_page():
|
||||||
'''
|
'''
|
||||||
Command line utility entry point for brozzling a single page. Opens url in
|
Command line utility entry point for brozzling a single page. Opens url in
|
||||||
@ -96,7 +117,8 @@ def brozzle_page():
|
|||||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||||
arg_parser.add_argument('url', metavar='URL', help='page url')
|
arg_parser.add_argument('url', metavar='URL', help='page url')
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'-e', '--executable', dest='chrome_exe', default='chromium-browser',
|
'-e', '--chrome-exe', dest='chrome_exe',
|
||||||
|
default=suggest_default_chome_exe(),
|
||||||
help='executable to use to invoke chrome')
|
help='executable to use to invoke chrome')
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'--proxy', dest='proxy', default=None,
|
'--proxy', dest='proxy', default=None,
|
||||||
@ -164,7 +186,7 @@ def brozzler_new_job():
|
|||||||
|
|
||||||
def brozzler_new_site():
|
def brozzler_new_site():
|
||||||
'''
|
'''
|
||||||
Command line utility entry point for queuing a new brozzler site.
|
Command line utility entry point for queuing a new brozzler site.
|
||||||
Takes a seed url and creates a site and page object in rethinkdb, which
|
Takes a seed url and creates a site and page object in rethinkdb, which
|
||||||
brozzler-workers will look at and start crawling.
|
brozzler-workers will look at and start crawling.
|
||||||
'''
|
'''
|
||||||
@ -215,7 +237,8 @@ def brozzler_worker():
|
|||||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||||
_add_rethinkdb_options(arg_parser)
|
_add_rethinkdb_options(arg_parser)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'-e', '--executable', dest='chrome_exe', default='chromium-browser',
|
'-e', '--chrome-exe', dest='chrome_exe',
|
||||||
|
default=suggest_default_chome_exe(),
|
||||||
help='executable to use to invoke chrome')
|
help='executable to use to invoke chrome')
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'-n', '--max-browsers', dest='max_browsers', default='1',
|
'-n', '--max-browsers', dest='max_browsers', default='1',
|
||||||
@ -304,4 +327,3 @@ def brozzler_ensure_tables():
|
|||||||
|
|
||||||
# sites, pages, jobs tables
|
# sites, pages, jobs tables
|
||||||
brozzler.frontier.RethinkDbFrontier(r)
|
brozzler.frontier.RethinkDbFrontier(r)
|
||||||
|
|
||||||
|
@ -36,6 +36,7 @@ except ImportError as e:
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
import argparse
|
import argparse
|
||||||
import brozzler
|
import brozzler
|
||||||
|
import brozzler.cli
|
||||||
import os
|
import os
|
||||||
import socket
|
import socket
|
||||||
import signal
|
import signal
|
||||||
@ -84,7 +85,8 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
|||||||
|
|
||||||
# === brozzler-worker args ===
|
# === brozzler-worker args ===
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'-e', '--executable', dest='chrome_exe', default='chromium-browser',
|
'-e', '--chrome-exe', dest='chrome_exe',
|
||||||
|
default=brozzler.cli.suggest_default_chome_exe(),
|
||||||
help='executable to use to invoke chrome')
|
help='executable to use to invoke chrome')
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
'-n', '--max-browsers', dest='max_browsers', default='1',
|
'-n', '--max-browsers', dest='max_browsers', default='1',
|
||||||
|
3
setup.py
3
setup.py
@ -21,7 +21,7 @@ import setuptools
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='brozzler',
|
name='brozzler',
|
||||||
version='1.1b3.dev55',
|
version='1.1b3.dev56',
|
||||||
description='Distributed web crawling with browsers',
|
description='Distributed web crawling with browsers',
|
||||||
url='https://github.com/internetarchive/brozzler',
|
url='https://github.com/internetarchive/brozzler',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
@ -66,4 +66,3 @@ setuptools.setup(
|
|||||||
'Topic :: Internet :: WWW/HTTP',
|
'Topic :: Internet :: WWW/HTTP',
|
||||||
'Topic :: System :: Archiving',
|
'Topic :: System :: Archiving',
|
||||||
])
|
])
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user