mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-12 00:10:28 -04:00
chrome: convert to structlog
This commit is contained in:
parent
97f225d54c
commit
19dd7c97f0
1 changed files with 39 additions and 50 deletions
|
@ -16,7 +16,6 @@ See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import time
|
import time
|
||||||
import threading
|
import threading
|
||||||
|
@ -27,6 +26,7 @@ import select
|
||||||
import re
|
import re
|
||||||
import signal
|
import signal
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
import structlog
|
||||||
import json
|
import json
|
||||||
import tempfile
|
import tempfile
|
||||||
import sys
|
import sys
|
||||||
|
@ -65,7 +65,7 @@ def check_version(chrome_exe):
|
||||||
|
|
||||||
|
|
||||||
class Chrome:
|
class Chrome:
|
||||||
logger = logging.getLogger(__module__ + "." + __qualname__)
|
logger = structlog.get_logger(__module__ + "." + __qualname__)
|
||||||
|
|
||||||
def __init__(self, chrome_exe, port=9222, ignore_cert_errors=False):
|
def __init__(self, chrome_exe, port=9222, ignore_cert_errors=False):
|
||||||
"""
|
"""
|
||||||
|
@ -97,22 +97,22 @@ class Chrome:
|
||||||
def _init_cookie_db(self, cookie_db):
|
def _init_cookie_db(self, cookie_db):
|
||||||
cookie_dir = os.path.join(self._chrome_user_data_dir, "Default")
|
cookie_dir = os.path.join(self._chrome_user_data_dir, "Default")
|
||||||
cookie_location = os.path.join(cookie_dir, "Cookies")
|
cookie_location = os.path.join(cookie_dir, "Cookies")
|
||||||
self.logger.debug("cookie DB provided, writing to %s", cookie_location)
|
cookie_logger = self.logger.bind(cookie_location=cookie_location)
|
||||||
|
|
||||||
|
cookie_logger.debug("cookie DB provided, writing to")
|
||||||
os.makedirs(cookie_dir, exist_ok=True)
|
os.makedirs(cookie_dir, exist_ok=True)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(cookie_location, "wb") as cookie_file:
|
with open(cookie_location, "wb") as cookie_file:
|
||||||
cookie_file.write(cookie_db)
|
cookie_file.write(cookie_db)
|
||||||
except OSError:
|
except OSError:
|
||||||
self.logger.error(
|
cookie_logger.exception("exception writing cookie file")
|
||||||
"exception writing cookie file at %s", cookie_location, exc_info=True
|
|
||||||
)
|
|
||||||
|
|
||||||
def persist_and_read_cookie_db(self):
|
def persist_and_read_cookie_db(self):
|
||||||
cookie_location = os.path.join(self._chrome_user_data_dir, "Default", "Cookies")
|
cookie_location = os.path.join(self._chrome_user_data_dir, "Default", "Cookies")
|
||||||
self.logger.debug(
|
cookie_logger = self.logger.bind(cookie_location=cookie_location)
|
||||||
"marking cookies persistent then reading file into memory: %s",
|
cookie_logger.debug(
|
||||||
cookie_location,
|
"marking cookies persistent then reading file into memory",
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
with sqlite3.connect(cookie_location) as conn:
|
with sqlite3.connect(cookie_location) as conn:
|
||||||
|
@ -125,20 +125,14 @@ class Chrome:
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
cur.execute("UPDATE cookies SET persistent = 1")
|
cur.execute("UPDATE cookies SET persistent = 1")
|
||||||
except sqlite3.Error:
|
except sqlite3.Error:
|
||||||
self.logger.error(
|
cookie_logger.exception("exception updating cookie DB")
|
||||||
"exception updating cookie DB %s", cookie_location, exc_info=True
|
|
||||||
)
|
|
||||||
|
|
||||||
cookie_db = None
|
cookie_db = None
|
||||||
try:
|
try:
|
||||||
with open(cookie_location, "rb") as cookie_file:
|
with open(cookie_location, "rb") as cookie_file:
|
||||||
cookie_db = cookie_file.read()
|
cookie_db = cookie_file.read()
|
||||||
except OSError:
|
except OSError:
|
||||||
self.logger.error(
|
cookie_logger.exception("exception reading from cookie DB file")
|
||||||
"exception reading from cookie DB file %s",
|
|
||||||
cookie_location,
|
|
||||||
exc_info=True,
|
|
||||||
)
|
|
||||||
return cookie_db
|
return cookie_db
|
||||||
|
|
||||||
def start(
|
def start(
|
||||||
|
@ -227,7 +221,7 @@ class Chrome:
|
||||||
if proxy:
|
if proxy:
|
||||||
chrome_args.append("--proxy-server=%s" % proxy)
|
chrome_args.append("--proxy-server=%s" % proxy)
|
||||||
chrome_args.append("about:blank")
|
chrome_args.append("about:blank")
|
||||||
self.logger.info("running: %r", subprocess.list2cmdline(chrome_args))
|
self.logger.info("running", chrome_args=subprocess.list2cmdline(chrome_args))
|
||||||
# start_new_session - new process group so we can kill the whole group
|
# start_new_session - new process group so we can kill the whole group
|
||||||
self.chrome_process = subprocess.Popen(
|
self.chrome_process = subprocess.Popen(
|
||||||
chrome_args,
|
chrome_args,
|
||||||
|
@ -243,12 +237,13 @@ class Chrome:
|
||||||
daemon=True,
|
daemon=True,
|
||||||
)
|
)
|
||||||
self._out_reader_thread.start()
|
self._out_reader_thread.start()
|
||||||
self.logger.info("chrome running, pid %s" % self.chrome_process.pid)
|
self.logger.info("chrome running", pid=self.chrome_process.pid)
|
||||||
|
|
||||||
return self._websocket_url(timeout_sec=websocket_timeout)
|
return self._websocket_url(timeout_sec=websocket_timeout)
|
||||||
|
|
||||||
def _websocket_url(self, timeout_sec=60):
|
def _websocket_url(self, timeout_sec=60):
|
||||||
json_url = "http://localhost:%s/json" % self.port
|
json_url = "http://localhost:%s/json" % self.port
|
||||||
|
url_logger = self.logger.bind(json_url=json_url)
|
||||||
# make this a member variable so that kill -QUIT reports it
|
# make this a member variable so that kill -QUIT reports it
|
||||||
self._start = time.time()
|
self._start = time.time()
|
||||||
self._last_warning = self._start
|
self._last_warning = self._start
|
||||||
|
@ -259,24 +254,21 @@ class Chrome:
|
||||||
debug_info = [x for x in all_debug_info if x["url"] == "about:blank"]
|
debug_info = [x for x in all_debug_info if x["url"] == "about:blank"]
|
||||||
|
|
||||||
if debug_info and "webSocketDebuggerUrl" in debug_info[0]:
|
if debug_info and "webSocketDebuggerUrl" in debug_info[0]:
|
||||||
self.logger.debug("%s returned %s", json_url, raw_json)
|
url_logger.debug("webSocketDebuggerUrl returned", raw_json=raw_json)
|
||||||
url = debug_info[0]["webSocketDebuggerUrl"]
|
url = debug_info[0]["webSocketDebuggerUrl"]
|
||||||
self.logger.info(
|
url_logger.info(
|
||||||
"got chrome window websocket debug url %s from %s",
|
"got chrome window websocket debug url",
|
||||||
url,
|
debug_url=url,
|
||||||
json_url,
|
|
||||||
)
|
)
|
||||||
return url
|
return url
|
||||||
except brozzler.ShutdownRequested:
|
except brozzler.ShutdownRequested:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if time.time() - self._last_warning > 30:
|
if time.time() - self._last_warning > 30:
|
||||||
self.logger.warning(
|
url_logger.warning(
|
||||||
"problem with %s (will keep trying until timeout "
|
"problem accessing url (will keep trying until timeout)",
|
||||||
"of %d seconds): %s",
|
timeout=timeout_sec,
|
||||||
json_url,
|
exc_info=True
|
||||||
timeout_sec,
|
|
||||||
e,
|
|
||||||
)
|
)
|
||||||
self._last_warning = time.time()
|
self._last_warning = time.time()
|
||||||
finally:
|
finally:
|
||||||
|
@ -322,26 +314,28 @@ class Chrome:
|
||||||
while not self._shutdown.is_set():
|
while not self._shutdown.is_set():
|
||||||
buf = readline_nonblock(self.chrome_process.stdout)
|
buf = readline_nonblock(self.chrome_process.stdout)
|
||||||
if buf:
|
if buf:
|
||||||
self.logger.trace(
|
self.logger.debug(
|
||||||
"chrome pid %s STDOUT %s", self.chrome_process.pid, buf
|
"chrome pid %s STDOUT %s", self.chrome_process.pid, buf
|
||||||
)
|
)
|
||||||
|
|
||||||
buf = readline_nonblock(self.chrome_process.stderr)
|
buf = readline_nonblock(self.chrome_process.stderr)
|
||||||
if buf:
|
if buf:
|
||||||
self.logger.trace(
|
self.logger.debug(
|
||||||
"chrome pid %s STDERR %s", self.chrome_process.pid, buf
|
"chrome pid %s STDERR %s", self.chrome_process.pid, buf
|
||||||
)
|
)
|
||||||
except:
|
except:
|
||||||
self.logger.error("unexpected exception", exc_info=True)
|
self.logger.exception("unexpected exception")
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
if not self.chrome_process or self._shutdown.is_set():
|
if not self.chrome_process or self._shutdown.is_set():
|
||||||
return
|
return
|
||||||
self._shutdown.set()
|
self._shutdown.set()
|
||||||
|
|
||||||
|
pid_logger = self.logger.bind(pid=self.chrome_process.pid)
|
||||||
|
|
||||||
timeout_sec = 300
|
timeout_sec = 300
|
||||||
if self.chrome_process.poll() is None:
|
if self.chrome_process.poll() is None:
|
||||||
self.logger.info("terminating chrome pgid %s", self.chrome_process.pid)
|
pid_logger.info("terminating chrome")
|
||||||
|
|
||||||
os.killpg(self.chrome_process.pid, signal.SIGTERM)
|
os.killpg(self.chrome_process.pid, signal.SIGTERM)
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
|
@ -351,14 +345,11 @@ class Chrome:
|
||||||
status = self.chrome_process.poll()
|
status = self.chrome_process.poll()
|
||||||
if status is not None:
|
if status is not None:
|
||||||
if status == 0:
|
if status == 0:
|
||||||
self.logger.info(
|
pid_logger.info("chrome exited normally")
|
||||||
"chrome pid %s exited normally", self.chrome_process.pid
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
self.logger.warning(
|
pid_logger.warning(
|
||||||
"chrome pid %s exited with nonzero status %s",
|
"chrome exited with nonzero status",
|
||||||
self.chrome_process.pid,
|
status=status,
|
||||||
status,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# XXX I would like to forcefully kill the process group
|
# XXX I would like to forcefully kill the process group
|
||||||
|
@ -368,18 +359,16 @@ class Chrome:
|
||||||
return
|
return
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
||||||
self.logger.warning(
|
pid_logger.warning(
|
||||||
"chrome pid %s still alive %.1f seconds after sending "
|
"chrome still alive %.1f seconds after sending "
|
||||||
"SIGTERM, sending SIGKILL",
|
"SIGTERM, sending SIGKILL",
|
||||||
self.chrome_process.pid,
|
|
||||||
time.time() - t0,
|
time.time() - t0,
|
||||||
)
|
)
|
||||||
os.killpg(self.chrome_process.pid, signal.SIGKILL)
|
os.killpg(self.chrome_process.pid, signal.SIGKILL)
|
||||||
status = self.chrome_process.wait()
|
status = self.chrome_process.wait()
|
||||||
self.logger.warning(
|
pid_logger.warning(
|
||||||
"chrome pid %s reaped (status=%s) after killing with " "SIGKILL",
|
"chrome reaped after killing with " "SIGKILL",
|
||||||
self.chrome_process.pid,
|
status=status,
|
||||||
status,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
|
@ -388,8 +377,8 @@ class Chrome:
|
||||||
try:
|
try:
|
||||||
self._home_tmpdir.cleanup()
|
self._home_tmpdir.cleanup()
|
||||||
except:
|
except:
|
||||||
self.logger.error(
|
self.logger.exception(
|
||||||
"exception deleting %s", self._home_tmpdir, exc_info=True
|
"exception deleting self._home_tmpdir", tmpdir=self._home_tmpdir
|
||||||
)
|
)
|
||||||
self._out_reader_thread.join()
|
self._out_reader_thread.join()
|
||||||
self.chrome_process = None
|
self.chrome_process = None
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue