From c288c9ae98dab6eff740ab18cc25bea09342773f Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Wed, 6 Feb 2019 16:22:10 +0000 Subject: [PATCH 1/4] Add disk cache options to Chrome Add `Chrome` options `disk_cache` and `disk_cache_size` which add chromium options `--disk-cache-dir=` and `--disk-cache-size=N` (bytes). The default is to use `--disable-cache` (no disk caching). There are two ways to use the new vars, if you just use `Chrome(disk_cache=True)` the chromium cli option `--disable-cache` is NOT used and chromium writes disk cache inside profile dir. If you use `Chrome(disk_cache='/tmp/custom_dir', disk_cache_size=10000)` chromium will use `--disk-cache-dir=/tmp/custom_dir --disk-cache-size=10000`. --- brozzler/chrome.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/brozzler/chrome.py b/brozzler/chrome.py index 7423328..0036c1b 100644 --- a/brozzler/chrome.py +++ b/brozzler/chrome.py @@ -62,7 +62,8 @@ def check_version(chrome_exe): class Chrome: logger = logging.getLogger(__module__ + '.' + __qualname__) - def __init__(self, chrome_exe, port=9222, ignore_cert_errors=False): + def __init__(self, chrome_exe, port=9222, ignore_cert_errors=False, + disk_cache=None, disk_cache_size=None): ''' Initializes instance of this class. @@ -79,6 +80,8 @@ class Chrome: self.ignore_cert_errors = ignore_cert_errors self._shutdown = threading.Event() self.chrome_process = None + self.disk_cache = disk_cache + self.disk_cache_size = disk_cache_size def __enter__(self): ''' @@ -134,7 +137,8 @@ class Chrome: cookie_location, exc_info=True) return cookie_db - def start(self, proxy=None, cookie_db=None): + def start(self, proxy=None, cookie_db=None, disk_cache=None, + disk_cache_size=None): ''' Starts chrome/chromium process. @@ -144,7 +148,12 @@ class Chrome: which, if supplied, will be written to {chrome_user_data_dir}/Default/Cookies before running the browser (default None) - + disk_cache: use disk cache. 
If True, use default cache location inside + `self._home_tmpdir`. If its a string, try to use that path for + disk cache (default None) + disk_cache_size: Forces the maximum disk space to be used by the disk + cache, in bytes. Used only when `cache` is a disk path. + (default None) Returns: websocket url to chrome window with about:blank loaded ''' @@ -154,6 +163,10 @@ class Chrome: self._home_tmpdir.name, 'chrome-user-data') if cookie_db: self._init_cookie_db(cookie_db) + if disk_cache: + self.disk_cache = disk_cache + if disk_cache_size: + self.disk_cache_size = disk_cache_size self._shutdown.clear() new_env = os.environ.copy() @@ -166,12 +179,22 @@ class Chrome: '--disable-background-networking', '--disable-renderer-backgrounding', '--disable-hang-monitor', '--disable-background-timer-throttling', '--mute-audio', - '--disable-web-sockets', '--disable-cache', + '--disable-web-sockets', '--window-size=1100,900', '--no-default-browser-check', '--disable-first-run-ui', '--no-first-run', '--homepage=about:blank', '--disable-direct-npapi-requests', '--disable-web-security', '--disable-notifications', '--disable-extensions', '--disable-save-password-bubble'] + + if self.disk_cache: + if isinstance(self.disk_cache, str): + chrome_args.append('--disk-cache-dir=%s' % self.disk_cache) + if self.disk_cache_size: + chrome_args.append('--disk-cache-size=%s' % + self.disk_cache_size) + else: + chrome_args.append('--disable-cache') + if self.ignore_cert_errors: chrome_args.append('--ignore-certificate-errors') if proxy: From 31e611771ea6c5608fadcc10f149b03ef8a77fa4 Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Thu, 7 Feb 2019 07:42:45 +0000 Subject: [PATCH 2/4] Improve disk cache options Remove `--disable-cache`, it's not used any more. Rename `disk_cache` to `disk_cache_dir` and use only path (str) argument. Decouple `--disk-cache-size` from `--disk-cache-dir` so it is possible to use either or both. 
--- brozzler/chrome.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/brozzler/chrome.py b/brozzler/chrome.py index 0036c1b..d83b7b1 100644 --- a/brozzler/chrome.py +++ b/brozzler/chrome.py @@ -63,7 +63,7 @@ class Chrome: logger = logging.getLogger(__module__ + '.' + __qualname__) def __init__(self, chrome_exe, port=9222, ignore_cert_errors=False, - disk_cache=None, disk_cache_size=None): + disk_cache_dir=None, disk_cache_size=None): ''' Initializes instance of this class. @@ -80,7 +80,7 @@ class Chrome: self.ignore_cert_errors = ignore_cert_errors self._shutdown = threading.Event() self.chrome_process = None - self.disk_cache = disk_cache + self.disk_cache_dir = disk_cache_dir self.disk_cache_size = disk_cache_size def __enter__(self): @@ -137,7 +137,7 @@ class Chrome: cookie_location, exc_info=True) return cookie_db - def start(self, proxy=None, cookie_db=None, disk_cache=None, + def start(self, proxy=None, cookie_db=None, disk_cache_dir=None, disk_cache_size=None): ''' Starts chrome/chromium process. @@ -148,9 +148,8 @@ class Chrome: which, if supplied, will be written to {chrome_user_data_dir}/Default/Cookies before running the browser (default None) - disk_cache: use disk cache. If True, use default cache location inside - `self._home_tmpdir`. If its a string, try to use that path for - disk cache (default None) + disk_cache_dir: use directory for disk cache. The default location + is inside `self._home_tmpdir` (default None). disk_cache_size: Forces the maximum disk space to be used by the disk cache, in bytes. Used only when `cache` is a disk path. 
(default None) @@ -163,8 +162,8 @@ class Chrome: self._home_tmpdir.name, 'chrome-user-data') if cookie_db: self._init_cookie_db(cookie_db) - if disk_cache: - self.disk_cache = disk_cache + if disk_cache_dir: + self.disk_cache_dir = disk_cache_dir if disk_cache_size: self.disk_cache_size = disk_cache_size self._shutdown.clear() @@ -186,15 +185,10 @@ class Chrome: '--disable-web-security', '--disable-notifications', '--disable-extensions', '--disable-save-password-bubble'] - if self.disk_cache: - if isinstance(self.disk_cache, str): - chrome_args.append('--disk-cache-dir=%s' % self.disk_cache) - if self.disk_cache_size: - chrome_args.append('--disk-cache-size=%s' % - self.disk_cache_size) - else: - chrome_args.append('--disable-cache') - + if self.disk_cache_dir: + chrome_args.append('--disk-cache-dir=%s' % self.disk_cache_dir) + if self.disk_cache_size: + chrome_args.append('--disk-cache-size=%s' % self.disk_cache_size) if self.ignore_cert_errors: chrome_args.append('--ignore-certificate-errors') if proxy: From adeca823dd28e6a613f87638395f99174442b80b Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Tue, 12 Feb 2019 07:21:44 +0000 Subject: [PATCH 3/4] Remove stale comment --- brozzler/chrome.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/brozzler/chrome.py b/brozzler/chrome.py index d83b7b1..cb69bda 100644 --- a/brozzler/chrome.py +++ b/brozzler/chrome.py @@ -151,8 +151,7 @@ class Chrome: disk_cache_dir: use directory for disk cache. The default location is inside `self._home_tmpdir` (default None). disk_cache_size: Forces the maximum disk space to be used by the disk - cache, in bytes. Used only when `cache` is a disk path. - (default None) + cache, in bytes. 
(default None) Returns: websocket url to chrome window with about:blank loaded ''' From 9c48a6fa11122ca0ba4df339a721ebf33b67ccdd Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Tue, 12 Feb 2019 20:59:08 +0000 Subject: [PATCH 4/4] Use disk cache params only on Chrome.start Use `disk_cache_dir` and `disk_cache_size` only on `Chrome.start` and not on `Chrome.__init__`. Drop `disk_cache_dir` and `disk_cache_size` instance attributes. --- brozzler/chrome.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/brozzler/chrome.py b/brozzler/chrome.py index cb69bda..5928586 100644 --- a/brozzler/chrome.py +++ b/brozzler/chrome.py @@ -62,8 +62,7 @@ def check_version(chrome_exe): class Chrome: logger = logging.getLogger(__module__ + '.' + __qualname__) - def __init__(self, chrome_exe, port=9222, ignore_cert_errors=False, - disk_cache_dir=None, disk_cache_size=None): + def __init__(self, chrome_exe, port=9222, ignore_cert_errors=False): ''' Initializes instance of this class. 
@@ -80,8 +79,6 @@ class Chrome: self.ignore_cert_errors = ignore_cert_errors self._shutdown = threading.Event() self.chrome_process = None - self.disk_cache_dir = disk_cache_dir - self.disk_cache_size = disk_cache_size def __enter__(self): ''' @@ -161,10 +158,6 @@ class Chrome: self._home_tmpdir.name, 'chrome-user-data') if cookie_db: self._init_cookie_db(cookie_db) - if disk_cache_dir: - self.disk_cache_dir = disk_cache_dir - if disk_cache_size: - self.disk_cache_size = disk_cache_size self._shutdown.clear() new_env = os.environ.copy() @@ -184,10 +177,10 @@ class Chrome: '--disable-web-security', '--disable-notifications', '--disable-extensions', '--disable-save-password-bubble'] - if self.disk_cache_dir: - chrome_args.append('--disk-cache-dir=%s' % self.disk_cache_dir) - if self.disk_cache_size: - chrome_args.append('--disk-cache-size=%s' % self.disk_cache_size) + if disk_cache_dir: + chrome_args.append('--disk-cache-dir=%s' % disk_cache_dir) + if disk_cache_size: + chrome_args.append('--disk-cache-size=%s' % disk_cache_size) if self.ignore_cert_errors: chrome_args.append('--ignore-certificate-errors') if proxy: