From b41393fac5353fe01154c90fc54ee532e149ec59 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Tue, 23 Jul 2024 17:57:14 -0700 Subject: [PATCH 1/7] yt_dlp 2024.7.16, websocket-client==1.8.0 --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 4238e71..e2e2269 100644 --- a/setup.py +++ b/setup.py @@ -66,10 +66,10 @@ setuptools.setup( }, install_requires=[ "PyYAML>=5.1", - "yt_dlp<2023.11.16", + "yt_dlp==2024.7.16", "reppy==0.3.4", "requests>=2.21", - "websocket-client>=0.39.0,<=0.48.0", + "websocket-client==1.8.0", "pillow>=5.2.0", "urlcanon>=0.1.dev23", "doublethink @ git+https://github.com/internetarchive/doublethink.git@Py311", From f06e0348eaf47f264698db3a8768ac981f311a28 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Tue, 23 Jul 2024 17:58:04 -0700 Subject: [PATCH 2/7] fix doc typos --- job-conf.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job-conf.rst b/job-conf.rst index 08707b6..64432b0 100644 --- a/job-conf.rst +++ b/job-conf.rst @@ -3,7 +3,7 @@ Brozzler Job Configuration Jobs are used to brozzle multiple seeds and/or apply settings and scope rules, as defined byusing YAML files. At least one seed URL must be specified. -All other configurartions are optional. +All other configurations are optional. .. contents:: @@ -44,7 +44,7 @@ How inheritance works Most of the settings that apply to seeds can also be specified at the top level, in which case all seeds inherit those settings. If an option is specified both at the top level and at the seed level, the results are merged. -In cases of coflict, the seed-level value takes precedence. +In cases of conflict, the seed-level value takes precedence. In the example yaml above, ``warcprox_meta`` is specified at the top level and at the seed level for the seed http://one.example.org/. 
At the top level we From 704db5b957ce3b506863ead553d7caeafb6f534e Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Thu, 25 Jul 2024 14:57:00 -0700 Subject: [PATCH 3/7] update on_close for websocket-client update --- brozzler/browser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/brozzler/browser.py b/brozzler/browser.py index 7ae5828..ae845df 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -1,7 +1,7 @@ """ brozzler/browser.py - manages the browsers for brozzler -Copyright (C) 2014-2023 Internet Archive +Copyright (C) 2014-2024 Internet Archive Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -33,6 +33,7 @@ from brozzler.chrome import Chrome import socket import urlcanon +websocket.enableTrace(True) class BrowsingException(Exception): pass @@ -173,7 +174,7 @@ class WebsockReceiverThread(threading.Thread): def pop_result(self, msg_id): return self._result_messages.pop(msg_id) - def _on_close(self, websock): + def _on_close(self, websock, close_status_code, close_msg): pass # self.logger.info('GOODBYE GOODBYE WEBSOCKET') From 1ec4d0361a1a0e12c0a61fc42ef92e621442d8a5 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Thu, 25 Jul 2024 16:03:39 -0700 Subject: [PATCH 4/7] skip websocket.enableTrace for now --- brozzler/browser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/brozzler/browser.py b/brozzler/browser.py index ae845df..588dc40 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -33,7 +33,6 @@ from brozzler.chrome import Chrome import socket import urlcanon -websocket.enableTrace(True) class BrowsingException(Exception): pass @@ -139,6 +138,9 @@ class BrowserPool: return len(self._in_use) +# uncomment the next line for LOTS of debugging logging +# websocket.enableTrace(True) + class WebsockReceiverThread(threading.Thread): logger = logging.getLogger(__module__ + "." 
+ __qualname__) From 81e640cd12dd89c9777b96b39f128c8d6836e76a Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 29 Jul 2024 13:47:11 -0700 Subject: [PATCH 5/7] black'd --- brozzler/browser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/brozzler/browser.py b/brozzler/browser.py index 588dc40..ca92ffa 100644 --- a/brozzler/browser.py +++ b/brozzler/browser.py @@ -141,6 +141,7 @@ class BrowserPool: # uncomment the next line for LOTS of debugging logging # websocket.enableTrace(True) + class WebsockReceiverThread(threading.Thread): logger = logging.getLogger(__module__ + "." + __qualname__) From 20841fb49dff6814b36f434c0ff407edb981a22d Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 29 Jul 2024 14:20:48 -0700 Subject: [PATCH 6/7] mv yt-dlp to extras & update --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e2e2269..a443f90 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,6 @@ setuptools.setup( }, install_requires=[ "PyYAML>=5.1", - "yt_dlp==2024.7.16", "reppy==0.3.4", "requests>=2.21", "websocket-client==1.8.0", @@ -80,6 +79,7 @@ setuptools.setup( "python-magic>=0.4.15", ], extras_require={ + "yt-dlp": ["yt-dlp==2024.7.25"], "dashboard": ["flask>=1.0", "gunicorn>=19.8.1"], "easy": [ "warcprox>=2.4.31", From 124a390c110e1ff7a19329ef2c9869536b54eef7 Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Mon, 29 Jul 2024 14:28:53 -0700 Subject: [PATCH 7/7] Revert "fix doc typos" This reverts commit f06e0348eaf47f264698db3a8768ac981f311a28. --- job-conf.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/job-conf.rst b/job-conf.rst index 64432b0..08707b6 100644 --- a/job-conf.rst +++ b/job-conf.rst @@ -3,7 +3,7 @@ Brozzler Job Configuration Jobs are used to brozzle multiple seeds and/or apply settings and scope rules, as defined byusing YAML files. At least one seed URL must be specified. -All other configurations are optional. +All other configurartions are optional. .. 
contents:: @@ -44,7 +44,7 @@ How inheritance works Most of the settings that apply to seeds can also be specified at the top level, in which case all seeds inherit those settings. If an option is specified both at the top level and at the seed level, the results are merged. -In cases of conflict, the seed-level value takes precedence. +In cases of coflict, the seed-level value takes precedence. In the example yaml above, ``warcprox_meta`` is specified at the top level and at the seed level for the seed http://one.example.org/. At the top level we