From d749b3b31495a18eeaf5d022c1691696435ba90d Mon Sep 17 00:00:00 2001 From: gfw-report Date: Sat, 26 Mar 2022 05:21:57 +0000 Subject: [PATCH] Use the latest Chrome user agent to bypass NDSS's HTTP 403. --- fetch_pdfs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fetch_pdfs.py b/fetch_pdfs.py index 77b84be..aef4c20 100755 --- a/fetch_pdfs.py +++ b/fetch_pdfs.py @@ -34,7 +34,8 @@ def download_pdf(url, file_name): print("Now fetching %s" % url) try: - fetched_file = urllib.request.urlopen(url) + req = urllib.request.Request(url, headers={'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36"}) + fetched_file = urllib.request.urlopen(req) except Exception as err: print(url, err, file=sys.stderr) return