diff --git a/README.md b/README.md index 2d974b8..db7063e 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ pulls reddit data from the [pushshift](https://github.com/pushshift/api) api and ### install -requires python 3 +requires python 3 on linux, OSX, or Windows sudo apt-get install pip pip install psaw @@ -44,6 +44,15 @@ to update an html archive, delete everything in `r` aside from `r/static` and re copy the contents of the `r` directory to a web root or appropriately served git repo. +### Windows users + +Windows users may need to run the following before running `fetch_links.py` or `write_html.py`: + + chcp 65001 + set PYTHONIOENCODING=utf-8 + +This will resolve encoding errors such as 'codec can't encode character'. + ### potential improvements * fetch_links diff --git a/fetch_links.py b/fetch_links.py index b2e3416..e7bdc8c 100755 --- a/fetch_links.py +++ b/fetch_links.py @@ -120,17 +120,17 @@ def write_links(subreddit, links): filename = 'links.csv' filepath = path + '/' + filename if not os.path.isfile(filepath): - file = open(filepath, 'a') + file = open(filepath, 'a', encoding='utf-8') writer = csv.DictWriter(file, fieldnames=link_fields) writer.writeheader() # print('created %s' % filepath) else: - with open(filepath, 'r') as file: + with open(filepath, 'r', encoding='utf-8') as file: reader = csv.DictReader(file) for row in reader: existing_link_ids.append(row['id']) - file = open(filepath, 'a') + file = open(filepath, 'a', encoding='utf-8') writer = csv.DictWriter(file, fieldnames=link_fields) # create and parse existing comments @@ -138,17 +138,17 @@ def write_links(subreddit, links): filename = r['id'] + '.csv' filepath = path + '/' + filename if not os.path.isfile(filepath): - comments_file = open(filepath, 'a') + comments_file = open(filepath, 'a', encoding='utf-8') comments_writer = csv.DictWriter(comments_file, fieldnames=comment_fields) comments_writer.writeheader() # print('created %s' % filepath) else: - with open(filepath, 'r') 
as comments_file: + with open(filepath, 'r', encoding='utf-8') as comments_file: reader = csv.DictReader(comments_file) for row in reader: existing_comment_ids.append(row['id']) - comments_file = open(filepath, 'a') + comments_file = open(filepath, 'a', encoding='utf-8') comments_writer = csv.DictWriter(comments_file, fieldnames=comment_fields) # write link row