improve Windows unicode handling

This commit is contained in:
libertysoft3 2019-03-26 22:53:16 -07:00
parent 8385bb6f66
commit ceaa6b86a9
2 changed files with 16 additions and 7 deletions

View file

@ -4,7 +4,7 @@ pulls reddit data from the [pushshift](https://github.com/pushshift/api) api and
### install ### install
requires python 3 requires python 3 on linux, OSX, or Windows
sudo apt-get install pip sudo apt-get install pip
pip install psaw pip install psaw
@ -44,6 +44,15 @@ to update an html archive, delete everything in `r` aside from `r/static` and re
copy the contents of the `r` directory to a web root or appropriately served git repo. copy the contents of the `r` directory to a web root or appropriately served git repo.
### Windows users
Windows users may need to run the following before running `fetch_links.py` or `write_html.py`:
chcp 65001
set PYTHONIOENCODING=utf-8
This will resolve encoding errors such as 'codec can't encode character'.
### potential improvements ### potential improvements
* fetch_links * fetch_links

View file

@ -120,17 +120,17 @@ def write_links(subreddit, links):
filename = 'links.csv' filename = 'links.csv'
filepath = path + '/' + filename filepath = path + '/' + filename
if not os.path.isfile(filepath): if not os.path.isfile(filepath):
file = open(filepath, 'a') file = open(filepath, 'a', encoding='utf-8')
writer = csv.DictWriter(file, fieldnames=link_fields) writer = csv.DictWriter(file, fieldnames=link_fields)
writer.writeheader() writer.writeheader()
# print('created %s' % filepath) # print('created %s' % filepath)
else: else:
with open(filepath, 'r') as file: with open(filepath, 'r', encoding='utf-8') as file:
reader = csv.DictReader(file) reader = csv.DictReader(file)
for row in reader: for row in reader:
existing_link_ids.append(row['id']) existing_link_ids.append(row['id'])
file = open(filepath, 'a') file = open(filepath, 'a', encoding='utf-8')
writer = csv.DictWriter(file, fieldnames=link_fields) writer = csv.DictWriter(file, fieldnames=link_fields)
# create and parse existing comments # create and parse existing comments
@ -138,17 +138,17 @@ def write_links(subreddit, links):
filename = r['id'] + '.csv' filename = r['id'] + '.csv'
filepath = path + '/' + filename filepath = path + '/' + filename
if not os.path.isfile(filepath): if not os.path.isfile(filepath):
comments_file = open(filepath, 'a') comments_file = open(filepath, 'a', encoding='utf-8')
comments_writer = csv.DictWriter(comments_file, fieldnames=comment_fields) comments_writer = csv.DictWriter(comments_file, fieldnames=comment_fields)
comments_writer.writeheader() comments_writer.writeheader()
# print('created %s' % filepath) # print('created %s' % filepath)
else: else:
with open(filepath, 'r') as comments_file: with open(filepath, 'r', encoding='utf-8') as comments_file:
reader = csv.DictReader(comments_file) reader = csv.DictReader(comments_file)
for row in reader: for row in reader:
existing_comment_ids.append(row['id']) existing_comment_ids.append(row['id'])
comments_file = open(filepath, 'a') comments_file = open(filepath, 'a', encoding='utf-8')
comments_writer = csv.DictWriter(comments_file, fieldnames=comment_fields) comments_writer = csv.DictWriter(comments_file, fieldnames=comment_fields)
# write link row # write link row