improve Windows unicode handling

This commit is contained in:
libertysoft3 2019-03-26 22:53:16 -07:00
parent 8385bb6f66
commit ceaa6b86a9
2 changed files with 16 additions and 7 deletions

View file

@ -4,7 +4,7 @@ pulls reddit data from the [pushshift](https://github.com/pushshift/api) api and
### install ### install
requires python 3 requires python 3 on linux, OSX, or Windows
sudo apt-get install pip sudo apt-get install pip
pip install psaw pip install psaw
@ -44,6 +44,15 @@ to update an html archive, delete everything in `r` aside from `r/static` and re
copy the contents of the `r` directory to a web root or appropriately served git repo. copy the contents of the `r` directory to a web root or appropriately served git repo.
### Windows users
Windows users may need to run the following before running `fetch_links.py` or `write_html.py`:
chcp 65001
set PYTHONIOENCODING=utf-8
This will resolve encoding errors such as 'codec can't encode character'.
### potential improvements ### potential improvements
* fetch_links * fetch_links

View file

@ -120,17 +120,17 @@ def write_links(subreddit, links):
filename = 'links.csv' filename = 'links.csv'
filepath = path + '/' + filename filepath = path + '/' + filename
if not os.path.isfile(filepath): if not os.path.isfile(filepath):
file = open(filepath, 'a') file = open(filepath, 'a', encoding='utf-8')
writer = csv.DictWriter(file, fieldnames=link_fields) writer = csv.DictWriter(file, fieldnames=link_fields)
writer.writeheader() writer.writeheader()
# print('created %s' % filepath) # print('created %s' % filepath)
else: else:
with open(filepath, 'r') as file: with open(filepath, 'r', encoding='utf-8') as file:
reader = csv.DictReader(file) reader = csv.DictReader(file)
for row in reader: for row in reader:
existing_link_ids.append(row['id']) existing_link_ids.append(row['id'])
file = open(filepath, 'a') file = open(filepath, 'a', encoding='utf-8')
writer = csv.DictWriter(file, fieldnames=link_fields) writer = csv.DictWriter(file, fieldnames=link_fields)
# create and parse existing comments # create and parse existing comments
@ -138,17 +138,17 @@ def write_links(subreddit, links):
filename = r['id'] + '.csv' filename = r['id'] + '.csv'
filepath = path + '/' + filename filepath = path + '/' + filename
if not os.path.isfile(filepath): if not os.path.isfile(filepath):
comments_file = open(filepath, 'a') comments_file = open(filepath, 'a', encoding='utf-8')
comments_writer = csv.DictWriter(comments_file, fieldnames=comment_fields) comments_writer = csv.DictWriter(comments_file, fieldnames=comment_fields)
comments_writer.writeheader() comments_writer.writeheader()
# print('created %s' % filepath) # print('created %s' % filepath)
else: else:
with open(filepath, 'r') as comments_file: with open(filepath, 'r', encoding='utf-8') as comments_file:
reader = csv.DictReader(comments_file) reader = csv.DictReader(comments_file)
for row in reader: for row in reader:
existing_comment_ids.append(row['id']) existing_comment_ids.append(row['id'])
comments_file = open(filepath, 'a') comments_file = open(filepath, 'a', encoding='utf-8')
comments_writer = csv.DictWriter(comments_file, fieldnames=comment_fields) comments_writer = csv.DictWriter(comments_file, fieldnames=comment_fields)
# write link row # write link row