fix subreddit name casing bugs, resolves #3

libertysoft3 2019-04-09 02:59:18 -07:00
parent 43a93e1a04
commit 83fb77b41c
3 changed files with 22 additions and 21 deletions

README.md

@@ -16,6 +16,13 @@ requires python 3 on linux, OSX, or Windows
 cd reddit-html-archiver
 chmod u+x *.py
+Windows users may need to run
+chcp 65001
+set PYTHONIOENCODING=utf-8
+before running `fetch_links.py` or `write_html.py` to resolve encoding errors such as 'codec can't encode character'.
 ### fetch reddit data
 data is fetched by subreddit and date range and is stored as csv files in `data`.
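An aside, not part of this commit: the chcp 65001 / PYTHONIOENCODING workaround the first hunk adds can also be applied from inside Python itself; a minimal sketch, assuming Python 3.7+ for `sys.stdout.reconfigure()`:

```python
# Hypothetical alternative to chcp 65001 / set PYTHONIOENCODING=utf-8
# (not in this commit): force UTF-8 on stdout so print() stops raising
# "codec can't encode character" on Windows consoles.
import sys

if sys.stdout.encoding and sys.stdout.encoding.lower() != 'utf-8':
    sys.stdout.reconfigure(encoding='utf-8', errors='replace')
```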
@@ -44,15 +51,6 @@ to update an html archive, delete everything in `r` aside from `r/static` and re
 copy the contents of the `r` directory to a web root or appropriately served git repo.
-### Windows users
-Windows users may need to run the following before running `fetch_links.py` or `write_html.py`:
-chcp 65001
-set PYTHONIOENCODING=utf-8
-This will resolve encoding errors such as 'codec can't encode character'.
 ### potential improvements
 * fetch_links
@@ -62,15 +60,16 @@ This will resolve encoding errors such as 'codec can't encode character'.
 * score update
 * scores from reddit with [praw](https://github.com/praw-dev/praw)
 * real templating
-* filter output per sub, individual min score and comments filters
-* js markdown url previews
-* js powered search page, show no links by default
-* link and comments pages
-* view on reddit.com
-* show link domains
-* choose [Bootswatch](https://bootswatch.com/) theme
+* specify subreddits to output
+* show link domain/post type
+* user pages
+* add pagination, posts sorted by score, comments, date, sub
+* too many files in one directory
+* view on reddit.com
+* js powered search page, show no links by default
+* js inline media embeds/expandos
+* archive.org links
 ### see also

fetch_links.py

@@ -214,4 +214,6 @@ if __name__ == '__main__':
     if args.self_only:
         self_only = True
+    args.subreddit = args.subreddit.lower()
     fetch_links(args.subreddit, args.date_start, args.date_stop, args.limit, args.score, self_only)
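For context on the added `.lower()`: reddit treats subreddit names case-insensitively, but the archiver embeds the name in a directory path, which is case-sensitive on most Linux filesystems. A minimal sketch of the split the normalization prevents (paths illustrative, not repo code):

```python
# Without normalization, two casings of the same subreddit would be
# written to two separate data directories.
import os

for name in ('AskReddit', 'askreddit'):
    print(os.path.join('data', name))          # data/AskReddit, then data/askreddit
    print(os.path.join('data', name.lower()))  # data/askreddit both times
```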

write_html.py

@@ -163,7 +163,7 @@ def write_subreddit_pages(subreddit, subs, link_index, stat_sub_filtered_links,
 author_link_html = author_link_html.replace('###URL_AUTHOR###', author_url).replace('###AUTHOR###', l['author'])
 link_url = l['url']
-link_comments_url = sort_based_prefix + l['permalink'].strip('/')
+link_comments_url = sort_based_prefix + l['permalink'].lower().strip('/')
 link_comments_url = link_comments_url.replace('r/' + subreddit + '/', '')
 idpath = '/'.join(list(l['id']))
 link_comments_url = link_comments_url.replace(l['id'], idpath)
@@ -232,7 +232,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
 # reddit: https://www.reddit.com/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/
 # archive: r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
 idpath = '/'.join(list(link['id']))
-filepath = link['permalink'].strip('/') + '.html'
+filepath = link['permalink'].lower().strip('/') + '.html'
 filepath = filepath.replace(link['id'], idpath)
 if os.path.isfile(filepath):
     return True
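The pattern repeated across these hunks is the permalink-to-archive-path mapping spelled out in the comments above; a standalone sketch, assuming link ids are lowercase base36 (the helper name is illustrative, not from write_html.py):

```python
# Sketch of the mapping in the hunks above. The id '8742iv' is spread
# one character per directory level, and the whole path is lowercased
# so differently-cased permalinks resolve to the same file.
def permalink_to_filepath(permalink, link_id):
    idpath = '/'.join(list(link_id))                   # '8742iv' -> '8/7/4/2/i/v'
    filepath = permalink.lower().strip('/') + '.html'
    return filepath.replace(link_id, idpath)

print(permalink_to_filepath(
    '/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/',
    '8742iv'))
# -> r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
```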
@@ -325,7 +325,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
 # reddit: https://www.reddit.com/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/
 # archive: r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
 idpath = '/'.join(list(link['id']))
-filepath = link['permalink'].strip('/') + '.html'
+filepath = link['permalink'].lower().strip('/') + '.html'
 filepath = filepath.replace(link['id'], idpath)
 if not os.path.isfile(filepath):
     os.makedirs(os.path.dirname(filepath), exist_ok=True)
@@ -350,7 +350,7 @@ def write_subreddit_search_page(subreddit, subs, link_index, stat_sub_filtered_l
 links_html = ''
 for l in links:
-    link_comments_url = l['permalink'].strip('/').replace('r/' + subreddit + '/', '')
+    link_comments_url = l['permalink'].lower().strip('/').replace('r/' + subreddit + '/', '')
     idpath = '/'.join(list(l['id']))
     link_comments_url = link_comments_url.replace(l['id'], idpath)
     link_comments_url += '.html'
@@ -417,7 +417,7 @@ def write_user_page(subs, user_index):
 author_url = l['author'] + '.html'
 author_link_html = author_link_html.replace('###URL_AUTHOR###', author_url).replace('###AUTHOR###', l['author'])
-link_comments_url = '../' + l['permalink'].strip('/').strip('r/')
+link_comments_url = '../' + l['permalink'].lower().strip('/').strip('r/')
 idpath = '/'.join(list(l['id']))
 link_comments_url = link_comments_url.replace(l['id'], idpath)
 link_comments_url += '.html'
@@ -618,7 +618,7 @@ def get_subs():
         return subs
     for d in os.listdir('data'):
         if os.path.isdir('data' + '/' + d):
-            subs.append(d)
+            subs.append(d.lower())
     return subs
 def get_pager_html(page_num=1, pages=1):
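This last hunk covers the read side: archives created before the fix may still have mixed-case directories on disk, so get_subs() lowercases what os.listdir() returns to match the lowercased paths written above. A behaviorally equivalent compact form (a sketch, not repo code):

```python
import os

# Equivalent to the loop in get_subs(): lowercase whatever casing is on
# disk so sub names always match the lowercased permalink paths.
subs = []
if os.path.isdir('data'):
    subs = [d.lower() for d in os.listdir('data') if os.path.isdir('data' + '/' + d)]
print(subs)
```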