fix subreddit name casing bugs, resolves #3

libertysoft3 2019-04-09 02:59:18 -07:00
parent 43a93e1a04
commit 83fb77b41c
3 changed files with 22 additions and 21 deletions

README.md

@@ -16,6 +16,13 @@ requires python 3 on linux, OSX, or Windows
 cd reddit-html-archiver
 chmod u+x *.py
+Windows users may need to run
+chcp 65001
+set PYTHONIOENCODING=utf-8
+before running `fetch_links.py` or `write_html.py` to resolve encoding errors such as 'codec can't encode character'.
 ### fetch reddit data
 data is fetched by subreddit and date range and is stored as csv files in `data`.
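An aside, not part of this commit: the chcp 65001 / PYTHONIOENCODING workaround the first hunk adds can also be applied from inside Python itself; a minimal sketch, assuming Python 3.7+ for `sys.stdout.reconfigure()`:

```python
# Hypothetical alternative to chcp 65001 / set PYTHONIOENCODING=utf-8
# (not in this commit): force UTF-8 on stdout so print() stops raising
# "codec can't encode character" on Windows consoles.
import sys

if sys.stdout.encoding and sys.stdout.encoding.lower() != 'utf-8':
    sys.stdout.reconfigure(encoding='utf-8', errors='replace')
```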
@@ -44,15 +51,6 @@ to update an html archive, delete everything in `r` aside from `r/static` and re
 copy the contents of the `r` directory to a web root or appropriately served git repo.
-### Windows users
-Windows users may need to run the following before running `fetch_links.py` or `write_html.py`:
-chcp 65001
-set PYTHONIOENCODING=utf-8
-This will resolve encoding errors such as 'codec can't encode character'.
 ### potential improvements
 * fetch_links
@@ -62,15 +60,16 @@ This will resolve encoding errors such as 'codec can't encode character'.
 * score update
 * scores from reddit with [praw](https://github.com/praw-dev/praw)
 * real templating
-* filter output per sub, individual min score and comments filters
-* js markdown url previews
-* js powered search page, show no links by default
-* link and comments pages
-* view on reddit.com
-* show link domains
-* choose [Bootswatch](https://bootswatch.com/) theme
+* specify subreddits to output
+* show link domain/post type
+* user pages
+* add pagination, posts sorted by score, comments, date, sub
+* too many files in one directory
+* view on reddit.com
+* js powered search page, show no links by default
+* js inline media embeds/expandos
+* archive.org links
 ### see also

fetch_links.py

@@ -214,4 +214,6 @@ if __name__ == '__main__':
     if args.self_only:
         self_only = True
+    args.subreddit = args.subreddit.lower()
     fetch_links(args.subreddit, args.date_start, args.date_stop, args.limit, args.score, self_only)
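For context on the added `.lower()`: reddit treats subreddit names case-insensitively, but the archiver embeds the name in a directory path, which is case-sensitive on most Linux filesystems. A minimal sketch of the split the normalization prevents (paths illustrative, not repo code):

```python
# Without normalization, two casings of the same subreddit would be
# written to two separate data directories.
import os

for name in ('AskReddit', 'askreddit'):
    print(os.path.join('data', name))          # data/AskReddit, then data/askreddit
    print(os.path.join('data', name.lower()))  # data/askreddit both times
```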

write_html.py

@@ -163,7 +163,7 @@ def write_subreddit_pages(subreddit, subs, link_index, stat_sub_filtered_links,
 author_link_html = author_link_html.replace('###URL_AUTHOR###', author_url).replace('###AUTHOR###', l['author'])
 link_url = l['url']
-link_comments_url = sort_based_prefix + l['permalink'].strip('/')
+link_comments_url = sort_based_prefix + l['permalink'].lower().strip('/')
 link_comments_url = link_comments_url.replace('r/' + subreddit + '/', '')
 idpath = '/'.join(list(l['id']))
 link_comments_url = link_comments_url.replace(l['id'], idpath)
@@ -232,7 +232,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
 # reddit: https://www.reddit.com/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/
 # archive: r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
 idpath = '/'.join(list(link['id']))
-filepath = link['permalink'].strip('/') + '.html'
+filepath = link['permalink'].lower().strip('/') + '.html'
 filepath = filepath.replace(link['id'], idpath)
 if os.path.isfile(filepath):
     return True
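The pattern repeated across these hunks is the permalink-to-archive-path mapping spelled out in the comments above; a standalone sketch, assuming link ids are lowercase base36 (the helper name is illustrative, not from write_html.py):

```python
# Sketch of the mapping in the hunks above. The id '8742iv' is spread
# one character per directory level, and the whole path is lowercased
# so differently-cased permalinks resolve to the same file.
def permalink_to_filepath(permalink, link_id):
    idpath = '/'.join(list(link_id))                   # '8742iv' -> '8/7/4/2/i/v'
    filepath = permalink.lower().strip('/') + '.html'
    return filepath.replace(link_id, idpath)

print(permalink_to_filepath(
    '/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/',
    '8742iv'))
# -> r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
```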
@@ -325,7 +325,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
 # reddit: https://www.reddit.com/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/
 # archive: r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
 idpath = '/'.join(list(link['id']))
-filepath = link['permalink'].strip('/') + '.html'
+filepath = link['permalink'].lower().strip('/') + '.html'
 filepath = filepath.replace(link['id'], idpath)
 if not os.path.isfile(filepath):
     os.makedirs(os.path.dirname(filepath), exist_ok=True)
@@ -350,7 +350,7 @@ def write_subreddit_search_page(subreddit, subs, link_index, stat_sub_filtered_l
 links_html = ''
 for l in links:
-    link_comments_url = l['permalink'].strip('/').replace('r/' + subreddit + '/', '')
+    link_comments_url = l['permalink'].lower().strip('/').replace('r/' + subreddit + '/', '')
     idpath = '/'.join(list(l['id']))
     link_comments_url = link_comments_url.replace(l['id'], idpath)
     link_comments_url += '.html'
@@ -417,7 +417,7 @@ def write_user_page(subs, user_index):
 author_url = l['author'] + '.html'
 author_link_html = author_link_html.replace('###URL_AUTHOR###', author_url).replace('###AUTHOR###', l['author'])
-link_comments_url = '../' + l['permalink'].strip('/').strip('r/')
+link_comments_url = '../' + l['permalink'].lower().strip('/').strip('r/')
 idpath = '/'.join(list(l['id']))
 link_comments_url = link_comments_url.replace(l['id'], idpath)
 link_comments_url += '.html'
@@ -618,7 +618,7 @@ def get_subs():
         return subs
     for d in os.listdir('data'):
         if os.path.isdir('data' + '/' + d):
-            subs.append(d)
+            subs.append(d.lower())
     return subs
 def get_pager_html(page_num=1, pages=1):
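This last hunk covers the read side: archives created before the fix may still have mixed-case directories on disk, so get_subs() lowercases what os.listdir() returns to match the lowercased paths written above. A behaviorally equivalent compact form (a sketch, not repo code):

```python
import os

# Equivalent to the loop in get_subs(): lowercase whatever casing is on
# disk so sub names always match the lowercased permalink paths.
subs = []
if os.path.isdir('data'):
    subs = [d.lower() for d in os.listdir('data') if os.path.isdir('data' + '/' + d)]
print(subs)
```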