diff --git a/README.md b/README.md
index e1ede93..a33a98f 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,13 @@ requires python 3 on linux, OSX, or Windows
     cd reddit-html-archiver
     chmod u+x *.py
 
+Windows users may need to run
+
+    chcp 65001
+    set PYTHONIOENCODING=utf-8
+
+before running `fetch_links.py` or `write_html.py` to resolve encoding errors such as 'codec can't encode character'.
+
 ### fetch reddit data
 
 data is fetched by subreddit and date range and is stored as csv files in `data`.
@@ -44,15 +51,6 @@ to update an html archive, delete everything in `r` aside from `r/static` and re
 
 copy the contents of the `r` directory to a web root or appropriately served git repo.
 
-### Windows users
-
-Windows users may need to run the following before running `fetch_links.py` or `write_html.py`:
-
-    chcp 65001
-    set PYTHONIOENCODING=utf-8
-
-This will resolve errors encoding errors such as 'codec can't encode character'.
-
 ### potential improvements
 
 * fetch_links
@@ -62,15 +60,16 @@ This will resolve errors encoding errors such as 'codec can't encode character'.
   * score update
     * scores from reddit with [praw](https://github.com/praw-dev/praw)
 * real templating
-* filter output per sub, individual min score and comments filters
-* js markdown url previews
-* js powered search page, show no links by default
-* link and comments pages
-  * view on reddit.com
-  * show link domains
+* choose [Bootswatch](https://bootswatch.com/) theme
+* specify subreddits to output
+* show link domain/post type
 * user pages
   * add pagination, posts sorted by score, comments, date, sub
   * too many files in one directory
+* view on reddit.com
+* js powered search page, show no links by default
+* js inline media embeds/expandos
+* archive.org links
 
 ### see also
 
diff --git a/fetch_links.py b/fetch_links.py
index e7bdc8c..ec77bcc 100755
--- a/fetch_links.py
+++ b/fetch_links.py
@@ -214,4 +214,6 @@ if __name__ == '__main__':
     if args.self_only:
         self_only = True
 
+    args.subreddit = args.subreddit.lower()
+
     fetch_links(args.subreddit, args.date_start, args.date_stop, args.limit, args.score, self_only)
diff --git a/write_html.py b/write_html.py
index fa47ca8..fdfbbb2 100755
--- a/write_html.py
+++ b/write_html.py
@@ -163,7 +163,7 @@ def write_subreddit_pages(subreddit, subs, link_index, stat_sub_filtered_links,
             author_link_html = author_link_html.replace('###URL_AUTHOR###', author_url).replace('###AUTHOR###', l['author'])
 
         link_url = l['url']
-        link_comments_url = sort_based_prefix + l['permalink'].strip('/')
+        link_comments_url = sort_based_prefix + l['permalink'].lower().strip('/')
         link_comments_url = link_comments_url.replace('r/' + subreddit + '/', '')
         idpath = '/'.join(list(l['id']))
         link_comments_url = link_comments_url.replace(l['id'], idpath)
@@ -232,7 +232,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
     # reddit:  https://www.reddit.com/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/
     # archive: r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
     idpath = '/'.join(list(link['id']))
-    filepath = link['permalink'].strip('/') + '.html'
+    filepath = link['permalink'].lower().strip('/') + '.html'
     filepath = filepath.replace(link['id'], idpath)
     if os.path.isfile(filepath):
         return True
@@ -325,7 +325,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
     # reddit:  https://www.reddit.com/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/
     # archive: r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
     idpath = '/'.join(list(link['id']))
-    filepath = link['permalink'].strip('/') + '.html'
+    filepath = link['permalink'].lower().strip('/') + '.html'
     filepath = filepath.replace(link['id'], idpath)
     if not os.path.isfile(filepath):
         os.makedirs(os.path.dirname(filepath), exist_ok=True)
@@ -350,7 +350,7 @@ def write_subreddit_search_page(subreddit, subs, link_index, stat_sub_filtered_l
 
     links_html = ''
     for l in links:
-        link_comments_url = l['permalink'].strip('/').replace('r/' + subreddit + '/', '')
+        link_comments_url = l['permalink'].lower().strip('/').replace('r/' + subreddit + '/', '')
         idpath = '/'.join(list(l['id']))
         link_comments_url = link_comments_url.replace(l['id'], idpath)
         link_comments_url += '.html'
@@ -417,7 +417,7 @@ def write_user_page(subs, user_index):
             author_url = l['author'] + '.html'
             author_link_html = author_link_html.replace('###URL_AUTHOR###', author_url).replace('###AUTHOR###', l['author'])
 
-        link_comments_url = '../' + l['permalink'].strip('/').strip('r/')
+        link_comments_url = '../' + l['permalink'].lower().strip('/').strip('r/')
         idpath = '/'.join(list(l['id']))
         link_comments_url = link_comments_url.replace(l['id'], idpath)
         link_comments_url += '.html'
@@ -618,7 +618,7 @@ def get_subs():
         return subs
     for d in os.listdir('data'):
         if os.path.isdir('data' + '/' + d):
-            subs.append(d)
+            subs.append(d.lower())
     return subs
 
 def get_pager_html(page_num=1, pages=1):
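A note on the path scheme the `write_html.py` hunks touch: as the `# reddit:` / `# archive:` comments above show, the archiver maps a reddit permalink to an on-disk path by stripping slashes and splitting the base-36 post id into one directory per character, and this patch lowercases the permalink so the same post cannot land in two differently-cased paths. A minimal sketch of that mapping, using a hypothetical helper name (`permalink_to_filepath` is illustration only, not code from the repo):

    # Hypothetical helper mirroring the patched lines in write_link_page.
    def permalink_to_filepath(permalink, link_id):
        # lowercase + strip slashes, as the '+' lines above now do
        filepath = permalink.lower().strip('/') + '.html'
        # '8742iv' -> '8/7/4/2/i/v': one directory level per id character
        idpath = '/'.join(list(link_id))
        return filepath.replace(link_id, idpath)

    print(permalink_to_filepath(
        '/r/Conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/',
        '8742iv'))
    # r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html

Without the added `.lower()`, 'r/Conspiracy/...' and 'r/conspiracy/...' would resolve to two distinct paths on case-sensitive filesystems (linux), which is also why `fetch_links.py` and `get_subs()` lowercase subreddit names at both ends.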
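One caveat worth flagging on the `write_user_page` hunk: `str.strip('r/')` removes any run of the characters 'r' and '/' from both ends, not the literal prefix 'r/'. The patch carries this forward unchanged (`.lower()` does not affect it); a quick illustration with hypothetical values:

    # str.strip takes a set of characters, not a prefix string:
    'r/rust/comments/abc123/title'.strip('r/')
    # -> 'ust/comments/abc123/title'  (the leading 'r' of 'rust' is stripped too)

    # the literal-prefix intent, on Python 3.9+:
    'r/rust/comments/abc123/title'.removeprefix('r/')
    # -> 'rust/comments/abc123/title'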