From 67a7356ede4255a2a6adb4c2b7b44a58b44808e1 Mon Sep 17 00:00:00 2001
From: libertysoft3
Date: Sun, 28 Oct 2018 22:22:20 -0700
Subject: [PATCH] comments without scores show as 'n/a'

---
 README.md     | 21 ++++++++++++---------
 write_html.py | 15 ++++++++-------
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 469fcae..2d974b8 100644
--- a/README.md
+++ b/README.md
@@ -16,12 +16,12 @@ requires python 3
     cd reddit-html-archiver
     chmod u+x *.py
 
-### fetch reddit data from pushshift
+### fetch reddit data
 
-data is fetched by subreddit and date range.
+data is fetched by subreddit and date range and is stored as csv files in `data`.
 
     ./fetch_links.py politics 2017-1-1 2017-2-1
-    # or add some link/post request parameters
+    # or add some link/post request filters
     ./fetch_links.py --self_only --score "> 2000" politics 2015-1-1 2016-1-1
     ./fetch_links.py -h
 
@@ -29,19 +29,20 @@ you may need decrease your date range or adjust `pushshift_rate_limit_per_minute
 
 ### write web pages
 
-write html files for all subreddits.
+write html files for all subreddits to `r`.
 
     ./write_html.py
    # or add some output filtering
     ./write_html.py --min-score 100 --min-comments 100 --hide-deleted-comments
     ./write_html.py -h
-
-if you add more data later, delete everything in `r` aside from `r/static` and re-run the script to refresh your archive's pages.
+your html archive has been written to `r`. once you are satisfied with your archive, feel free to copy or move the contents of `r` elsewhere and delete the git repos you have created. everything in `r` is fully self-contained.
+
+to update an html archive, delete everything in `r` aside from `r/static` and re-run `write_html.py` to regenerate everything.
 
 ### hosting the archived pages
 
-copy the contents of the `r` directory to a web root or appropriately served git repo. or serve it directly.
+copy the contents of the `r` directory to a web root or appropriately served git repo.
 
 ### potential improvements
 
@@ -49,7 +50,9 @@ copy the contents of the `r` directory to a web root or appropriately served git
 * num_comments filtering
 * thumbnails or thumbnail urls
 * media posts
-* update scores from the reddit api with [praw](https://github.com/praw-dev/praw)
+* score update
+  * scores from reddit with [praw](https://github.com/praw-dev/praw)
+* view on reddit.com
 * real templating
 * filter output per sub, individual min score and comments filters
 * js markdown url previews
@@ -60,7 +63,7 @@ copy the contents of the `r` directory to a web root or appropriately served git
 
 ### see also
 
-* [pushshift](https://github.com/pushshift/api) [subreddit](https://www.reddit.com/r/pushshift/)
+* [pushshift](https://github.com/pushshift/api), [r/pushshift](https://www.reddit.com/r/pushshift/)
 * [psaw](https://github.com/dmarx/psaw)
 * [snudown](https://github.com/reddit/snudown)
 * [redditsearch.io](https://redditsearch.io/)
diff --git a/write_html.py b/write_html.py
index 89efffc..507a2f3 100755
--- a/write_html.py
+++ b/write_html.py
@@ -30,6 +30,7 @@ sort_indexes = {
         'slug': 'date',
     }
 }
+missing_comment_score_label = 'n/a'
 
 template_index = ''
 with open('templates/index.html', 'r') as file:
@@ -174,8 +175,8 @@ def write_subreddit_pages(subreddit, subs, link_index, stat_sub_filtered_links,
             '###TITLE###': l['title'],
             '###URL###': link_url,
             '###URL_COMMENTS###': link_comments_url,
-            '###SCORE###': l['score'],
-            '###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else 0,
+            '###SCORE###': str(l['score']),
+            '###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else str(0),
             '###DATE###': datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'),
             '###LINK_DOMAIN###': '(self.' + l['subreddit'] + ')' if l['is_self'] is True or l['is_self'] == 'True' else '',
             '###HTML_AUTHOR_URL###': author_link_html,
@@ -264,7 +265,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
             '###PARENT_ID###': c['parent_id'],
             '###DEPTH###': str(c['depth']),
             '###DATE###': created.strftime('%Y-%m-%d'),
-            '###SCORE###': c['score'],
+            '###SCORE###': str(c['score']) if len(str(c['score'])) > 0 else missing_comment_score_label,
             '###BODY###': snudown.markdown(c['body'].replace('&gt;','>')),
             '###CSS_CLASSES###': css_classes,
             '###CLASS_SCORE###': 'badge-danger' if len(c['score']) > 0 and int(c['score']) < 1 else 'badge-secondary',
@@ -302,8 +303,8 @@
         '###ID###': link['id'],
         '###DATE###': created.strftime('%Y-%m-%d'),
         '###ARCHIVE_DATE###': datetime.utcfromtimestamp(int(link['retrieved_on'])).strftime('%Y-%m-%d') if link['retrieved_on'] != '' else 'n/a',
-        '###SCORE###': link['score'],
-        '###NUM_COMMENTS###': link['num_comments'],
+        '###SCORE###': str(link['score']),
+        '###NUM_COMMENTS###': str(link['num_comments']),
         '###URL_PROJECT###': url_project,
         '###URL_SUBS###': static_include_path + 'index.html',
         '###URL_SUB###': static_include_path + subreddit + '/index.html',
@@ -428,8 +429,8 @@ def write_user_page(subs, user_index):
             '###TITLE###': l['title'],
             '###URL###': link_url,
             '###URL_COMMENTS###': link_comments_url,
-            '###SCORE###': l['score'],
-            '###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else 0,
+            '###SCORE###': str(l['score']),
+            '###NUM_COMMENTS###': str(l['num_comments']) if int(l['num_comments']) > 0 else str(0),
             '###DATE###': datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'),
             '###SUB###': l['subreddit'],
             '###SUB_URL###': '../' + l['subreddit'] + '/index.html',
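
note: a minimal standalone sketch of the fallback this patch adds, for reference. pushshift csv rows can carry an empty `score` field for comments, and the patched template substitution renders those as `missing_comment_score_label` ('n/a') while keeping the existing badge-class rule. the `render_comment_score` helper and the sample rows below are illustrative only, not part of write_html.py.

    # illustrative sketch of the comment score fallback; render_comment_score is
    # hypothetical, but the label and badge rule mirror the patched substitutions.
    missing_comment_score_label = 'n/a'

    def render_comment_score(score):
        # comment rows fetched from pushshift may carry an empty 'score' string
        score = str(score)
        if len(score) == 0:
            return missing_comment_score_label, 'badge-secondary'
        css_class = 'badge-danger' if int(score) < 1 else 'badge-secondary'
        return score, css_class

    if __name__ == '__main__':
        for row in [{'score': '42'}, {'score': '-3'}, {'score': ''}]:
            print(render_comment_score(row['score']))
        # -> ('42', 'badge-secondary'), ('-3', 'badge-danger'), ('n/a', 'badge-secondary')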