comments without scores show as 'n/a'

2025-11-25 18:06:39 -05:00 · 2018-10-28 22:22:20 -07:00 · 2018-10-28 22:22:20 -07:00 · 67a7356ede
commit 67a7356ede
parent d9d000055b
2 changed files with 20 additions and 16 deletions
--- a/README.md
+++ b/README.md
@ -16,12 +16,12 @@ requires python 3
    cd reddit-html-archiver
    chmod u+x *.py
-### fetch reddit data from pushshift
+### fetch reddit data
-data is fetched by subreddit and date range.
+data is fetched by subreddit and date range and is stored as csv files in `data`.
    ./fetch_links.py politics 2017-1-1 2017-2-1
-    # or add some link/post request parameters
+    # or add some link/post request filters
    ./fetch_links.py --self_only --score "> 2000" politics 2015-1-1 2016-1-1
    ./fetch_links.py -h
@ -29,19 +29,20 @@ you may need to decrease your date range or adjust `pushshift_rate_limit_per_minute
 ### write web pages
-write html files for all subreddits.
+write html files for all subreddits to `r`.
    ./write_html.py
    # or add some output filtering
    ./write_html.py --min-score 100 --min-comments 100 --hide-deleted-comments
    ./write_html.py -h
 your html archive has been written to `r`. once you are satisfied with your archive, feel free to copy/move the contents of `r` elsewhere and to delete the git repos you have created. everything in `r` is fully self-contained.
-if you add more data later, delete everything in `r` aside from `r/static` and re-run the script to refresh your archive's pages.
+to update an html archive, delete everything in `r` aside from `r/static` and re-run `write_html.py` to regenerate everything.
 ### hosting the archived pages
-copy the contents of the `r` directory to a web root or appropriately served git repo. or serve it directly.
+copy the contents of the `r` directory to a web root or appropriately served git repo.
 ### potential improvements
@ -49,7 +50,9 @@ copy the contents of the `r` directory to a web root or appropriately served git
  * num_comments filtering
  * thumbnails or thumbnail urls
  * media posts
-  * update scores from the reddit api with [praw](https://github.com/praw-dev/praw)
+  * score update
  * scores from reddit with [praw](https://github.com/praw-dev/praw)
 * view on reddit.com
 * real templating
 * filter output per sub, individual min score and comments filters
 * js markdown url previews
@ -60,7 +63,7 @@ copy the contents of the `r` directory to a web root or appropriately served git
 ### see also
-* [pushshift](https://github.com/pushshift/api) [subreddit](https://www.reddit.com/r/pushshift/)
+* [pushshift](https://github.com/pushshift/api), [r/pushshift](https://www.reddit.com/r/pushshift/)
 * [psaw](https://github.com/dmarx/psaw)
 * [snudown](https://github.com/reddit/snudown)
 * [redditsearch.io](https://redditsearch.io/)
--- a/write_html.py
+++ b/write_html.py
@ -30,6 +30,7 @@ sort_indexes = {
        'slug': 'date',
    }
 }
+missing_comment_score_label = 'n/a'
 template_index = ''
 with open('templates/index.html', 'r') as file:
@ -174,8 +175,8 @@ def write_subreddit_pages(subreddit, subs, link_index, stat_sub_filtered_links,
                    '###TITLE###':              l['title'],
                    '###URL###':                link_url,
                    '###URL_COMMENTS###':       link_comments_url,
-                    '###SCORE###':              l['score'],
+                    '###SCORE###':              str(l['score']),
-                    '###NUM_COMMENTS###':       l['num_comments'] if int(l['num_comments']) > 0 else 0,
+                    '###NUM_COMMENTS###':       l['num_comments'] if int(l['num_comments']) > 0 else str(0),
                    '###DATE###':               datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'),
                    '###LINK_DOMAIN###':        '(self.' + l['subreddit'] + ')' if l['is_self'] is True or l['is_self'] == 'True' else '',
                    '###HTML_AUTHOR_URL###':    author_link_html,
@ -264,7 +265,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
            '###PARENT_ID###':          c['parent_id'],
            '###DEPTH###':              str(c['depth']),
            '###DATE###':               created.strftime('%Y-%m-%d'),
-            '###SCORE###':              c['score'],
+            '###SCORE###':              str(c['score']) if len(str(c['score'])) > 0 else missing_comment_score_label,
            '###BODY###':               snudown.markdown(c['body'].replace('&gt;','>')),
            '###CSS_CLASSES###':        css_classes,
            '###CLASS_SCORE###':        'badge-danger' if len(c['score']) > 0 and int(c['score']) < 1 else 'badge-secondary',
@ -302,8 +303,8 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
        '###ID###':                 link['id'],
        '###DATE###':               created.strftime('%Y-%m-%d'),
        '###ARCHIVE_DATE###':       datetime.utcfromtimestamp(int(link['retrieved_on'])).strftime('%Y-%m-%d') if link['retrieved_on'] != '' else 'n/a',
-        '###SCORE###':              link['score'],
+        '###SCORE###':              str(link['score']),
-        '###NUM_COMMENTS###':       link['num_comments'],
+        '###NUM_COMMENTS###':       str(link['num_comments']),
        '###URL_PROJECT###':        url_project,
        '###URL_SUBS###':           static_include_path + 'index.html',
        '###URL_SUB###':            static_include_path + subreddit + '/index.html',
@ -428,8 +429,8 @@ def write_user_page(subs, user_index):
                '###TITLE###':              l['title'],
                '###URL###':                link_url,
                '###URL_COMMENTS###':       link_comments_url,
-                '###SCORE###':              l['score'],
+                '###SCORE###':              str(l['score']),
-                '###NUM_COMMENTS###':       l['num_comments'] if int(l['num_comments']) > 0 else 0,
+                '###NUM_COMMENTS###':       str(l['num_comments']) if int(l['num_comments']) > 0 else str(0),
                '###DATE###':               datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'),
                '###SUB###':                l['subreddit'],
                '###SUB_URL###':            '../' + l['subreddit'] + '/index.html',