mirror of
https://github.com/sys-nyx/red-arch.git
synced 2025-05-06 08:45:31 -04:00
comments without scores show as 'n/a'
This commit is contained in:
parent
d9d000055b
commit
67a7356ede
2 changed files with 20 additions and 16 deletions
19
README.md
19
README.md
|
@ -16,12 +16,12 @@ requires python 3
|
|||
cd reddit-html-archiver
|
||||
chmod u+x *.py
|
||||
|
||||
### fetch reddit data from pushshift
|
||||
### fetch reddit data
|
||||
|
||||
data is fetched by subreddit and date range.
|
||||
data is fetched by subreddit and date range and is stored as csv files in `data`.
|
||||
|
||||
./fetch_links.py politics 2017-1-1 2017-2-1
|
||||
# or add some link/post request parameters
|
||||
# or add some link/post request filters
|
||||
./fetch_links.py --self_only --score "> 2000" politics 2015-1-1 2016-1-1
|
||||
./fetch_links.py -h
|
||||
|
||||
|
@ -29,19 +29,20 @@ you may need to decrease your date range or adjust `pushshift_rate_limit_per_minute
|
|||
|
||||
### write web pages
|
||||
|
||||
write html files for all subreddits.
|
||||
write html files for all subreddits to `r`.
|
||||
|
||||
./write_html.py
|
||||
# or add some output filtering
|
||||
./write_html.py --min-score 100 --min-comments 100 --hide-deleted-comments
|
||||
./write_html.py -h
|
||||
|
||||
your html archive has been written to `r`. once you are satisfied with your archive, feel free to copy/move the contents of `r` elsewhere and to delete the git repos you have created. everything in `r` is fully self-contained.
|
||||
|
||||
if you add more data later, delete everything in `r` aside from `r/static` and re-run the script to refresh your archive's pages.
|
||||
to update an html archive, delete everything in `r` aside from `r/static` and re-run `write_html.py` to regenerate everything.
|
||||
|
||||
### hosting the archived pages
|
||||
|
||||
copy the contents of the `r` directory to a web root or appropriately served git repo. or serve it directly.
|
||||
copy the contents of the `r` directory to a web root or appropriately served git repo.
|
||||
|
||||
### potential improvements
|
||||
|
||||
|
@ -49,7 +50,9 @@ copy the contents of the `r` directory to a web root or appropriately served git
|
|||
* num_comments filtering
|
||||
* thumbnails or thumbnail urls
|
||||
* media posts
|
||||
* update scores from the reddit api with [praw](https://github.com/praw-dev/praw)
|
||||
* score update
|
||||
* scores from reddit with [praw](https://github.com/praw-dev/praw)
|
||||
* view on reddit.com
|
||||
* real templating
|
||||
* filter output per sub, individual min score and comments filters
|
||||
* js markdown url previews
|
||||
|
@ -60,7 +63,7 @@ copy the contents of the `r` directory to a web root or appropriately served git
|
|||
|
||||
### see also
|
||||
|
||||
* [pushshift](https://github.com/pushshift/api) [subreddit](https://www.reddit.com/r/pushshift/)
|
||||
* [pushshift](https://github.com/pushshift/api), [r/pushshift](https://www.reddit.com/r/pushshift/)
|
||||
* [psaw](https://github.com/dmarx/psaw)
|
||||
* [snudown](https://github.com/reddit/snudown)
|
||||
* [redditsearch.io](https://redditsearch.io/)
|
||||
|
|
|
@ -30,6 +30,7 @@ sort_indexes = {
|
|||
'slug': 'date',
|
||||
}
|
||||
}
|
||||
missing_comment_score_label = 'n/a'
|
||||
|
||||
template_index = ''
|
||||
with open('templates/index.html', 'r') as file:
|
||||
|
@ -174,8 +175,8 @@ def write_subreddit_pages(subreddit, subs, link_index, stat_sub_filtered_links,
|
|||
'###TITLE###': l['title'],
|
||||
'###URL###': link_url,
|
||||
'###URL_COMMENTS###': link_comments_url,
|
||||
'###SCORE###': l['score'],
|
||||
'###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else 0,
|
||||
'###SCORE###': str(l['score']),
|
||||
'###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else str(0),
|
||||
'###DATE###': datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'),
|
||||
'###LINK_DOMAIN###': '(self.' + l['subreddit'] + ')' if l['is_self'] is True or l['is_self'] == 'True' else '',
|
||||
'###HTML_AUTHOR_URL###': author_link_html,
|
||||
|
@ -264,7 +265,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
|
|||
'###PARENT_ID###': c['parent_id'],
|
||||
'###DEPTH###': str(c['depth']),
|
||||
'###DATE###': created.strftime('%Y-%m-%d'),
|
||||
'###SCORE###': c['score'],
|
||||
'###SCORE###': str(c['score']) if len(str(c['score'])) > 0 else missing_comment_score_label,
|
||||
'###BODY###': snudown.markdown(c['body'].replace('&gt;','>')),
|
||||
'###CSS_CLASSES###': css_classes,
|
||||
'###CLASS_SCORE###': 'badge-danger' if len(c['score']) > 0 and int(c['score']) < 1 else 'badge-secondary',
|
||||
|
@ -302,8 +303,8 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
|
|||
'###ID###': link['id'],
|
||||
'###DATE###': created.strftime('%Y-%m-%d'),
|
||||
'###ARCHIVE_DATE###': datetime.utcfromtimestamp(int(link['retrieved_on'])).strftime('%Y-%m-%d') if link['retrieved_on'] != '' else 'n/a',
|
||||
'###SCORE###': link['score'],
|
||||
'###NUM_COMMENTS###': link['num_comments'],
|
||||
'###SCORE###': str(link['score']),
|
||||
'###NUM_COMMENTS###': str(link['num_comments']),
|
||||
'###URL_PROJECT###': url_project,
|
||||
'###URL_SUBS###': static_include_path + 'index.html',
|
||||
'###URL_SUB###': static_include_path + subreddit + '/index.html',
|
||||
|
@ -428,8 +429,8 @@ def write_user_page(subs, user_index):
|
|||
'###TITLE###': l['title'],
|
||||
'###URL###': link_url,
|
||||
'###URL_COMMENTS###': link_comments_url,
|
||||
'###SCORE###': l['score'],
|
||||
'###NUM_COMMENTS###': l['num_comments'] if int(l['num_comments']) > 0 else 0,
|
||||
'###SCORE###': str(l['score']),
|
||||
'###NUM_COMMENTS###': str(l['num_comments']) if int(l['num_comments']) > 0 else str(0),
|
||||
'###DATE###': datetime.utcfromtimestamp(int(l['created_utc'])).strftime('%Y-%m-%d'),
|
||||
'###SUB###': l['subreddit'],
|
||||
'###SUB_URL###': '../' + l['subreddit'] + '/index.html',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue