mirror of https://github.com/sys-nyx/red-arch.git
synced 2025-05-05 08:15:28 -04:00
fix subreddit name casing bugs, resolves #3
This commit is contained in:
parent 43a93e1a04
commit 83fb77b41c

3 changed files with 22 additions and 21 deletions
README.md (29 changes)
@@ -16,6 +16,13 @@ requires python 3 on linux, OSX, or Windows

    cd reddit-html-archiver
    chmod u+x *.py

+Windows users may need to run
+
+    chcp 65001
+    set PYTHONIOENCODING=utf-8
+
+before running `fetch_links.py` or `write_html.py` to resolve encoding errors such as 'codec can't encode character'.
+
### fetch reddit data

data is fetched by subreddit and date range and is stored as csv files in `data`.
@@ -44,15 +51,6 @@ to update an html archive, delete everything in `r` aside from `r/static` and re

copy the contents of the `r` directory to a web root or appropriately served git repo.

-### Windows users
-
-Windows users may need to run the following before running `fetch_links.py` or `write_html.py`:
-
-    chcp 65001
-    set PYTHONIOENCODING=utf-8
-
-This will resolve errors encoding errors such as 'codec can't encode character'.
-
### potential improvements

* fetch_links
@@ -62,15 +60,16 @@ This will resolve errors encoding errors such as 'codec can't encode character'.

* score update
* scores from reddit with [praw](https://github.com/praw-dev/praw)
* real templating
* filter output per sub, individual min score and comments filters
* js markdown url previews
* js powered search page, show no links by default
* link and comments pages
* view on reddit.com
* show link domains
* choose [Bootswatch](https://bootswatch.com/) theme
* specify subreddits to output
* show link domain/post type
* user pages
* add pagination, posts sorted by score, comments, date, sub
* too many files in one directory
* view on reddit.com
* js powered search page, show no links by default
* js inline media embeds/expandos
* archive.org links

### see also
fetch_links.py

@@ -214,4 +214,6 @@ if __name__ == '__main__':

    if args.self_only:
        self_only = True

+    args.subreddit = args.subreddit.lower()
+
    fetch_links(args.subreddit, args.date_start, args.date_stop, args.limit, args.score, self_only)
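The added `args.subreddit.lower()` normalizes the subreddit name once at the entry point: reddit treats subreddit names case-insensitively, but the archiver uses them as directory names under `data/` and in permalinks, and those comparisons are case-sensitive. A minimal sketch of the idea (the helper name is hypothetical, not from this repo):

    # Lower-case the user-supplied name so 'AskReddit', 'askreddit' and
    # 'ASKREDDIT' all map to the same data/<subreddit>/ directory and the
    # same 'r/<subreddit>/' prefix used when building archive URLs.
    def normalize_subreddit(name: str) -> str:
        return name.strip().lower()

    assert normalize_subreddit(' AskReddit ') == normalize_subreddit('askreddit') == 'askreddit'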
write_html.py

@@ -163,7 +163,7 @@ def write_subreddit_pages(subreddit, subs, link_index, stat_sub_filtered_links,

    author_link_html = author_link_html.replace('###URL_AUTHOR###', author_url).replace('###AUTHOR###', l['author'])

    link_url = l['url']
-    link_comments_url = sort_based_prefix + l['permalink'].strip('/')
+    link_comments_url = sort_based_prefix + l['permalink'].lower().strip('/')
    link_comments_url = link_comments_url.replace('r/' + subreddit + '/', '')
    idpath = '/'.join(list(l['id']))
    link_comments_url = link_comments_url.replace(l['id'], idpath)
@@ -232,7 +232,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)

    # reddit: https://www.reddit.com/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/
    # archive: r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
    idpath = '/'.join(list(link['id']))
-    filepath = link['permalink'].strip('/') + '.html'
+    filepath = link['permalink'].lower().strip('/') + '.html'
    filepath = filepath.replace(link['id'], idpath)
    if os.path.isfile(filepath):
        return True
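The comment block above documents how a reddit permalink maps onto a file in the archive: the base-36 link id is split into one directory per character so a large subreddit does not pile thousands of HTML files into a single directory. Lowercasing the permalink is the actual fix, since this existence check and the code that later writes the page must agree on one casing. A self-contained sketch of the mapping (hypothetical helper; the real code builds the path inline):

    # '/r/conspiracy/comments/8742iv/Happening_Now_Classmate.../'
    #   -> 'r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate....html'
    def archive_filepath(permalink: str, link_id: str) -> str:
        idpath = '/'.join(list(link_id))                  # '8742iv' -> '8/7/4/2/i/v'
        filepath = permalink.lower().strip('/') + '.html'
        return filepath.replace(link_id, idpath)

    print(archive_filepath('/r/conspiracy/comments/8742iv/Happening_Now/', '8742iv'))
    # r/conspiracy/comments/8/7/4/2/i/v/happening_now.html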
@@ -325,7 +325,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)

    # reddit: https://www.reddit.com/r/conspiracy/comments/8742iv/happening_now_classmate_former_friend_of/
    # archive: r/conspiracy/comments/8/7/4/2/i/v/happening_now_classmate_former_friend_of.html
    idpath = '/'.join(list(link['id']))
-    filepath = link['permalink'].strip('/') + '.html'
+    filepath = link['permalink'].lower().strip('/') + '.html'
    filepath = filepath.replace(link['id'], idpath)
    if not os.path.isfile(filepath):
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
@@ -350,7 +350,7 @@ def write_subreddit_search_page(subreddit, subs, link_index, stat_sub_filtered_l

    links_html = ''
    for l in links:
-        link_comments_url = l['permalink'].strip('/').replace('r/' + subreddit + '/', '')
+        link_comments_url = l['permalink'].lower().strip('/').replace('r/' + subreddit + '/', '')
        idpath = '/'.join(list(l['id']))
        link_comments_url = link_comments_url.replace(l['id'], idpath)
        link_comments_url += '.html'
@@ -417,7 +417,7 @@ def write_user_page(subs, user_index):

    author_url = l['author'] + '.html'
    author_link_html = author_link_html.replace('###URL_AUTHOR###', author_url).replace('###AUTHOR###', l['author'])

-    link_comments_url = '../' + l['permalink'].strip('/').strip('r/')
+    link_comments_url = '../' + l['permalink'].lower().strip('/').strip('r/')
    idpath = '/'.join(list(l['id']))
    link_comments_url = link_comments_url.replace(l['id'], idpath)
    link_comments_url += '.html'
@@ -618,7 +618,7 @@ def get_subs():

        return subs
    for d in os.listdir('data'):
        if os.path.isdir('data' + '/' + d):
-            subs.append(d)
+            subs.append(d.lower())
    return subs

def get_pager_html(page_num=1, pages=1):
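`get_subs()` is where write_html.py discovers which subreddits exist, so it has to return the same lowercase names that fetch_links.py now creates under `data/` and that the permalink handling above expects. A tiny sketch of that invariant (standalone illustration, not code from the repo):

    import os

    # Directories under data/ are created from the already-lowercased
    # subreddit argument, and get_subs() lowercases what it reads back,
    # so path building and 'r/<subreddit>/' replacements stay consistent.
    def get_subs_sketch(root='data'):
        if not os.path.isdir(root):
            return []
        return [d.lower() for d in os.listdir(root)
                if os.path.isdir(os.path.join(root, d))]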