mirror of
https://github.com/sys-nyx/red-arch.git
synced 2025-05-06 08:45:31 -04:00
styled search; had to modify several templates to point to it correctly
This commit is contained in:
parent
2d105f25c4
commit
8003b64c9f
11 changed files with 496 additions and 47 deletions
config.toml (17 changes)

@@ -1,12 +1,13 @@
-#subreddit name
-[funny]
+#label for organizational purposes
+[transdiy]
 #path to pushshift comments file
-comments = funny_comments.zst
+comments = funnymemes_comments.zst
 #path to pushshift submissions file
-posts = funny_submissions.zst
+posts = funnymemes_submissions.zst
 
-#Add other subs if you like
-#[funnymemes]
-#comments = funnymemes_comments.zst
-#posts = funnymemes_submissions.zst
+
+# Add as many other sections as you want.
+[funnymemescontinued]
+comments = funnymemes_comments.zst
+posts = funnymemes_submissions.zst
 
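For context, a minimal sketch of how a script might consume these sections, assuming (as the configparser import added to the search script further down suggests) that the file is parsed as INI-style sections despite the .toml extension; the load_jobs helper is hypothetical, not code from this commit:

import configparser

def load_jobs(path="config.toml"):
    # hypothetical helper: one section per subreddit label, each pointing
    # at its pushshift comment and submission dump files
    config = configparser.ConfigParser()
    config.read(path)
    return {
        section: {
            "comments": config[section].get("comments"),
            "posts": config[section].get("posts"),
        }
        for section in config.sections()
    }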
filter_file.py (new file, 293 lines)

@@ -0,0 +1,293 @@
import os
import sys
import csv
import json
import argparse
import zstandard
import logging.handlers
import traceback  # used by the error handler at the bottom of the script
from datetime import datetime

# put the path to the input file, or a folder of files to process all of
input_file = r"\\MYCLOUDPR4100\Public\reddit\subreddits23\wallstreetbets_submissions.zst"
# put the name or path to the output file. The file extension from below will be added automatically. If the input file is a folder, the output will be treated as a folder as well
output_file = r"\\MYCLOUDPR4100\Public\output"
# the format to output in, pick from the following options
# zst: same as the input, a zstandard compressed ndjson file. Can be read by the other scripts in the repo
# txt: an ndjson file, which is a text file with a separate json object on each line. Can be opened by any text editor
# csv: a comma separated value file. Can be opened by a text editor or excel
# WARNING READ THIS: if you use txt or csv output on a large input file without filtering out most of the rows, the resulting file will be extremely large. Usually about 7 times as large as the compressed input file
output_format = "csv"
# override the above format and output only this field into a text file, one per line. Useful if you want to make a list of authors or ids. See the examples below
# any field that's in the dump is supported, but useful ones are
# author: the username of the author
# id: the id of the submission or comment
# link_id: only for comments, the fullname of the submission the comment is associated with
# parent_id: only for comments, the fullname of the parent of the comment. Either another comment or the submission if it's top level
single_field = None
# the fields in the file are different depending on whether it has comments or submissions. If we're writing a csv, we need to know which fields to write.
# set this to true to write out to the log every time there's a bad line, set to false if you're expecting only some of the lines to match the key
write_bad_lines = True

# only output items between these two dates
from_date = datetime.strptime("2005-01-01", "%Y-%m-%d")
to_date = datetime.strptime("2030-12-31", "%Y-%m-%d")

# the field to filter on, the values to filter with and whether it should be an exact match
# some examples:
#
# return only objects where the author is u/watchful1 or u/spez
# field = "author"
# values = ["watchful1","spez"]
# exact_match = True
#
# return only objects where the title contains either "stonk" or "moon"
# field = "title"
# values = ["stonk","moon"]
# exact_match = False
#
# return only objects where the body contains either "stonk" or "moon". For submissions the body is in the "selftext" field, for comments it's in the "body" field
# field = "selftext"
# values = ["stonk","moon"]
# exact_match = False
#
#
# filter a submission file and then get a file with all the comments only in those submissions. This is a multi step process
# add your submission filters and set the output file name to something unique
# input_file = "redditdev_submissions.zst"
# output_file = "filtered_submissions"
# output_format = "csv"
# field = "author"
# values = ["watchful1"]
#
# run the script, this will result in a file called "filtered_submissions.csv" that contains only submissions by u/watchful1
# now we'll run the script again with the same input and filters, but set the output to single field. Be sure to change the output file to a new name, but don't change any of the other inputs
# output_file = "submission_ids"
# single_field = "id"
#
# run the script again, this will result in a file called "submission_ids.txt" that has an id on each line
# now we'll remove all the other filters and update the script to input from the comments file, and use the submission ids list we created before. And change the output name again so we don't override anything
# input_file = "redditdev_comments.zst"
# output_file = "filtered_comments"
# single_field = None  # resetting this back so it's not used
# field = "link_id"  # in the comment object, this is the field that contains the submission id
# values_file = "submission_ids.txt"
# exact_match = False  # the link_id field has a prefix on it, so we can't do an exact match
#
# run the script one last time and now you have a file called "filtered_comments.csv" that only has comments from your submissions above
# if you want only top level comments instead of all comments, you can set field to "parent_id" instead of "link_id"

# change this to field = None if you don't want to filter by anything
field = "body"
values = ['']
# if you have a long list of values, you can put them in a file and put the filename here. If set this overrides the value list above
# if this list is very large, it could greatly slow down the process
values_file = None
exact_match = False


# sets up logging to the console as well as a file
log = logging.getLogger("bot")
log.setLevel(logging.INFO)
log_formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s')
log_str_handler = logging.StreamHandler()
log_str_handler.setFormatter(log_formatter)
log.addHandler(log_str_handler)
if not os.path.exists("logs"):
    os.makedirs("logs")
log_file_handler = logging.handlers.RotatingFileHandler(os.path.join("logs", "bot.log"), maxBytes=1024*1024*16, backupCount=5)
log_file_handler.setFormatter(log_formatter)
log.addHandler(log_file_handler)


def write_line_zst(handle, line):
    handle.write(line.encode('utf-8'))
    handle.write("\n".encode('utf-8'))


def write_line_json(handle, obj):
    handle.write(json.dumps(obj))
    handle.write("\n")


def write_line_single(handle, obj, field):
    if field in obj:
        handle.write(obj[field])
    else:
        log.info(f"{field} not in object {obj['id']}")
    handle.write("\n")


def write_line_csv(writer, obj, is_submission):
    output_list = []
    output_list.append(str(obj['score']))
    output_list.append(datetime.fromtimestamp(int(obj['created_utc'])).strftime("%Y-%m-%d"))
    if is_submission:
        output_list.append(obj['title'])
    output_list.append(f"u/{obj['author']}")
    if 'permalink' in obj:
        output_list.append(f"https://www.reddit.com{obj['permalink']}")
    else:
        output_list.append(f"https://www.reddit.com/r/{obj['subreddit']}/comments/{obj['link_id'][3:]}/_/{obj['id']}")
    if is_submission:
        if obj['is_self']:
            if 'selftext' in obj:
                output_list.append(obj['selftext'])
            else:
                output_list.append("")
        else:
            output_list.append(obj['url'])
    else:
        output_list.append(obj['body'])
    writer.writerow(output_list)


def read_and_decode(reader, chunk_size, max_window_size, previous_chunk=None, bytes_read=0):
    chunk = reader.read(chunk_size)
    bytes_read += chunk_size
    if previous_chunk is not None:
        chunk = previous_chunk + chunk
    try:
        return chunk.decode()
    except UnicodeDecodeError:
        if bytes_read > max_window_size:
            raise UnicodeError(f"Unable to decode frame after reading {bytes_read:,} bytes")
        log.info(f"Decoding error with {bytes_read:,} bytes, reading another chunk")
        return read_and_decode(reader, chunk_size, max_window_size, chunk, bytes_read)


def read_lines_zst(file_name):
    with open(file_name, 'rb') as file_handle:
        buffer = ''
        reader = zstandard.ZstdDecompressor(max_window_size=2**31).stream_reader(file_handle)
        while True:
            chunk = read_and_decode(reader, 2**27, (2**29) * 2)
            if not chunk:
                break
            lines = (buffer + chunk).split("\n")
            for line in lines[:-1]:
                yield line.strip(), file_handle.tell()
            buffer = lines[-1]
        reader.close()


def process_file(input_file, output_file, output_format, field, values, from_date, to_date, single_field, exact_match):
    output_path = f"{output_file}.{output_format}"
    is_submission = "submission" in input_file
    log.info(f"Input: {input_file} : Output: {output_path} : Is submission {is_submission}")
    writer = None
    if output_format == "zst":
        handle = zstandard.ZstdCompressor().stream_writer(open(output_path, 'wb'))
    elif output_format == "txt":
        handle = open(output_path, 'w', encoding='UTF-8')
    elif output_format == "csv":
        handle = open(output_path, 'w', encoding='UTF-8', newline='')
        writer = csv.writer(handle)
    else:
        log.error(f"Unsupported output format {output_format}")
        sys.exit()

    file_size = os.stat(input_file).st_size
    created = None
    matched_lines = 0
    bad_lines = 0
    total_lines = 0
    for line, file_bytes_processed in read_lines_zst(input_file):
        total_lines += 1
        if total_lines % 100000 == 0:
            log.info(f"{created.strftime('%Y-%m-%d %H:%M:%S')} : {total_lines:,} : {matched_lines:,} : {bad_lines:,} : {file_bytes_processed:,}:{(file_bytes_processed / file_size) * 100:.0f}%")

        try:
            obj = json.loads(line)
            created = datetime.utcfromtimestamp(int(obj['created_utc']))

            if created < from_date:
                continue
            if created > to_date:
                continue

            if field is not None:
                field_value = obj[field].lower()
                matched = False
                for value in values:
                    if exact_match:
                        if value == field_value:
                            matched = True
                            break
                    else:
                        if value in field_value:
                            matched = True
                            break
                if not matched:
                    continue

            matched_lines += 1
            if output_format == "zst":
                write_line_zst(handle, line)
            elif output_format == "csv":
                write_line_csv(writer, obj, is_submission)
            elif output_format == "txt":
                if single_field is not None:
                    write_line_single(handle, obj, single_field)
                else:
                    write_line_json(handle, obj)
            else:
                log.info(f"Something went wrong, invalid output format {output_format}")
        except (KeyError, json.JSONDecodeError) as err:
            bad_lines += 1
            if write_bad_lines:
                if isinstance(err, KeyError):
                    log.warning(f"Key {field} is not in the object: {err}")
                elif isinstance(err, json.JSONDecodeError):
                    log.warning(f"Line decoding failed: {err}")
                log.warning(line)

    handle.close()
    log.info(f"Complete : {total_lines:,} : {matched_lines:,} : {bad_lines:,}")


if __name__ == "__main__":
    if single_field is not None:
        log.info("Single field output mode, changing output file format to txt")
        output_format = "txt"

    if values_file is not None:
        values = []
        with open(values_file, 'r') as values_handle:
            for value in values_handle:
                values.append(value.strip().lower())
        log.info(f"Loaded {len(values)} from values file {values_file}")
    else:
        values = [value.lower() for value in values]  # convert to lowercase

    log.info(f"Filtering field: {field}")
    if len(values) <= 20:
        log.info(f"On values: {','.join(values)}")
    else:
        log.info("On values:")
        for value in values:
            log.info(value)
    log.info(f"Exact match {('on' if exact_match else 'off')}. Single field {single_field}.")
    log.info(f"From date {from_date.strftime('%Y-%m-%d')} to date {to_date.strftime('%Y-%m-%d')}")
    log.info(f"Output format set to {output_format}")

    input_files = []
    if os.path.isdir(input_file):
        if not os.path.exists(output_file):
            os.makedirs(output_file)
        for file in os.listdir(input_file):
            if not os.path.isdir(file) and file.endswith(".zst"):
                input_name = os.path.splitext(os.path.splitext(os.path.basename(file))[0])[0]
                input_files.append((os.path.join(input_file, file), os.path.join(output_file, input_name)))
    else:
        input_files.append((input_file, output_file))
    log.info(f"Processing {len(input_files)} files")
    for file_in, file_out in input_files:
        try:
            process_file(file_in, file_out, output_format, field, values, from_date, to_date, single_field, exact_match)
        except Exception as err:
            log.warning(f"Error processing {file_in}: {err}")
            log.warning(traceback.format_exc())
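read_lines_zst above is a self-contained iterator over any zstandard-compressed ndjson dump, so it can be reused from other scripts. A hedged sketch (the dump filename is illustrative; importing the module also runs its logging setup, though the processing itself stays behind the __main__ guard):

import json
from filter_file import read_lines_zst  # importing also configures the module's logging

# smoke test: count objects and track the newest timestamp seen
total, newest = 0, 0
for line, bytes_read in read_lines_zst("transdiy_comments.zst"):  # illustrative filename
    obj = json.loads(line)
    total += 1
    newest = max(newest, int(obj["created_utc"]))
print(f"{total:,} objects, newest created_utc {newest}")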
r/search.html (new file, 113 lines)

@@ -0,0 +1,113 @@
<!doctype html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
    <link rel="stylesheet" href="../static/css/lato.css">
    <link rel="stylesheet" href="../static/css/bootstrap-superhero.min.css">
    <link rel="stylesheet" href="../static/css/archive.css">
    <link rel="stylesheet" href="../static/css/main.css">
    <script>
        const searchWorker = new Worker("../static/js/search.js");
        function displaySearch(results) {
            let links = document.querySelector('.links')
            links.innerHTML = ''
            results.forEach(r => {

                d = document.createElement('div')
                d.classList.add('link')
                d.classList.add('mt-3')

                t = document.createElement('h5')
                t.classList.add('title')
                t.classList.add('mb-0')
                a = document.createElement('a')
                a.innerText = r.meta.title
                a.href = `${r.meta.subreddit}/${r.meta.path}`
                a.class = 'link'
                t.appendChild(a)

                p = document.createElement('p')
                p.innerText = `"${r.meta.body_short.replace(/[\n\r\t]/g, " ")}..."`
                p.classList.add('text-muted')
                p.style.marginBottom = 0

                s = document.createElement('small')
                s.classList.add('text-muted')

                score = document.createElement('span')
                score.innerText = r.meta.score
                score.classList.add('badge')
                score.classList.add('badge-secondary')

                c = document.createElement('a')
                c.href = `${r.meta.subreddit}/${r.meta.path}`
                c.innerText = ` ${r.meta.replies} Comments`
                s.appendChild(c)

                post_meta = document.createElement('span')
                post_meta.classList.add('text-muted')
                post_meta.innerHTML = ` ${r.meta.date} <a href="user/${r.meta.author}.html">${r.meta.author}</a>`

                s.appendChild(post_meta)
                d.appendChild(t)
                d.appendChild(p)
                d.appendChild(score)
                d.appendChild(s)
                links.appendChild(d)
            });
        }
        searchWorker.onmessage = (e) => {
            displaySearch(e.data)
        }
    </script>
    <title>search</title>
</head>
<body>
    <header>
        <nav class="navbar navbar-expand-sm navbar-dark bg-primary">
            <a class="navbar-brand" href="index.html">Search</a>
            <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
                <span class="navbar-toggler-icon"></span>
            </button>
            <div class="collapse navbar-collapse" id="navbarNav">
                <ul class="navbar-nav">

                    <li class="nav-item dropdown">
                        <a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">subreddits</a>
                        <div class="dropdown-menu" aria-labelledby="navbarDropdown">
                            <a class="dropdown-item" href="../index.html">All</a>
                            <a class="dropdown-item" href="../transdiy/index.html">transdiy</a>
                        </div>
                    </li>
                </ul>
            </div>
        </nav>
    </header>
    <main role="main" class="container-fluid">
        <form class="search">
            <input type="text" name="query" id="query">
            <input type="button" class="btn btn-info" value="Search" onclick="searchWorker.postMessage(query.value)">
        </form>
        <div class="links search mt-3">
        </div>
    </main>
    <footer class="container-fluid">
        <p class="small mb-0">r/transdiy archive has 48616 posts and 318879 comments. <a href="https://github.com/libertysoft3/reddit-html-archiver">source code</a>.</p>
    </footer>
    <script src="../static/js/jquery-3.3.1.slim.min.js"></script>
    <script src="../static/js/bootstrap.min.js"></script>
    <script>
        $(document).ready(function() {
            $(window).keydown(function(event){
                if(event.keyCode == 13) {
                    event.preventDefault();
                    return false;
                }
            });
        });
    </script>
</body>
</html>
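The page above expects a worker at ../static/js/search.js that receives the raw query string via postMessage and posts back results whose meta fields (title, path, subreddit, score, replies, date, author, body_short) feed displaySearch(). The indexing half is Python: the search script changed further down imports lunr, the Python port of lunr.js, so the prebuilt index presumably comes from something like this sketch; the document schema, filename, and serialization step are assumptions rather than the commit's actual code:

import json
from lunr import lunr

# one document per archived thread; field names mirror what the page reads
docs = [
    {"id": "abc123", "title": "Example post", "body": "full selftext here"},
]
idx = lunr(ref="id", fields=("title", "body"), documents=docs)

# lunr.py can serialize an index that lunr.js's Index.load() understands,
# which is how a browser-side worker could query it without re-indexing
with open("search-index.json", "w") as handle:
    json.dump(idx.serialize(), handle)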
@@ -0,0 +1,15 @@
form {
    display: flex;
    flex-direction: column;
    align-items: center;
    margin: 2em;
}

.search > #query {
    max-width: 500px;
    margin: 2em;
}

.search > .btn {
    max-width: 100px;
}
@@ -5,6 +5,7 @@ import json
 import argparse
 import configparser
 from lunr import lunr
+from datetime import datetime
 from urllib.parse import urlparse
 from write_html import generate_html
 from watchful import return_redd_objects

@@ -52,7 +53,9 @@ def get_thread_meta(thread: dict) -> dict:
 		'score': thread['score'],
 		'replies': str(len(thread['comments'])),
 		'body_short': thread['selftext'][:200],
-		'date': thread['created_utc'],
+		'date': datetime.utcfromtimestamp(int(thread['created_utc'])).strftime('%Y-%m-%d'),
 		'author': thread['author'],
 		'subreddit': thread['subreddit']
 	}
+
+

@@ -63,7 +66,8 @@ def get_comment_meta(comment: dict) -> dict:
 		'title': comment['title'],
 		'score': comment['score'],
 		'body_short': comment['selftext'][:200],
-		'date': comment['created_utc'],
+		'date': datetime.utcfromtimestamp(int(comment['created_utc'])).strftime('%Y-%m-%d'),
 		'author': comment['author']
 	}
+
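Both hunks swap the raw epoch stored in created_utc for a formatted date string at index-build time, so the search results can display it directly. For instance (epoch value chosen for illustration):

from datetime import datetime

datetime.utcfromtimestamp(1577836800).strftime('%Y-%m-%d')  # -> '2020-01-01'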
@@ -24,6 +24,9 @@
 					###HTML_SUBS_MENU###
 				</div>
 			</li>
+			<li class="nav-item ">
+				<a class="nav-link" href="/search.html">search</a>
+			</li>
 		</ul>
 	</div>
 </nav>
@@ -26,8 +26,8 @@
 			<li class="nav-item">
 				<a class="nav-link" href="###URL_SUB_DATE###">date</a>
 			</li>
-			<li class="nav-item ###URL_SEARCH_CSS###">
-				<a class="nav-link" href="###URL_SEARCH###">search</a>
+			<li class="nav-item">
+				<a class="nav-link" href="/search.html">search</a>
 			</li>
 			<li class="nav-item dropdown">
 				<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">subreddits</a>
@@ -3,9 +3,10 @@
 <head>
 	<meta charset="utf-8">
 	<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
-	<link rel="stylesheet" href="###INCLUDE_PATH###static/css/lato.css">
-	<link rel="stylesheet" href="###INCLUDE_PATH###static/css/bootstrap-superhero.min.css">
-	<link rel="stylesheet" href="###INCLUDE_PATH###static/css/archive.css">
+	<link rel="stylesheet" href="../static/css/lato.css">
+	<link rel="stylesheet" href="../static/css/bootstrap-superhero.min.css">
+	<link rel="stylesheet" href="../static/css/archive.css">
+	<link rel="stylesheet" href="../static/css/main.css">
 	<script>
 		const searchWorker = new Worker("../static/js/search.js");
 		function displaySearch(results) {

@@ -14,17 +15,45 @@
 		results.forEach(r => {
 
 			d = document.createElement('div')
-			d.classList.add('search-result')
-			t = document.createElement('a')
-			t.innerText = r.meta.title
-			t.href = r.meta.path
-			t.class = 'link'
+			d.classList.add('link')
+			d.classList.add('mt-3')
+
+			t = document.createElement('h5')
+			t.classList.add('title')
+			t.classList.add('mb-0')
+			a = document.createElement('a')
+			a.innerText = r.meta.title
+			a.href = `${r.meta.subreddit}/${r.meta.path}`
+			a.class = 'link'
+			t.appendChild(a)
 
 			p = document.createElement('p')
-			p.innerText = r.meta.body_short.replace(/[\n\r\t]/g, " ")
+			p.innerText = `"${r.meta.body_short.replace(/[\n\r\t]/g, " ")}..."`
 			p.classList.add('text-muted')
+			p.style.marginBottom = 0
+
+			s = document.createElement('small')
+			s.classList.add('text-muted')
+
+			score = document.createElement('span')
+			score.innerText = r.meta.score
+			score.classList.add('badge')
+			score.classList.add('badge-secondary')
+
+			c = document.createElement('a')
+			c.href = `${r.meta.subreddit}/${r.meta.path}`
+			c.innerText = ` ${r.meta.replies} Comments`
+			s.appendChild(c)
+
+			post_meta = document.createElement('span')
+			post_meta.classList.add('text-muted')
+			post_meta.innerHTML = ` ${r.meta.date} <a href="user/${r.meta.author}.html">${r.meta.author}</a>`
+
+			s.appendChild(post_meta)
 			d.appendChild(t)
 			d.appendChild(p)
+			d.appendChild(score)
+			d.appendChild(s)
 			links.appendChild(d)
 		});
 	}

@@ -32,34 +61,23 @@
 		displaySearch(e.data)
 	}
 	</script>
-	<title>r/###SUB### ###TITLE###</title>
+	<title>search</title>
 </head>
 <body>
 	<header>
 		<nav class="navbar navbar-expand-sm navbar-dark bg-primary">
-			<a class="navbar-brand" href="###URL_IDX_SCORE###">r/###SUB###</a>
+			<a class="navbar-brand" href="index.html">Search</a>
 			<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
 				<span class="navbar-toggler-icon"></span>
 			</button>
 			<div class="collapse navbar-collapse" id="navbarNav">
 				<ul class="navbar-nav">
-					<li class="nav-item ###URL_IDX_SCORE_CSS###">
-						<a class="nav-link" href="###URL_IDX_SCORE###">score</a>
-					</li>
-					<li class="nav-item ###URL_IDX_CMNT_CSS###">
-						<a class="nav-link" href="###URL_IDX_CMNT###">comments</a>
-					</li>
-					<li class="nav-item ###URL_IDX_DATE_CSS###">
-						<a class="nav-link" href="###URL_IDX_DATE###">date</a>
-					</li>
-					<li class="nav-item ###URL_SEARCH_CSS###">
-						<a class="nav-link" href="###URL_SEARCH###">search</a>
-					</li>
+
 					<li class="nav-item dropdown">
 						<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">subreddits</a>
 						<div class="dropdown-menu" aria-labelledby="navbarDropdown">
-							<a class="dropdown-item" href="###URL_SUBS###">All</a>
-							###HTML_SUBS_MENU###
+							<a class="dropdown-item" href="../index.html">All</a>
+							<a class="dropdown-item" href="../transdiy/index.html">transdiy</a>
 						</div>
 					</li>
 				</ul>

@@ -77,10 +95,10 @@
 		</div>
 	</main>
 	<footer class="container-fluid">
-		<p class="small mb-0">r/###SUB### archive has ###ARCH_NUM_POSTS### posts and ###ARCH_NUM_COMMENTS### comments. <a href="###URL_PROJECT###">source code</a>.</p>
+		<p class="small mb-0">r/transdiy archive has 48616 posts and 318879 comments. <a href="https://github.com/libertysoft3/reddit-html-archiver">source code</a>.</p>
 	</footer>
-	<script src="###INCLUDE_PATH###static/js/jquery-3.3.1.slim.min.js"></script>
-	<script src="###INCLUDE_PATH###static/js/bootstrap.min.js"></script>
+	<script src="../static/js/jquery-3.3.1.slim.min.js"></script>
+	<script src="../static/js/bootstrap.min.js"></script>
 	<script>
 		$(document).ready(function() {
 			$(window).keydown(function(event){
@@ -27,7 +27,7 @@
 				<a class="nav-link" href="###URL_IDX_DATE###">date</a>
 			</li>
 			<li class="nav-item ###URL_SEARCH_CSS###">
-				<a class="nav-link" href="###URL_SEARCH###">search</a>
+				<a class="nav-link" href="../search.html">search</a>
 			</li>
 			<li class="nav-item dropdown">
 				<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">subreddits</a>
@@ -24,6 +24,9 @@
 					###HTML_SUBS_MENU###
 				</div>
 			</li>
+			<li class="nav-item ###URL_SEARCH_CSS###">
+				<a class="nav-link" href="search.html">search</a>
+			</li>
 		</ul>
 	</div>
 </nav>
@@ -7,12 +7,11 @@ import re
 import psutil
-
 url_project = 'https://github.com/libertysoft3/reddit-html-archiver'
-links_per_page = 30
+links_per_page = 100
 pager_skip = 10
 pager_skip_long = 100
 start_date = date(2005, 1, 1)
 end_date = datetime.today().date() + timedelta(days=1)
 source_data_links = 'links.csv'
 max_comment_depth = 8  # mostly for mobile, which might be silly
 removed_content_identifiers = ['[deleted]','deleted','[removed]','removed']
 default_sort = 'score'
@@ -102,7 +101,7 @@ def generate_html(subreddits, min_score=0, min_comments=0, hide_deleted_comments
 	stat_filtered_links = 0
 	subs = [s.lower() for s in subreddits.keys()]
 	for sub in subs:
-		print("Building current sub: ", sub)
+		print("Building sub: ", sub)
 
 		threads = subreddits[sub]
 		print("Total threads to convert: ", len(threads))

@@ -218,7 +217,7 @@ def write_subreddit_pages(subreddit, subs, link_index, stat_sub_filtered_links,
 		'###URL_IDX_SCORE###': sort_based_prefix + 'index.html',
 		'###URL_IDX_CMNT###': sort_based_prefix + 'index-' + sort_indexes['num_comments']['slug'] + '/index.html',
 		'###URL_IDX_DATE###': sort_based_prefix + 'index-' + sort_indexes['created_utc']['slug'] + '/index.html',
-		'###URL_SEARCH###': sort_based_prefix + 'search.html',
+		'###URL_SEARCH###': '/search.html',
 		'###URL_IDX_SCORE_CSS###': 'active' if sort == 'score' else '',
 		'###URL_IDX_CMNT_CSS###': 'active' if sort == 'num_comments' else '',
 		'###URL_IDX_DATE_CSS###': 'active' if sort == 'created_utc' else '',

@@ -332,7 +331,7 @@ def write_link_page(subreddits, link, subreddit='', hide_deleted_comments=False)
 		'###URL_SUB###': static_include_path + subreddit + '/index.html',
 		'###URL_SUB_CMNT###': static_include_path + subreddit + '/index-' + sort_indexes['num_comments']['slug'] + '/index.html',
 		'###URL_SUB_DATE###': static_include_path + subreddit + '/index-' + sort_indexes['created_utc']['slug'] + '/index.html',
-		'###URL_SEARCH###': static_include_path + subreddit + '/search.html',
+		'###URL_SEARCH###': static_include_path + '/search.html',
 		'###HTML_SUBS_MENU###': subs_menu_html,
 		'###HTML_SELFTEXT###': selftext_html,
 		'###HTML_COMMENTS###': comments_html,
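These mapping hunks re-point ###URL_SEARCH### from a per-subreddit search page to the single top-level /search.html that this commit styles. As a hedged sketch of how such ###KEY### maps are typically applied to the templates (the apply_template helper is hypothetical, not write_html.py's actual function):

def apply_template(template: str, mapping: dict) -> str:
    # straight string substitution of each ###KEY### placeholder
    for key, value in mapping.items():
        template = template.replace(key, value)
    return template

print(apply_template('<a class="nav-link" href="###URL_SEARCH###">search</a>',
                     {'###URL_SEARCH###': '/search.html'}))
# <a class="nav-link" href="/search.html">search</a>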