This commit is contained in:
Watchful1 2023-08-26 20:18:38 -07:00
parent 4f56c141fd
commit cf4962fd4c
3 changed files with 10 additions and 12 deletions

View file

@@ -11,6 +11,8 @@ import json
import praw
from praw import endpoints
sys.path.append('personal')
log = discord_logging.init_logging(debug=False)
import utils
@@ -31,7 +33,7 @@ def query_pushshift(ids, bearer, object_type):
for i in range(4):
response = requests.get(url, headers={
'User-Agent': "In script by /u/Watchful1",
-'Authorization': f"Bearer {bearer}"})
+'Authorization': f"Bearer {bearer}"}, timeout=15)
if response.status_code == 200:
break
if response.status_code == 403:
@@ -166,16 +168,11 @@ if __name__ == "__main__":
log.error(f"Invalid type: {args.type}")
sys.exit(2)
config = discord_logging.get_config()
user_name = "Watchful12"
reddit = praw.Reddit(
username=user_name,
password=discord_logging.get_config_var(config, user_name, "password"),
client_id=discord_logging.get_config_var(config, user_name, f"client_id_1"),
client_secret=discord_logging.get_config_var(config, user_name, f"client_secret_1"),
user_agent=f"Remindme ingest script")
reddit = praw.Reddit(user_name)
pushshift_token = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyX2lkIjoiV2F0Y2hmdWwxIiwiZXhwaXJlcyI6MTY5MzA5OTE4OC4wMjU3MDU4fQ.HJJd73nwHArOz2lErpubUuTVd_gdJ44SfpKDjb91tIY"
config = discord_logging.get_config()
pushshift_token = discord_logging.get_config_var(config, user_name, "pushshift_token")
while start_date <= end_date:
build_day(start_date, input_folders, args.output, object_type, reddit, pushshift_token)

View file

@@ -11,6 +11,7 @@ from collections import defaultdict
log = discord_logging.get_logger()
import utils
+import merge
NEWLINE_ENCODED = "\n".encode('utf-8')
@@ -258,12 +259,12 @@ class ObjectDict:
created_minute = created_utc.replace(second=0, microsecond=0)
if obj['id'] in self.by_id:
existing_obj = self.by_id[obj['id']]
-unmatched_field = utils.merge_fields(existing_obj, obj, self.obj_type)
+unmatched_field = merge.merge_fields(existing_obj, obj, self.obj_type)
self.counts[created_minute][ingest_type][False] += 1
return unmatched_field
if created_utc < self.min_datetime or created_utc > self.max_datetime:
return False
-unmatched_field = utils.parse_fields(obj, self.obj_type)
+unmatched_field = merge.parse_fields(obj, self.obj_type)
self.by_id[obj['id']] = obj
self.by_minute[created_minute].add(obj)
self.counts[created_minute][ingest_type][True] += 1

View file

@@ -189,7 +189,7 @@ field_actions = {
"is_robot_indexable": FieldAction.OVERWRITE,
"is_self": FieldAction.DONT_OVERWRITE,
"is_survey_ad": FieldAction.ALLOW_EMPTY,
-"is_video": FieldAction.ALLOW,
+"is_video": FieldAction.OVERWRITE,
"likes": FieldAction.ALLOW_EMPTY,
"link_flair_background_color": FieldAction.OVERWRITE_NOT_NONE,
"link_flair_css_class": FieldAction.OVERWRITE_NOT_NONE,