Refactor of hop referrer passing

This commit is contained in:
Adam Miller 2022-03-24 21:38:47 +00:00
parent f4a9e77b06
commit cd16985724
2 changed files with 7 additions and 2 deletions

View File

@@ -304,6 +304,7 @@ class RethinkDbFrontier:
'hops_from_seed': parent_page.hops_from_seed + 1,
'hop_path': str(parent_page.hop_path if parent_page.hop_path else "") + "L",
'via_page_id': parent_page.id,
'via_page_url': parent_page.url,
'hops_off_surt': hops_off,
'hashtags': [hashtag] if hashtag else []})
return page

View File

@@ -273,10 +273,12 @@ class Site(doublethink.Document, ElapsedMixIn):
if self.warcprox_meta:
if page is not None:
self.warcprox_meta["metadata"]["hop_path"] = page.hop_path
self.warcprox_meta["metadata"]["hop_path_referer"] = page.url
self.warcprox_meta["metadata"]["brozzled_url"] = page.url
self.warcprox_meta["metadata"]["hop_via_url"] = page.via_page_url
warcprox_meta_json = json.dumps(self.warcprox_meta, separators=(',', ':'))
del self.warcprox_meta["metadata"]["hop_path"]
del self.warcprox_meta["metadata"]["hop_path_referer"]
del self.warcprox_meta["metadata"]["brozzled_url"]
del self.warcprox_meta["metadata"]["hop_via_url"]
else:
warcprox_meta_json= json.dumps(self.warcprox_meta, separators=(',', ':'))
hdrs["Warcprox-Meta"] = warcprox_meta_json
@@ -348,6 +350,8 @@ class Page(doublethink.Document):
self.hops_from_seed = 0
if not "hop_path" in self:
self.hop_path = None
if not "via_page_url" in self:
self.via_page_url = None
if not "brozzle_count" in self:
self.brozzle_count = 0
if not "claimed" in self: