mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-04-20 23:56:34 -04:00
Refactor of hop referrer passing
This commit is contained in:
parent
f4a9e77b06
commit
cd16985724
@@ -304,6 +304,7 @@ class RethinkDbFrontier:
|
||||
'hops_from_seed': parent_page.hops_from_seed + 1,
|
||||
'hop_path': str(parent_page.hop_path if parent_page.hop_path else "") + "L",
|
||||
'via_page_id': parent_page.id,
|
||||
'via_page_url': parent_page.url,
|
||||
'hops_off_surt': hops_off,
|
||||
'hashtags': [hashtag] if hashtag else []})
|
||||
return page
|
||||
|
@@ -273,10 +273,12 @@ class Site(doublethink.Document, ElapsedMixIn):
|
||||
if self.warcprox_meta:
|
||||
if page is not None:
|
||||
self.warcprox_meta["metadata"]["hop_path"] = page.hop_path
|
||||
self.warcprox_meta["metadata"]["hop_path_referer"] = page.url
|
||||
self.warcprox_meta["metadata"]["brozzled_url"] = page.url
|
||||
self.warcprox_meta["metadata"]["hop_via_url"] = page.via_page_url
|
||||
warcprox_meta_json = json.dumps(self.warcprox_meta, separators=(',', ':'))
|
||||
del self.warcprox_meta["metadata"]["hop_path"]
|
||||
del self.warcprox_meta["metadata"]["hop_path_referer"]
|
||||
del self.warcprox_meta["metadata"]["brozzled_url"]
|
||||
del self.warcprox_meta["metadata"]["hop_via_url"]
|
||||
else:
|
||||
warcprox_meta_json= json.dumps(self.warcprox_meta, separators=(',', ':'))
|
||||
hdrs["Warcprox-Meta"] = warcprox_meta_json
|
||||
@@ -348,6 +350,8 @@ class Page(doublethink.Document):
|
||||
self.hops_from_seed = 0
|
||||
if not "hop_path" in self:
|
||||
self.hop_path = None
|
||||
if not "via_page_url" in self:
|
||||
self.via_page_url = None
|
||||
if not "brozzle_count" in self:
|
||||
self.brozzle_count = 0
|
||||
if not "claimed" in self:
|
||||
|
Loading…
x
Reference in New Issue
Block a user