mirror of
https://github.com/internetarchive/brozzler.git
synced 2025-08-07 05:52:27 -04:00
Refactor of hop referrer passing
This commit is contained in:
parent
f4a9e77b06
commit
cd16985724
2 changed files with 7 additions and 2 deletions
|
@@ -304,6 +304,7 @@ class RethinkDbFrontier:
|
||||||
'hops_from_seed': parent_page.hops_from_seed + 1,
|
'hops_from_seed': parent_page.hops_from_seed + 1,
|
||||||
'hop_path': str(parent_page.hop_path if parent_page.hop_path else "") + "L",
|
'hop_path': str(parent_page.hop_path if parent_page.hop_path else "") + "L",
|
||||||
'via_page_id': parent_page.id,
|
'via_page_id': parent_page.id,
|
||||||
|
'via_page_url': parent_page.url,
|
||||||
'hops_off_surt': hops_off,
|
'hops_off_surt': hops_off,
|
||||||
'hashtags': [hashtag] if hashtag else []})
|
'hashtags': [hashtag] if hashtag else []})
|
||||||
return page
|
return page
|
||||||
|
|
|
@@ -273,10 +273,12 @@ class Site(doublethink.Document, ElapsedMixIn):
|
||||||
if self.warcprox_meta:
|
if self.warcprox_meta:
|
||||||
if page is not None:
|
if page is not None:
|
||||||
self.warcprox_meta["metadata"]["hop_path"] = page.hop_path
|
self.warcprox_meta["metadata"]["hop_path"] = page.hop_path
|
||||||
self.warcprox_meta["metadata"]["hop_path_referer"] = page.url
|
self.warcprox_meta["metadata"]["brozzled_url"] = page.url
|
||||||
|
self.warcprox_meta["metadata"]["hop_via_url"] = page.via_page_url
|
||||||
warcprox_meta_json = json.dumps(self.warcprox_meta, separators=(',', ':'))
|
warcprox_meta_json = json.dumps(self.warcprox_meta, separators=(',', ':'))
|
||||||
del self.warcprox_meta["metadata"]["hop_path"]
|
del self.warcprox_meta["metadata"]["hop_path"]
|
||||||
del self.warcprox_meta["metadata"]["hop_path_referer"]
|
del self.warcprox_meta["metadata"]["brozzled_url"]
|
||||||
|
del self.warcprox_meta["metadata"]["hop_via_url"]
|
||||||
else:
|
else:
|
||||||
warcprox_meta_json= json.dumps(self.warcprox_meta, separators=(',', ':'))
|
warcprox_meta_json= json.dumps(self.warcprox_meta, separators=(',', ':'))
|
||||||
hdrs["Warcprox-Meta"] = warcprox_meta_json
|
hdrs["Warcprox-Meta"] = warcprox_meta_json
|
||||||
|
@@ -348,6 +350,8 @@ class Page(doublethink.Document):
|
||||||
self.hops_from_seed = 0
|
self.hops_from_seed = 0
|
||||||
if not "hop_path" in self:
|
if not "hop_path" in self:
|
||||||
self.hop_path = None
|
self.hop_path = None
|
||||||
|
if not "via_page_url" in self:
|
||||||
|
self.via_page_url = None
|
||||||
if not "brozzle_count" in self:
|
if not "brozzle_count" in self:
|
||||||
self.brozzle_count = 0
|
self.brozzle_count = 0
|
||||||
if not "claimed" in self:
|
if not "claimed" in self:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue