Fix video indexing

This commit is contained in:
Omar Roth 2018-02-04 19:42:13 -06:00
parent 1facca5408
commit d79c1ff3e9
2 changed files with 24 additions and 9 deletions

View File

@@ -100,27 +100,38 @@ def fetch_video(id, client)
views = info["view_count"].to_i64 views = info["view_count"].to_i64
likes = html.xpath_node(%q(//button[@title="I like this"]/span)) likes = html.xpath_node(%q(//button[@title="I like this"]/span))
likes = likes ? likes.content.delete(",").to_i : 1 likes = likes ? likes.content.delete(",").to_i : 0
dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span)) dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span))
dislikes = dislikes ? dislikes.content.delete(",").to_i : 0 dislikes = dislikes ? dislikes.content.delete(",").to_i : 0
wilson_score = ci_lower_bound(likes, likes + dislikes) wilson_score = ci_lower_bound(likes, likes + dislikes)
published = html.xpath_node(%q(//strong[@class="watch-time-text"])) published = html.xpath_node(%q(//strong[contains(@class,"watch-time-text")]))
if published if published
published = published.content published = published.content
published = published.lchop("Published on ") else
published = published.lchop("Streamed live on ") raise "Could not find date published"
published = published.lchop("Started streaming on ") end
published = published.lchop("Published ")
published = published.lchop("Streamed live ")
published = published.lchop("Started streaming ")
published = published.lchop("on ")
published = published.lchop("Scheduled for ")
if !published.includes?("ago") if !published.includes?("ago")
published = Time.parse(published, "%b %-d, %Y") published = Time.parse(published, "%b %-d, %Y")
else else
# Time matches format "20 hours ago", "40 minutes ago"... # Time matches format "20 hours ago", "40 minutes ago"...
published = Time.now.date delta = published.split(" ")[0].to_i
end case published
when .includes? "minute"
published = Time.now - delta.minutes
when .includes? "hour"
published = Time.now - delta.hours
else else
published = Time.epoch(0) raise "Could not parse #{published}"
end
end end
video = Video.new(id, info, html, Time.now, title, views, likes, dislikes, wilson_score, published) video = Video.new(id, info, html, Time.now, title, views, likes, dislikes, wilson_score, published)
@@ -165,7 +176,7 @@ def search(query, client)
end end
def decrypt_signature(a) def decrypt_signature(a)
a = a.split(""); a = a.split("")
a.delete_at(0..2) a.delete_at(0..2)
a = a.reverse a = a.reverse
c = a[0] c = a[0]

View File

@@ -182,7 +182,11 @@ get "/watch" do |env|
rating = video.info["avg_rating"].to_f64 rating = video.info["avg_rating"].to_f64
engagement = ((video.dislikes.to_f + video.likes.to_f)/video.views * 100) engagement = ((video.dislikes.to_f + video.likes.to_f)/video.views * 100)
if video.likes > 0 || video.dislikes > 0
calculated_rating = (video.likes.to_f/(video.likes.to_f + video.dislikes.to_f) * 4 + 1) calculated_rating = (video.likes.to_f/(video.likes.to_f + video.dislikes.to_f) * 4 + 1)
else
calculated_rating = 0.0
end
templated "watch" templated "watch"
end end