Refactor 'description_html'

This commit is contained in:
Omar Roth 2019-06-08 15:08:27 -05:00
parent 12b2ab5da8
commit b43e9ed7e7
No known key found for this signature in database
GPG Key ID: B8254FB7EC3D37F2
8 changed files with 69 additions and 103 deletions

View File

@ -473,9 +473,8 @@ get "/watch" do |env|
aspect_ratio = "16:9" aspect_ratio = "16:9"
video.description = fill_links(video.description, "https", "www.youtube.com") video.description_html = fill_links(video.description_html, "https", "www.youtube.com")
video.description = replace_links(video.description) video.description_html = replace_links(video.description_html)
description = video.short_description
host_url = make_host_url(config, Kemal.config) host_url = make_host_url(config, Kemal.config)
host_params = env.request.query_params host_params = env.request.query_params
@ -648,9 +647,8 @@ get "/embed/:id" do |env|
aspect_ratio = nil aspect_ratio = nil
video.description = fill_links(video.description, "https", "www.youtube.com") video.description_html = fill_links(video.description_html, "https", "www.youtube.com")
video.description = replace_links(video.description) video.description_html = replace_links(video.description_html)
description = video.short_description
host_url = make_host_url(config, Kemal.config) host_url = make_host_url(config, Kemal.config)
host_params = env.request.query_params host_params = env.request.query_params
@ -2466,7 +2464,7 @@ get "/feed/channel/:ucid" do |env|
author = entry.xpath_node("author/name").not_nil!.content author = entry.xpath_node("author/name").not_nil!.content
ucid = entry.xpath_node("channelid").not_nil!.content ucid = entry.xpath_node("channelid").not_nil!.content
description = entry.xpath_node("group/description").not_nil!.content description_html = entry.xpath_node("group/description").not_nil!.to_s
views = entry.xpath_node("group/community/statistics").not_nil!.["views"].to_i64 views = entry.xpath_node("group/community/statistics").not_nil!.["views"].to_i64
videos << SearchVideo.new( videos << SearchVideo.new(
@ -2476,8 +2474,7 @@ get "/feed/channel/:ucid" do |env|
ucid: ucid, ucid: ucid,
published: published, published: published,
views: views, views: views,
description: description, description_html: description_html,
description_html: "",
length_seconds: 0, length_seconds: 0,
live_now: false, live_now: false,
paid: false, paid: false,
@ -3460,11 +3457,8 @@ get "/api/v1/top" do |env|
json.field "published", video.published.to_unix json.field "published", video.published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(video.published, locale)) json.field "publishedText", translate(locale, "`x` ago", recode_date(video.published, locale))
description = video.description.gsub("<br>", "\n") json.field "description", html_to_content(video.description_html)
description = description.gsub("<br/>", "\n") json.field "descriptionHtml", video.description_html
description = XML.parse_html(description)
json.field "description", description.content
json.field "descriptionHtml", video.description
end end
end end
end end
@ -3511,8 +3505,7 @@ get "/api/v1/channels/:ucid" do |env|
author = channel_html.xpath_node(%q(//a[contains(@class, "branded-page-header-title-link")])).not_nil!.content author = channel_html.xpath_node(%q(//a[contains(@class, "branded-page-header-title-link")])).not_nil!.content
author_url = channel_html.xpath_node(%q(//a[@class="channel-header-profile-image-container spf-link"])).not_nil!["href"] author_url = channel_html.xpath_node(%q(//a[@class="channel-header-profile-image-container spf-link"])).not_nil!["href"]
author_thumbnail = channel_html.xpath_node(%q(//img[@class="channel-header-profile-image"])).not_nil!["src"] author_thumbnail = channel_html.xpath_node(%q(//img[@class="channel-header-profile-image"])).not_nil!["src"]
description_html = channel_html.xpath_node(%q(//div[contains(@class,"about-description")])) description_html = channel_html.xpath_node(%q(//div[contains(@class,"about-description")])).try &.to_s || ""
description_html, description = html_to_content(description_html)
paid = channel_html.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True" paid = channel_html.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True"
is_family_friendly = channel_html.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True" is_family_friendly = channel_html.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True"
@ -3607,7 +3600,7 @@ get "/api/v1/channels/:ucid" do |env|
json.field "autoGenerated", auto_generated json.field "autoGenerated", auto_generated
json.field "isFamilyFriendly", is_family_friendly json.field "isFamilyFriendly", is_family_friendly
json.field "description", description json.field "description", html_to_content(description_html)
json.field "descriptionHtml", description_html json.field "descriptionHtml", description_html
json.field "allowedRegions", allowed_regions json.field "allowedRegions", allowed_regions
@ -3884,7 +3877,7 @@ get "/api/v1/playlists/:plid" do |env|
end end
end end
json.field "description", playlist.description json.field "description", html_to_content(playlist.description_html)
json.field "descriptionHtml", playlist.description_html json.field "descriptionHtml", playlist.description_html
json.field "videoCount", playlist.video_count json.field "videoCount", playlist.video_count

View File

@ -138,13 +138,8 @@ def fetch_youtube_comments(id, db, continuation, proxies, format, locale, thin_m
node_comment = node["commentRenderer"] node_comment = node["commentRenderer"]
end end
content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff') content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff').try { |block| HTML.escape(block) }.to_s ||
if content_html content_to_comment_html(node_comment["contentText"]["runs"].as_a).try &.to_s || ""
content_html = HTML.escape(content_html)
end
content_html ||= content_to_comment_html(node_comment["contentText"]["runs"].as_a)
content_html, content = html_to_content(content_html)
author = node_comment["authorText"]?.try &.["simpleText"] author = node_comment["authorText"]?.try &.["simpleText"]
author ||= "" author ||= ""
@ -179,7 +174,7 @@ def fetch_youtube_comments(id, db, continuation, proxies, format, locale, thin_m
json.field "isEdited", false json.field "isEdited", false
end end
json.field "content", content json.field "content", html_to_content(content_html)
json.field "contentHtml", content_html json.field "contentHtml", content_html
json.field "published", published.to_unix json.field "published", published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale)) json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale))

View File

@ -177,23 +177,17 @@ def login_req(login_form, f_req)
return HTTP::Params.encode(data) return HTTP::Params.encode(data)
end end
# Converts an HTML description into plain text.
#
# NOTE: each line of this listing shows two revisions of the method side by
# side (left-hand text, then right-hand text).
#
# Both revisions replace `<br>` and `<br/>` with "\n" and, when the result is
# non-empty, parse it with `XML.parse_html` and take the text content with
# surrounding newlines and spaces stripped.
#
# Left-hand revision: accepts a possibly-nil value (converted with `to_s`) and
# returns `description_html, description`.
# Right-hand revision: accepts a `String` and returns only the plain-text
# `description`.
def html_to_content(description_html) def html_to_content(description_html : String)
if !description_html description = description_html.gsub(/(<br>)|(<br\/>)/, {
description = "" "<br>": "\n",
description_html = "" "<br/>": "\n",
else })
description_html = description_html.to_s
description = description_html.gsub("<br>", "\n")
description = description.gsub("<br/>", "\n")
if description.empty? if !description.empty?
description = "" description = XML.parse_html(description).content.strip("\n ")
else
description = XML.parse_html(description).content.strip("\n ")
end
end end
return description_html, description return description
end end
def extract_videos(nodeset, ucid = nil, author_name = nil) def extract_videos(nodeset, ucid = nil, author_name = nil)
@ -230,8 +224,7 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
author ||= "" author ||= ""
author_id ||= "" author_id ||= ""
description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")])) description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")])).try &.to_s || ""
description_html, description = html_to_content(description_html)
tile = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-tile")])) tile = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-tile")]))
if !tile if !tile
@ -330,7 +323,6 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
author_thumbnail: author_thumbnail, author_thumbnail: author_thumbnail,
subscriber_count: subscriber_count, subscriber_count: subscriber_count,
video_count: video_count, video_count: video_count,
description: description,
description_html: description_html description_html: description_html
) )
else else
@ -396,7 +388,6 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
ucid: author_id, ucid: author_id,
published: published, published: published,
views: view_count, views: view_count,
description: description,
description_html: description_html, description_html: description_html,
length_seconds: length_seconds, length_seconds: length_seconds,
live_now: live_now, live_now: live_now,

View File

@ -47,7 +47,6 @@ struct Playlist
author: String, author: String,
author_thumbnail: String, author_thumbnail: String,
ucid: String, ucid: String,
description: String,
description_html: String, description_html: String,
video_count: Int32, video_count: Int32,
views: Int64, views: Int64,
@ -214,9 +213,8 @@ def fetch_playlist(plid, locale)
end end
title = title.content.strip(" \n") title = title.content.strip(" \n")
description_html = document.xpath_node(%q(//span[@class="pl-header-description-text"]/div/div[1])) description_html = document.xpath_node(%q(//span[@class="pl-header-description-text"]/div/div[1])).try &.to_s ||
description_html ||= document.xpath_node(%q(//span[@class="pl-header-description-text"])) document.xpath_node(%q(//span[@class="pl-header-description-text"])).try &.to_s || ""
description_html, description = html_to_content(description_html)
# YouTube allows anonymous playlists, so most of this can be empty or optional # YouTube allows anonymous playlists, so most of this can be empty or optional
anchor = document.xpath_node(%q(//ul[@class="pl-header-details"])) anchor = document.xpath_node(%q(//ul[@class="pl-header-details"]))
@ -245,7 +243,6 @@ def fetch_playlist(plid, locale)
author: author, author: author,
author_thumbnail: author_thumbnail, author_thumbnail: author_thumbnail,
ucid: ucid, ucid: ucid,
description: description,
description_html: description_html, description_html: description_html,
video_count: video_count, video_count: video_count,
views: views, views: views,

View File

@ -31,7 +31,7 @@ struct SearchVideo
xml.element("media:title") { xml.text self.title } xml.element("media:title") { xml.text self.title }
xml.element("media:thumbnail", url: "#{host_url}/vi/#{self.id}/mqdefault.jpg", xml.element("media:thumbnail", url: "#{host_url}/vi/#{self.id}/mqdefault.jpg",
width: "320", height: "180") width: "320", height: "180")
xml.element("media:description") { xml.text self.description } xml.element("media:description") { xml.text html_to_content(self.description_html) }
end end
xml.element("media:community") do xml.element("media:community") do
@ -64,7 +64,7 @@ struct SearchVideo
generate_thumbnails(json, self.id, config, kemal_config) generate_thumbnails(json, self.id, config, kemal_config)
end end
json.field "description", self.description json.field "description", html_to_content(self.description_html)
json.field "descriptionHtml", self.description_html json.field "descriptionHtml", self.description_html
json.field "viewCount", self.views json.field "viewCount", self.views
@ -94,7 +94,6 @@ struct SearchVideo
ucid: String, ucid: String,
published: Time, published: Time,
views: Int64, views: Int64,
description: String,
description_html: String, description_html: String,
length_seconds: Int32, length_seconds: Int32,
live_now: Bool, live_now: Bool,
@ -187,7 +186,7 @@ struct SearchChannel
json.field "subCount", self.subscriber_count json.field "subCount", self.subscriber_count
json.field "videoCount", self.video_count json.field "videoCount", self.video_count
json.field "description", self.description json.field "description", html_to_content(self.description_html)
json.field "descriptionHtml", self.description_html json.field "descriptionHtml", self.description_html
end end
end end
@ -208,7 +207,6 @@ struct SearchChannel
author_thumbnail: String, author_thumbnail: String,
subscriber_count: Int32, subscriber_count: Int32,
video_count: Int32, video_count: Int32,
description: String,
description_html: String, description_html: String,
}) })
end end

View File

@ -286,10 +286,8 @@ struct Video
generate_storyboards(json, self.id, self.storyboards, config, kemal_config) generate_storyboards(json, self.id, self.storyboards, config, kemal_config)
end end
description_html, description = html_to_content(self.description) json.field "description", html_to_content(self.description_html)
json.field "descriptionHtml", self.description_html
json.field "description", description
json.field "descriptionHtml", description_html
json.field "published", self.published.to_unix json.field "published", self.published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale)) json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))
json.field "keywords", self.keywords json.field "keywords", self.keywords
@ -467,6 +465,17 @@ struct Video
end end
end end
# `description_html` is stored in DB as `description`, which can be
# quite confusing. Since it currently isn't very practical to rename
# it, we instead define a getter and setter here.
#
# Getter: returns the value currently held in `description`.
def description_html
self.description
end
# Setter: assigns `other` to `description`, so reads through either name
# observe the same value.
def description_html=(other : String)
self.description = other
end
def allow_ratings def allow_ratings
allow_ratings = player_response["videoDetails"]?.try &.["allowRatings"]?.try &.as_bool allow_ratings = player_response["videoDetails"]?.try &.["allowRatings"]?.try &.as_bool
@ -796,14 +805,19 @@ struct Video
end end
# Builds a short plain-text summary of the video description.
#
# NOTE: each line of this listing shows two revisions of the method side by
# side (left-hand text, then right-hand text).
#
# Both revisions turn `<br>` / `<br/>` into spaces, parse the markup with
# `XML.parse_html`, keep `content[0..200]` of the text and strip surrounding
# spaces. When the result is empty a single space (" ") is returned instead.
#
# Left-hand revision: replaces `"` with `&quot;` and "\n" with " " AFTER
# parsing and truncating.
# Right-hand revision: performs those replacements in the same `gsub` as the
# `<br>` handling, i.e. BEFORE parsing.
#
# NOTE(review): in the right-hand revision the `&quot;` entities are inserted
# before `XML.parse_html(...).content` is taken; presumably the parser decodes
# them back to `"`, so the returned text may contain unescaped double quotes.
# Confirm before relying on this value inside a quoted HTML attribute.
def short_description def short_description
description = self.description.gsub("<br>", " ") short_description = self.description_html.gsub(/(<br>)|(<br\/>|"|\n)/, {
description = description.gsub("<br/>", " ") "<br>" => " ",
description = XML.parse_html(description).content[0..200].gsub('"', "&quot;").gsub("\n", " ").strip(" ") "<br/>" => " ",
if description.empty? "\"" => "&quot;",
description = " " "\n" => " ",
})
short_description = XML.parse_html(short_description).content[0..200].strip(" ")
if short_description.empty?
short_description = " "
end end
return description return short_description
end end
def length_seconds def length_seconds
@ -1151,28 +1165,23 @@ def fetch_video(id, proxies, region)
end end
title = info["title"] title = info["title"]
author = info["author"] author = info["author"]? || ""
ucid = info["ucid"] ucid = info["ucid"]? || ""
views = html.xpath_node(%q(//meta[@itemprop="interactionCount"])) views = html.xpath_node(%q(//meta[@itemprop="interactionCount"]))
views = views.try &.["content"].to_i64? .try &.["content"].to_i64? || 0_i64
views ||= 0_i64
likes = html.xpath_node(%q(//button[@title="I like this"]/span)) likes = html.xpath_node(%q(//button[@title="I like this"]/span))
likes = likes.try &.content.delete(",").try &.to_i? .try &.content.delete(",").try &.to_i? || 0
likes ||= 0
dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span)) dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span))
dislikes = dislikes.try &.content.delete(",").try &.to_i? .try &.content.delete(",").try &.to_i? || 0
dislikes ||= 0
avg_rating = (likes.to_f/(likes.to_f + dislikes.to_f) * 4 + 1) avg_rating = (likes.to_f/(likes.to_f + dislikes.to_f) * 4 + 1)
avg_rating = avg_rating.nan? ? 0.0 : avg_rating avg_rating = avg_rating.nan? ? 0.0 : avg_rating
info["avg_rating"] = "#{avg_rating}" info["avg_rating"] = "#{avg_rating}"
description = html.xpath_node(%q(//p[@id="eow-description"])) description_html = html.xpath_node(%q(//p[@id="eow-description"])).try &.to_xml(options: XML::SaveOptions::NO_DECL) || ""
description = description ? description.to_xml(options: XML::SaveOptions::NO_DECL) : %q(<p id="eow-description"></p>)
wilson_score = ci_lower_bound(likes, likes + dislikes) wilson_score = ci_lower_bound(likes, likes + dislikes)
published = html.xpath_node(%q(//meta[@itemprop="datePublished"])).try &.["content"] published = html.xpath_node(%q(//meta[@itemprop="datePublished"])).try &.["content"]
@ -1188,7 +1197,8 @@ def fetch_video(id, proxies, region)
genre = html.xpath_node(%q(//meta[@itemprop="genre"])).try &.["content"] genre = html.xpath_node(%q(//meta[@itemprop="genre"])).try &.["content"]
genre ||= "" genre ||= ""
genre_url = html.xpath_node(%(//ul[contains(@class, "watch-info-tag-list")]/li/a[text()="#{genre}"])).try &.["href"] genre_url = html.xpath_node(%(//ul[contains(@class, "watch-info-tag-list")]/li/a[text()="#{genre}"])).try &.["href"]?
genre_url ||= ""
# YouTube provides invalid URLs for some genres, so we fix that here # YouTube provides invalid URLs for some genres, so we fix that here
case genre case genre
@ -1205,30 +1215,12 @@ def fetch_video(id, proxies, region)
when "Trailers" when "Trailers"
genre_url = "/channel/UClgRkhTL3_hImCAmdLfDE4g" genre_url = "/channel/UClgRkhTL3_hImCAmdLfDE4g"
end end
genre_url ||= ""
license = html.xpath_node(%q(//h4[contains(text(),"License")]/parent::*/ul/li)) license = html.xpath_node(%q(//h4[contains(text(),"License")]/parent::*/ul/li)).try &.content || ""
if license sub_count_text = html.xpath_node(%q(//span[contains(@class, "yt-subscriber-count")])).try &.["title"]? || "0"
license = license.content author_thumbnail = html.xpath_node(%(//span[@class="yt-thumb-clip"]/img)).try &.["data-thumb"]? || ""
else
license = ""
end
sub_count_text = html.xpath_node(%q(//span[contains(@class, "yt-subscriber-count")])) video = Video.new(id, info, Time.utc, title, views, likes, dislikes, wilson_score, published, description_html,
if sub_count_text
sub_count_text = sub_count_text["title"]
else
sub_count_text = "0"
end
author_thumbnail = html.xpath_node(%(//span[@class="yt-thumb-clip"]/img))
if author_thumbnail
author_thumbnail = author_thumbnail["data-thumb"]
else
author_thumbnail = ""
end
video = Video.new(id, info, Time.utc, title, views, likes, dislikes, wilson_score, published, description,
nil, author, ucid, allowed_regions, is_family_friendly, genre, genre_url, license, sub_count_text, author_thumbnail) nil, author, ucid, allowed_regions, is_family_friendly, genre, genre_url, license, sub_count_text, author_thumbnail)
return video return video

View File

@ -43,7 +43,7 @@
var player_data = { var player_data = {
aspect_ratio: '<%= aspect_ratio %>', aspect_ratio: '<%= aspect_ratio %>',
title: "<%= video.title.dump_unquoted %>", title: "<%= video.title.dump_unquoted %>",
description: "<%= HTML.escape(description) %>", description: "<%= HTML.escape(video.short_description) %>",
thumbnail: "<%= thumbnail %>" thumbnail: "<%= thumbnail %>"
} }
</script> </script>

View File

@ -1,12 +1,12 @@
<% content_for "header" do %> <% content_for "header" do %>
<meta name="thumbnail" content="<%= thumbnail %>"> <meta name="thumbnail" content="<%= thumbnail %>">
<meta name="description" content="<%= description %>"> <meta name="description" content="<%= video.short_description %>">
<meta name="keywords" content="<%= video.keywords.join(",") %>"> <meta name="keywords" content="<%= video.keywords.join(",") %>">
<meta property="og:site_name" content="Invidious"> <meta property="og:site_name" content="Invidious">
<meta property="og:url" content="<%= host_url %>/watch?v=<%= video.id %>"> <meta property="og:url" content="<%= host_url %>/watch?v=<%= video.id %>">
<meta property="og:title" content="<%= HTML.escape(video.title) %>"> <meta property="og:title" content="<%= HTML.escape(video.title) %>">
<meta property="og:image" content="/vi/<%= video.id %>/maxres.jpg"> <meta property="og:image" content="/vi/<%= video.id %>/maxres.jpg">
<meta property="og:description" content="<%= description %>"> <meta property="og:description" content="<%= video.short_description %>">
<meta property="og:type" content="video.other"> <meta property="og:type" content="video.other">
<meta property="og:video:url" content="<%= host_url %>/embed/<%= video.id %>"> <meta property="og:video:url" content="<%= host_url %>/embed/<%= video.id %>">
<meta property="og:video:secure_url" content="<%= host_url %>/embed/<%= video.id %>"> <meta property="og:video:secure_url" content="<%= host_url %>/embed/<%= video.id %>">
@ -17,7 +17,7 @@
<meta name="twitter:site" content="@omarroth1"> <meta name="twitter:site" content="@omarroth1">
<meta name="twitter:url" content="<%= host_url %>/watch?v=<%= video.id %>"> <meta name="twitter:url" content="<%= host_url %>/watch?v=<%= video.id %>">
<meta name="twitter:title" content="<%= HTML.escape(video.title) %>"> <meta name="twitter:title" content="<%= HTML.escape(video.title) %>">
<meta name="twitter:description" content="<%= description %>"> <meta name="twitter:description" content="<%= video.short_description %>">
<meta name="twitter:image" content="<%= host_url %>/vi/<%= video.id %>/maxres.jpg"> <meta name="twitter:image" content="<%= host_url %>/vi/<%= video.id %>/maxres.jpg">
<meta name="twitter:player" content="<%= host_url %>/embed/<%= video.id %>"> <meta name="twitter:player" content="<%= host_url %>/embed/<%= video.id %>">
<meta name="twitter:player:width" content="1280"> <meta name="twitter:player:width" content="1280">
@ -185,7 +185,7 @@ var video_data = {
</p> </p>
<div> <div>
<%= video.description %> <%= video.description_html %>
</div> </div>
<hr> <hr>