Videos: parse data during first fetching

There will be less data to be stores in the DB cache
This commit is contained in:
Samantaz Fox 2022-07-24 23:30:58 +02:00
parent cd03fa06ae
commit 6aaea7fafa
No known key found for this signature in database
GPG Key ID: F42821059186176E
2 changed files with 182 additions and 120 deletions

View File

@ -22,7 +22,6 @@ Spectator.describe Invidious::Hashtag do
expect(info["likes"].as_i).to eq(2_283) expect(info["likes"].as_i).to eq(2_283)
expect(info["genre"].as_s).to eq("Gaming") expect(info["genre"].as_s).to eq("Gaming")
expect(info["genreUrl"].raw).to be_nil
expect(info["genreUcid"].as_s).to be_empty expect(info["genreUcid"].as_s).to be_empty
expect(info["license"].as_s).to be_empty expect(info["license"].as_s).to be_empty
@ -81,7 +80,6 @@ Spectator.describe Invidious::Hashtag do
expect(info["likes"].as_i).to eq(22) expect(info["likes"].as_i).to eq(22)
expect(info["genre"].as_s).to eq("Entertainment") expect(info["genre"].as_s).to eq("Entertainment")
expect(info["genreUrl"].raw).to be_nil
expect(info["genreUcid"].as_s).to be_empty expect(info["genreUcid"].as_s).to be_empty
expect(info["license"].as_s).to be_empty expect(info["license"].as_s).to be_empty

View File

@ -1,3 +1,9 @@
enum VideoType
Video
Livestream
Scheduled
end
struct Video struct Video
include DB::Serializable include DB::Serializable
@ -27,7 +33,7 @@ struct Video
def to_json(locale : String?, json : JSON::Builder) def to_json(locale : String?, json : JSON::Builder)
json.object do json.object do
json.field "type", "video" json.field "type", self.video_type
json.field "title", self.title json.field "title", self.title
json.field "videoId", self.id json.field "videoId", self.id
@ -253,61 +259,22 @@ struct Video
to_json(nil, json) to_json(nil, json)
end end
def title def video_type : VideoType
info["videoDetails"]["title"]?.try &.as_s || "" video_type = info["videoType"]?.try &.as_s || "video"
end return VideoType.parse?(video_type) || VideoType::Video
def ucid
info["videoDetails"]["channelId"]?.try &.as_s || ""
end
def author
info["videoDetails"]["author"]?.try &.as_s || ""
end
def length_seconds : Int32
info.dig?("microformat", "playerMicroformatRenderer", "lengthSeconds").try &.as_s.to_i ||
info["videoDetails"]["lengthSeconds"]?.try &.as_s.to_i || 0
end
def views : Int64
info["videoDetails"]["viewCount"]?.try &.as_s.to_i64 || 0_i64
end
def likes : Int64
info["likes"]?.try &.as_i64 || 0_i64
end
def dislikes : Int64
info["dislikes"]?.try &.as_i64 || 0_i64
end end
def published : Time def published : Time
info return info["published"]?
.dig?("microformat", "playerMicroformatRenderer", "publishDate")
.try { |t| Time.parse(t.as_s, "%Y-%m-%d", Time::Location::UTC) } || Time.utc .try { |t| Time.parse(t.as_s, "%Y-%m-%d", Time::Location::UTC) } || Time.utc
end end
def published=(other : Time) def published=(other : Time)
info["microformat"].as_h["playerMicroformatRenderer"].as_h["publishDate"] = JSON::Any.new(other.to_s("%Y-%m-%d")) info["published"] = JSON::Any.new(other.to_s("%Y-%m-%d"))
end
def allow_ratings
r = info["videoDetails"]["allowRatings"]?.try &.as_bool
r.nil? ? false : r
end end
def live_now def live_now
info["microformat"]?.try &.["playerMicroformatRenderer"]? return (self.video_type == VideoType::Livestream)
.try &.["liveBroadcastDetails"]?.try &.["isLiveNow"]?.try &.as_bool || false
end
def is_listed
info["videoDetails"]["isCrawlable"]?.try &.as_bool || false
end
def is_upcoming
info["videoDetails"]["isUpcoming"]?.try &.as_bool || false
end end
def premiere_timestamp : Time? def premiere_timestamp : Time?
@ -316,31 +283,11 @@ struct Video
.try { |t| Time.parse_rfc3339(t.as_s) } .try { |t| Time.parse_rfc3339(t.as_s) }
end end
def keywords
info["videoDetails"]["keywords"]?.try &.as_a.map &.as_s || [] of String
end
def related_videos def related_videos
info["relatedVideos"]?.try &.as_a.map { |h| h.as_h.transform_values &.as_s } || [] of Hash(String, String) info["relatedVideos"]?.try &.as_a.map { |h| h.as_h.transform_values &.as_s } || [] of Hash(String, String)
end end
def allowed_regions # Methods for parsing streaming data
info
.dig?("microformat", "playerMicroformatRenderer", "availableCountries")
.try &.as_a.map &.as_s || [] of String
end
def author_thumbnail : String
info["authorThumbnail"]?.try &.as_s || ""
end
def author_verified : Bool
info["authorVerified"]?.try &.as_bool || false
end
def sub_count_text : String
info["subCountText"]?.try &.as_s || "-"
end
def fmt_stream def fmt_stream
return @fmt_stream.as(Array(Hash(String, JSON::Any))) if @fmt_stream return @fmt_stream.as(Array(Hash(String, JSON::Any))) if @fmt_stream
@ -391,6 +338,8 @@ struct Video
adaptive_fmts.select &.["mimeType"]?.try &.as_s.starts_with?("audio") adaptive_fmts.select &.["mimeType"]?.try &.as_s.starts_with?("audio")
end end
# Misc. methods
def storyboards def storyboards
storyboards = info.dig?("storyboards", "playerStoryboardSpecRenderer", "spec") storyboards = info.dig?("storyboards", "playerStoryboardSpecRenderer", "spec")
.try &.as_s.split("|") .try &.as_s.split("|")
@ -454,8 +403,7 @@ struct Video
end end
def paid def paid
reason = info.dig?("playabilityStatus", "reason").try &.as_s || "" return (self.reason || "").includes? "requires payment"
return reason.includes? "requires payment"
end end
def premium def premium
@ -470,29 +418,6 @@ struct Video
return @captions return @captions
end end
def description
description = info
.dig?("microformat", "playerMicroformatRenderer", "description", "simpleText")
.try &.as_s || ""
end
# TODO
def description=(value : String)
@description = value
end
def description_html
info["descriptionHtml"]?.try &.as_s || "<p></p>"
end
def description_html=(value : String)
info["descriptionHtml"] = JSON::Any.new(value)
end
def short_description
info["shortDescription"]?.try &.as_s? || ""
end
def hls_manifest_url : String? def hls_manifest_url : String?
info.dig?("streamingData", "hlsManifestUrl").try &.as_s info.dig?("streamingData", "hlsManifestUrl").try &.as_s
end end
@ -501,25 +426,12 @@ struct Video
info.dig?("streamingData", "dashManifestUrl").try &.as_s info.dig?("streamingData", "dashManifestUrl").try &.as_s
end end
def genre : String
info["genre"]?.try &.as_s || ""
end
def genre_url : String? def genre_url : String?
info["genreUcid"]? ? "/channel/#{info["genreUcid"]}" : nil info["genreUcid"]? ? "/channel/#{info["genreUcid"]}" : nil
end end
def license : String?
info["license"]?.try &.as_s
end
def is_family_friendly : Bool
info.dig?("microformat", "playerMicroformatRenderer", "isFamilySafe").try &.as_bool || false
end
def is_vr : Bool? def is_vr : Bool?
projection_type = info.dig?("streamingData", "adaptiveFormats", 0, "projectionType").try &.as_s return {"EQUIRECTANGULAR", "MESH"}.includes? self.projection_type
return {"EQUIRECTANGULAR", "MESH"}.includes? projection_type
end end
def projection_type : String? def projection_type : String?
@ -529,6 +441,91 @@ struct Video
def reason : String? def reason : String?
info["reason"]?.try &.as_s info["reason"]?.try &.as_s
end end
# Macros defining getters/setters for various types of data
private macro getset_string(name)
# Return {{name.stringify}} from `info`
def {{name.id.underscore}} : String
return info[{{name.stringify}}]?.try &.as_s || ""
end
# Update {{name.stringify}} into `info`
def {{name.id.underscore}}=(value : String)
info[{{name.stringify}}] = JSON::Any.new(value)
end
{% if flag?(:debug_macros) %} {{debug}} {% end %}
end
private macro getset_string_array(name)
# Return {{name.stringify}} from `info`
def {{name.id.underscore}} : Array(String)
return info[{{name.stringify}}]?.try &.as_a.map &.as_s || [] of String
end
# Update {{name.stringify}} into `info`
def {{name.id.underscore}}=(value : Array(String))
info[{{name.stringify}}] = JSON::Any.new(value)
end
{% if flag?(:debug_macros) %} {{debug}} {% end %}
end
{% for op, type in {i32: Int32, i64: Int64} %}
private macro getset_{{op}}(name)
def \{{name.id.underscore}} : {{type}}
return info[\{{name.stringify}}]?.try &.as_i.to_{{op}} || 0_{{op}}
end
def \{{name.id.underscore}}=(value : Int)
info[\{{name.stringify}}] = JSON::Any.new(value.to_i64)
end
\{% if flag?(:debug_macros) %} \{{debug}} \{% end %}
end
{% end %}
private macro getset_bool(name)
# Return {{name.stringify}} from `info`
def {{name.id.underscore}} : Bool
return info[{{name.stringify}}]?.try &.as_bool || false
end
# Update {{name.stringify}} into `info`
def {{name.id.underscore}}=(value : Bool)
info[{{name.stringify}}] = JSON::Any.new(value)
end
{% if flag?(:debug_macros) %} {{debug}} {% end %}
end
# Method definitions, using the macros above
getset_string author
getset_string authorThumbnail
getset_string description
getset_string descriptionHtml
getset_string genre
getset_string genreUcid
getset_string license
getset_string shortDescription
getset_string subCountText
getset_string title
getset_string ucid
getset_string_array allowedRegions
getset_string_array keywords
getset_i32 lengthSeconds
getset_i64 likes
getset_i64 views
getset_bool allowRatings
getset_bool authorVerified
getset_bool isFamilyFriendly
getset_bool isListed
getset_bool isUpcoming
end end
class VideoRedirect < Exception class VideoRedirect < Exception
@ -684,6 +681,42 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any
raise BrokenTubeException.new("videoPrimaryInfoRenderer") if !video_primary_renderer raise BrokenTubeException.new("videoPrimaryInfoRenderer") if !video_primary_renderer
raise BrokenTubeException.new("videoSecondaryInfoRenderer") if !video_secondary_renderer raise BrokenTubeException.new("videoSecondaryInfoRenderer") if !video_secondary_renderer
video_details = player_response.dig?("videoDetails")
microformat = player_response.dig?("microformat", "playerMicroformatRenderer")
raise BrokenTubeException.new("videoDetails") if !video_details
raise BrokenTubeException.new("microformat") if !microformat
# Basic video infos
title = video_details["title"]?.try &.as_s
views = video_details["viewCount"]?.try &.as_s.to_i64
length_txt = (microformat["lengthSeconds"]? || video_details["lengthSeconds"])
.try &.as_s.to_i64
published = microformat["publishDate"]?
.try { |t| Time.parse(t.as_s, "%Y-%m-%d", Time::Location::UTC) } || Time.utc
premiere_timestamp = microformat.dig?("liveBroadcastDetails", "startTimestamp")
.try { |t| Time.parse_rfc3339(t.as_s) }
live_now = microformat.dig?("liveBroadcastDetails", "isLiveNow")
.try &.as_bool || false
# Extra video infos
allowed_regions = microformat["availableCountries"]?
.try &.as_a.map &.as_s || [] of String
allow_ratings = video_details["allowRatings"]?.try &.as_bool
family_friendly = microformat["isFamilySafe"].try &.as_bool
is_listed = video_details["isCrawlable"]?.try &.as_bool
is_upcoming = video_details["isUpcoming"]?.try &.as_bool
keywords = video_details["keywords"]?
.try &.as_a.map &.as_s || [] of String
# Related videos # Related videos
LOGGER.debug("extract_video_info: parsing related videos...") LOGGER.debug("extract_video_info: parsing related videos...")
@ -738,6 +771,7 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any
# Description # Description
description = microformat.dig?("description", "simpleText").try &.as_s || ""
short_description = player_response.dig?("videoDetails", "shortDescription") short_description = player_response.dig?("videoDetails", "shortDescription")
description_html = video_secondary_renderer.try &.dig?("description", "runs") description_html = video_secondary_renderer.try &.dig?("description", "runs")
@ -749,7 +783,7 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any
.try &.dig?("metadataRowContainer", "metadataRowContainerRenderer", "rows") .try &.dig?("metadataRowContainer", "metadataRowContainerRenderer", "rows")
.try &.as_a .try &.as_a
genre = player_response.dig?("microformat", "playerMicroformatRenderer", "category") genre = microformat["category"]?
genre_ucid = nil genre_ucid = nil
license = nil license = nil
@ -771,6 +805,9 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any
# Author infos # Author infos
author = video_details["author"]?.try &.as_s
ucid = video_details["channelId"]?.try &.as_s
if author_info = video_secondary_renderer.try &.dig?("owner", "videoOwnerRenderer") if author_info = video_secondary_renderer.try &.dig?("owner", "videoOwnerRenderer")
author_thumbnail = author_info.dig?("thumbnail", "thumbnails", 0, "url") author_thumbnail = author_info.dig?("thumbnail", "thumbnails", 0, "url")
author_verified = has_verified_badge?(author_info["badges"]?) author_verified = has_verified_badge?(author_info["badges"]?)
@ -782,16 +819,43 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any
# Return data # Return data
if live_now
video_type = VideoType::Livestream
elsif premiere_timestamp.not_nil!
video_type = VideoType::Scheduled
published = premiere_timestamp || Time.utc
else
video_type = VideoType::Video
end
params = { params = {
"shortDescription" => JSON::Any.new(short_description.try &.as_s || nil), "videoType" => JSON::Any.new(video_type.to_s),
"relatedVideos" => JSON::Any.new(related), # Basic video infos
"title" => JSON::Any.new(title || ""),
"views" => JSON::Any.new(views || 0_i64),
"likes" => JSON::Any.new(likes || 0_i64), "likes" => JSON::Any.new(likes || 0_i64),
"dislikes" => JSON::Any.new(0_i64), "lengthSeconds" => JSON::Any.new(length_txt || 0_i64),
"published" => JSON::Any.new(published.to_rfc3339),
# Extra video infos
"allowedRegions" => JSON::Any.new(allowed_regions.map { |v| JSON::Any.new(v) }),
"allowRatings" => JSON::Any.new(allow_ratings || false),
"isFamilyFriendly" => JSON::Any.new(family_friendly || false),
"isListed" => JSON::Any.new(is_listed || false),
"isUpcoming" => JSON::Any.new(is_upcoming || false),
"keywords" => JSON::Any.new(keywords.map { |v| JSON::Any.new(v) }),
# Related videos
"relatedVideos" => JSON::Any.new(related),
# Description
"description" => JSON::Any.new(description || ""),
"descriptionHtml" => JSON::Any.new(description_html || "<p></p>"), "descriptionHtml" => JSON::Any.new(description_html || "<p></p>"),
"shortDescription" => JSON::Any.new(short_description.try &.as_s || nil),
# Video metadata
"genre" => JSON::Any.new(genre.try &.as_s || ""), "genre" => JSON::Any.new(genre.try &.as_s || ""),
"genreUrl" => JSON::Any.new(nil),
"genreUcid" => JSON::Any.new(genre_ucid.try &.as_s || ""), "genreUcid" => JSON::Any.new(genre_ucid.try &.as_s || ""),
"license" => JSON::Any.new(license.try &.as_s || ""), "license" => JSON::Any.new(license.try &.as_s || ""),
# Author infos
"author" => JSON::Any.new(author || ""),
"ucid" => JSON::Any.new(ucid || ""),
"authorThumbnail" => JSON::Any.new(author_thumbnail.try &.as_s || ""), "authorThumbnail" => JSON::Any.new(author_thumbnail.try &.as_s || ""),
"authorVerified" => JSON::Any.new(author_verified), "authorVerified" => JSON::Any.new(author_verified),
"subCountText" => JSON::Any.new(subs_text || "-"), "subCountText" => JSON::Any.new(subs_text || "-"),