From 5b0c7f12139d25bcd95173290278188a49467c18 Mon Sep 17 00:00:00 2001 From: syeopite Date: Fri, 28 Feb 2025 18:21:35 -0800 Subject: [PATCH] Handle parse errors gracefully on timeline items Prior to this commit, if even a single item fails to parse Invidious will throw out an error. This means that even if everything else on a page can be parsed and rendered without issues, the single problematic item will cause the entire page to be unusable. This commit gracefully handles parse errors by catching and then replacing the problematic item with a new "timeline error" object that represents the parse error. This will allow the rest of the page to be rendered and an error card that will replace the location of the problematic item. --- src/invidious/helpers/serialized_yt_data.cr | 20 +++++- src/invidious/views/components/item.ecr | 6 +- src/invidious/yt_backend/extractors.cr | 73 ++++++++++++++++----- 3 files changed, 82 insertions(+), 17 deletions(-) diff --git a/src/invidious/helpers/serialized_yt_data.cr b/src/invidious/helpers/serialized_yt_data.cr index f8e8f187..ff233619 100644 --- a/src/invidious/helpers/serialized_yt_data.cr +++ b/src/invidious/helpers/serialized_yt_data.cr @@ -291,6 +291,24 @@ struct SearchHashtag end end +# A `ProblematicTimelineItem` is a `SearchItem` created by Invidious that +# represents an item that caused an exception during parsing. +# +# This is not a parsed object from YouTube but rather an Invidious-only type +# created to gracefully communicate parse errors without throwing away +# the rest of the (hopefully) successfully parsed item on a page. +struct ProblematicTimelineItem + property parse_exception : Exception + + def initialize(@parse_exception); end + + def to_json(locale : String?, json : JSON::Builder) + json.object do + json.field "type", "parse-error" + end + end +end + class Category include DB::Serializable @@ -333,4 +351,4 @@ struct Continuation end end -alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | SearchHashtag | Category +alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | SearchHashtag | Category | ProblematicTimelineItem diff --git a/src/invidious/views/components/item.ecr b/src/invidious/views/components/item.ecr index c966a926..79cc4725 100644 --- a/src/invidious/views/components/item.ecr +++ b/src/invidious/views/components/item.ecr @@ -1,6 +1,6 @@ <%- thin_mode = env.get("preferences").as(Preferences).thin_mode - item_watched = !item.is_a?(SearchChannel | SearchHashtag | SearchPlaylist | InvidiousPlaylist | Category) && env.get?("user").try &.as(User).watched.index(item.id) != nil + item_watched = !item.is_a?(SearchChannel | SearchHashtag | SearchPlaylist | InvidiousPlaylist | Category | ProblematicTimelineItem) && env.get?("user").try &.as(User).watched.index(item.id) != nil author_verified = item.responds_to?(:author_verified) && item.author_verified -%> @@ -97,6 +97,10 @@ <% when Category %> + <% when ProblematicTimelineItem %> +
+

Unable to parse this item

+
<% else %> <%- # `endpoint_params` is used for the "video-context-buttons" component diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr index edd7bf1b..11ab7483 100644 --- a/src/invidious/yt_backend/extractors.cr +++ b/src/invidious/yt_backend/extractors.cr @@ -35,6 +35,20 @@ record AuthorFallback, name : String, id : String # data is passed to the private `#parse()` method which returns a datastruct of the given # type. Otherwise, nil is returned. private module Parsers + module BaseParser + def parse(*args) + begin + return parse_internal(*args) + rescue ex + LOGGER.debug("#{ {{@type.name}} }: Failed to render item.") + LOGGER.debug("#{ {{@type.name}} }: Got exception: #{ex.message}") + ProblematicTimelineItem.new( + parse_exception: ex + ) + end + end + end + # Parses a InnerTube videoRenderer into a SearchVideo. Returns nil when the given object isn't a videoRenderer # # A videoRenderer renders a video to click on within the YouTube and Invidious UI. It is **not** @@ -45,13 +59,16 @@ private module Parsers # `videoRenderer`s can be found almost everywhere on YouTube. In categories, search results, channels, etc. # module VideoRendererParser + extend self + include BaseParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) if item_contents = (item["videoRenderer"]? || item["gridVideoRenderer"]?) return self.parse(item_contents, author_fallback) end end - private def self.parse(item_contents, author_fallback) + private def parse_internal(item_contents, author_fallback) video_id = item_contents["videoId"].as_s title = extract_text(item_contents["title"]?) || "" @@ -170,13 +187,16 @@ private module Parsers # `channelRenderer`s can be found almost everywhere on YouTube. In categories, search results, channels, etc. # module ChannelRendererParser + extend self + include BaseParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) if item_contents = (item["channelRenderer"]? || item["gridChannelRenderer"]?) return self.parse(item_contents, author_fallback) end end - private def self.parse(item_contents, author_fallback) + private def parse_internal(item_contents, author_fallback) author = extract_text(item_contents["title"]) || author_fallback.name author_id = item_contents["channelId"]?.try &.as_s || author_fallback.id author_verified = has_verified_badge?(item_contents["ownerBadges"]?) @@ -230,13 +250,16 @@ private module Parsers # A `hashtagTileRenderer` is a kind of search result. # It can be found when searching for any hashtag (e.g "#hi" or "#shorts") module HashtagRendererParser + extend self + include BaseParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) if item_contents = item["hashtagTileRenderer"]? return self.parse(item_contents) end end - private def self.parse(item_contents) + private def parse_internal(item_contents) title = extract_text(item_contents["hashtag"]).not_nil! # E.g "#hi" # E.g "/hashtag/hi" @@ -263,10 +286,6 @@ private module Parsers video_count: short_text_to_number(video_count_txt || ""), channel_count: short_text_to_number(channel_count_txt || ""), }) - rescue ex - LOGGER.debug("HashtagRendererParser: Failed to extract renderer.") - LOGGER.debug("HashtagRendererParser: Got exception: #{ex.message}") - return nil end def self.parser_name @@ -284,13 +303,16 @@ private module Parsers # `gridPlaylistRenderer`s can be found on the playlist-tabs of channels and expanded categories. # module GridPlaylistRendererParser + extend self + include BaseParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) if item_contents = item["gridPlaylistRenderer"]? return self.parse(item_contents, author_fallback) end end - private def self.parse(item_contents, author_fallback) + private def parse_internal(item_contents, author_fallback) title = extract_text(item_contents["title"]) || "" plid = item_contents["playlistId"]?.try &.as_s || "" @@ -325,13 +347,16 @@ private module Parsers # `playlistRenderer`s can be found almost everywhere on YouTube. In categories, search results, recommended, etc. # module PlaylistRendererParser + extend self + include BaseParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) if item_contents = item["playlistRenderer"]? return self.parse(item_contents, author_fallback) end end - private def self.parse(item_contents, author_fallback) + private def parse_internal(item_contents, author_fallback) title = extract_text(item_contents["title"]) || "" plid = item_contents["playlistId"]?.try &.as_s || "" @@ -385,13 +410,16 @@ private module Parsers # `shelfRenderer`s can be found almost everywhere on YouTube. In categories, search results, channels, etc. # module CategoryRendererParser + extend self + include BaseParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) if item_contents = item["shelfRenderer"]? return self.parse(item_contents, author_fallback) end end - private def self.parse(item_contents, author_fallback) + private def parse_internal(item_contents, author_fallback) title = extract_text(item_contents["title"]?) || "" url = item_contents.dig?("endpoint", "commandMetadata", "webCommandMetadata", "url") .try &.as_s @@ -450,13 +478,16 @@ private module Parsers # container.It is very similar to RichItemRendererParser # module ItemSectionRendererParser + extend self + include BaseParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) if item_contents = item.dig?("itemSectionRenderer", "contents", 0) return self.parse(item_contents, author_fallback) end end - private def self.parse(item_contents, author_fallback) + private def parse_internal(item_contents, author_fallback) child = VideoRendererParser.process(item_contents, author_fallback) child ||= PlaylistRendererParser.process(item_contents, author_fallback) @@ -476,13 +507,16 @@ private module Parsers # itself inside a richGridRenderer container. # module RichItemRendererParser + extend self + include BaseParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) if item_contents = item.dig?("richItemRenderer", "content") return self.parse(item_contents, author_fallback) end end - private def self.parse(item_contents, author_fallback) + private def parse_internal(item_contents, author_fallback) child = VideoRendererParser.process(item_contents, author_fallback) child ||= ReelItemRendererParser.process(item_contents, author_fallback) child ||= PlaylistRendererParser.process(item_contents, author_fallback) @@ -506,13 +540,16 @@ private module Parsers # TODO: Confirm that hypothesis # module ReelItemRendererParser + extend self + include BaseParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) if item_contents = item["reelItemRenderer"]? return self.parse(item_contents, author_fallback) end end - private def self.parse(item_contents, author_fallback) + private def parse_internal(item_contents, author_fallback) video_id = item_contents["videoId"].as_s reel_player_overlay = item_contents.dig( @@ -600,13 +637,16 @@ private module Parsers # a richItemRenderer or a richGridRenderer. # module LockupViewModelParser + extend self + include BaseParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) if item_contents = item["lockupViewModel"]? return self.parse(item_contents, author_fallback) end end - private def self.parse(item_contents, author_fallback) + private def parse_internal(item_contents, author_fallback) playlist_id = item_contents["contentId"].as_s thumbnail_view_model = item_contents.dig( @@ -675,13 +715,16 @@ private module Parsers # usually (always?) encapsulated in a richItemRenderer. # module ShortsLockupViewModelParser + extend self + include BaseParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) if item_contents = item["shortsLockupViewModel"]? return self.parse(item_contents, author_fallback) end end - private def self.parse(item_contents, author_fallback) + private def parse_internal(item_contents, author_fallback) # TODO: Maybe add support for "oardefault.jpg" thumbnails? # thumbnail = item_contents.dig("thumbnail", "sources", 0, "url").as_s # Gives: https://i.ytimg.com/vi/{video_id}/oardefault.jpg?...