From ce1fb8d08c86f747ee638289c8bcfeb208702445 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Mon, 8 May 2023 00:53:08 +0200 Subject: [PATCH 1/2] Use XML.parse instead of XML.parse_html Due to recent changes to libxml2 (between 2.9.14 and 2.10.4, See https://gitlab.gnome.org/GNOME/libxml2/-/issues/508), the HTML parser doesn't take into account the namespaces (xmlns). Because HTML shouldn't contain namespaces anyway, there is no reason for use to keep using it. But switching to the XML parser means that we have to pass the namespaces to every single 'xpath_node(s)' method for it to be able to properly navigate the XML structure. --- src/invidious/channels/channels.cr | 36 +++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/src/invidious/channels/channels.cr b/src/invidious/channels/channels.cr index 63dd2194..b09d93b1 100644 --- a/src/invidious/channels/channels.cr +++ b/src/invidious/channels/channels.cr @@ -159,12 +159,18 @@ def fetch_channel(ucid, pull_all_videos : Bool) LOGGER.debug("fetch_channel: #{ucid}") LOGGER.trace("fetch_channel: #{ucid} : pull_all_videos = #{pull_all_videos}") + namespaces = { + "yt" => "http://www.youtube.com/xml/schemas/2015", + "media" => "http://search.yahoo.com/mrss/", + "default" => "http://www.w3.org/2005/Atom", + } + LOGGER.trace("fetch_channel: #{ucid} : Downloading RSS feed") rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body LOGGER.trace("fetch_channel: #{ucid} : Parsing RSS feed") - rss = XML.parse_html(rss) + rss = XML.parse(rss) - author = rss.xpath_node(%q(//feed/title)) + author = rss.xpath_node("//default:feed/default:title", namespaces) if !author raise InfoException.new("Deleted or invalid channel") end @@ -192,15 +198,23 @@ def fetch_channel(ucid, pull_all_videos : Bool) videos, continuation = IV::Channel::Tabs.get_videos(channel) LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed") - rss.xpath_nodes("//feed/entry").each do |entry| - video_id = entry.xpath_node("videoid").not_nil!.content - title = entry.xpath_node("title").not_nil!.content - published = Time.parse_rfc3339(entry.xpath_node("published").not_nil!.content) - updated = Time.parse_rfc3339(entry.xpath_node("updated").not_nil!.content) - author = entry.xpath_node("author/name").not_nil!.content - ucid = entry.xpath_node("channelid").not_nil!.content - views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64? - views ||= 0_i64 + rss.xpath_nodes("//default:feed/default:entry", namespaces).each do |entry| + video_id = entry.xpath_node("yt:videoid", namespaces).not_nil!.content + title = entry.xpath_node("default:title", namespaces).not_nil!.content + + published = Time.parse_rfc3339( + entry.xpath_node("default:published", namespaces).not_nil!.content + ) + updated = Time.parse_rfc3339( + entry.xpath_node("default:updated", namespaces).not_nil!.content + ) + + author = entry.xpath_node("default:author/default:name", namespaces).not_nil!.content + ucid = entry.xpath_node("yt:channelid", namespaces).not_nil!.content + + views = entry + .xpath_node("media:group/media:community/media:statistics", namespaces) + .try &.["views"]?.try &.to_i64? || 0_i64 channel_video = videos .select(SearchVideo) From c385a944e642ce9e060c2dcf2082ecf0bb10b45a Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Mon, 8 May 2023 13:10:18 +0200 Subject: [PATCH 2/2] Subscriptions: Fix casing of XML tag names --- src/invidious/channels/channels.cr | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/invidious/channels/channels.cr b/src/invidious/channels/channels.cr index b09d93b1..c3d6124f 100644 --- a/src/invidious/channels/channels.cr +++ b/src/invidious/channels/channels.cr @@ -199,7 +199,7 @@ def fetch_channel(ucid, pull_all_videos : Bool) LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed") rss.xpath_nodes("//default:feed/default:entry", namespaces).each do |entry| - video_id = entry.xpath_node("yt:videoid", namespaces).not_nil!.content + video_id = entry.xpath_node("yt:videoId", namespaces).not_nil!.content title = entry.xpath_node("default:title", namespaces).not_nil!.content published = Time.parse_rfc3339( @@ -210,7 +210,7 @@ def fetch_channel(ucid, pull_all_videos : Bool) ) author = entry.xpath_node("default:author/default:name", namespaces).not_nil!.content - ucid = entry.xpath_node("yt:channelid", namespaces).not_nil!.content + ucid = entry.xpath_node("yt:channelId", namespaces).not_nil!.content views = entry .xpath_node("media:group/media:community/media:statistics", namespaces)