Use SearchHashtag for parsing the header of hashtag pages

2024-10-01 01:25:56 -04:00 · 2023-09-07 00:54:21 -04:00 · 2023-09-07 00:54:21 -04:00 · c10fc9474c
commit c10fc9474c
parent ad139d59e3
2 changed files with 16 additions and 57 deletions
--- a/src/invidious/hashtag.cr
+++ b/src/invidious/hashtag.cr
@@ -5,15 +5,15 @@ module Invidious::Hashtag
    include DB::Serializable

    property videos : Array(SearchItem) | Array(Video)
-    property header : HashtagHeader?
+    property header : SearchHashtag?
    property has_next_continuation : Bool

    def to_json(locale : String?, json : JSON::Builder)
      json.object do
-        json.field "type", "hashtag"
+        json.field "type", "hashtagPage"
        if self.header != nil
          json.field "header" do
-            self.header.to_json(json)
+            self.header.try &.as(SearchHashtag).to_json(locale, json)
          end
        end
        json.field "results" do
@@ -26,39 +26,6 @@ module Invidious::Hashtag
        json.field "hasNextPage", self.has_next_continuation
      end
    end
-
-    # TODO: remove the locale and follow the crystal convention
-    def to_json(locale : String?, _json : Nil)
-      JSON.build do |json|
-        to_json(locale, json)
-      end
-    end
-
-    def to_json(json : JSON::Builder)
-      to_json(nil, json)
-    end
-  end
-
-  struct HashtagHeader
-    include DB::Serializable
-
-    property tag : String
-    property channel_count : Int64
-    property video_count : Int64
-
-    def to_json(json : JSON::Builder)
-      json.object do
-        json.field "hashtag", self.tag
-        json.field "channelCount", self.channel_count
-        json.field "videoCount", self.video_count
-      end
-    end
-
-    def to_json(_json : Nil)
-      JSON.build do |json|
-        to_json(json)
-      end
-    end
  end

  def fetch(hashtag : String, page : Int, region : String? = nil) : HashtagPage
@@ -72,8 +39,8 @@ module Invidious::Hashtag
    else
      # item browses the first page (including metadata)
      response = YoutubeAPI.browse("FEhashtag", params: item, client_config: client_config)
-      if item_contents = response.dig?("header", "hashtagHeaderRenderer")
-        header = parse_hashtag_renderer(item_contents)
+      if item_contents = response.dig?("header")
+        header = parse_item(item_contents).try &.as(SearchHashtag)
      end
    end

@@ -119,20 +86,4 @@ module Invidious::Hashtag
      .try { |i| Base64.urlsafe_encode(i) }
      .try { |i| URI.encode_www_form(i) }
  end
-
-  def parse_hashtag_renderer(item_contents)
-    info = extract_text(item_contents.dig?("hashtagInfoText")) || ""
-
-    regex_match = /(?<videos>\d+\S)\D+(?<channels>\d+\S)/.match(info)
-
-    hashtag = extract_text(item_contents.dig?("hashtag")) || ""
-    videos = short_text_to_number(regex_match.try &.["videos"]?.try &.to_s || "0")
-    channels = short_text_to_number(regex_match.try &.["channels"]?.try &.to_s || "0")
-
-    return HashtagHeader.new({
-      tag:           hashtag,
-      channel_count: channels,
-      video_count:   videos,
-    })
-  end
 end
--- a/src/invidious/yt_backend/extractors.cr
+++ b/src/invidious/yt_backend/extractors.cr
@@ -217,9 +217,11 @@ private module Parsers
  #
  # A `hashtagTileRenderer` is a kind of search result.
  # It can be found when searching for any hashtag (e.g "#hi" or "#shorts")
+  #
+  # A `hashtagHeaderRenderer` is displayed on the first page of the hashtag page.
  module HashtagRendererParser
    def self.process(item : JSON::Any, author_fallback : AuthorFallback)
-      if item_contents = item["hashtagTileRenderer"]?
+      if item_contents = (item["hashtagTileRenderer"]? || item["hashtagHeaderRenderer"]?)
        return self.parse(item_contents)
      end
    end
@@ -231,8 +233,14 @@ private module Parsers
      url = item_contents.dig?("onTapCommand", "commandMetadata", "webCommandMetadata", "url").try &.as_s
      url ||= URI.encode_path("/hashtag/#{title.lchop('#')}")

+      if info = extract_text(item_contents.dig?("hashtagInfoText"))
+        regex_match = /(?<videos>\d+\S)\D+(?<channels>\d+\S)/.match(info)
+        videos = regex_match.try &.["videos"]?.try &.to_s
+        channels = regex_match.try &.["channels"]?.try &.to_s
+      else
        video_count_txt = extract_text(item_contents["hashtagVideoCount"]?)     # E.g "203K videos"
        channel_count_txt = extract_text(item_contents["hashtagChannelCount"]?) # E.g "81K channels"
+      end

      # Fallback for video/channel counts
      if channel_count_txt.nil? || video_count_txt.nil?