diff --git a/src/invidious/frontend/watch_page.cr b/src/invidious/frontend/watch_page.cr index b860dba7..5fd81168 100644 --- a/src/invidious/frontend/watch_page.cr +++ b/src/invidious/frontend/watch_page.cr @@ -7,7 +7,7 @@ module Invidious::Frontend::WatchPage getter full_videos : Array(Hash(String, JSON::Any)) getter video_streams : Array(Hash(String, JSON::Any)) getter audio_streams : Array(Hash(String, JSON::Any)) - getter captions : Array(Invidious::Videos::CaptionMetadata) + getter captions : Array(Invidious::Videos::Captions::Metadata) def initialize( @full_videos, diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index 2b1d2603..9fbd1374 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -24,7 +24,7 @@ struct Video property updated : Time @[DB::Field(ignore: true)] - @captions = [] of Invidious::Videos::CaptionMetadata + @captions = [] of Invidious::Videos::Captions::Metadata @[DB::Field(ignore: true)] property adaptive_fmts : Array(Hash(String, JSON::Any))? @@ -215,9 +215,9 @@ struct Video keywords.includes? "YouTube Red" end - def captions : Array(Invidious::Videos::CaptionMetadata) + def captions : Array(Invidious::Videos::Captions::Metadata) if @captions.empty? && @info.has_key?("captions") - @captions = Invidious::Videos::CaptionMetadata.from_yt_json(info["captions"]) + @captions = Invidious::Videos::Captions::Metadata.from_yt_json(info["captions"]) end return @captions diff --git a/src/invidious/videos/caption.cr b/src/invidious/videos/caption.cr index 1e2abde9..82b68dcd 100644 --- a/src/invidious/videos/caption.cr +++ b/src/invidious/videos/caption.cr @@ -1,107 +1,109 @@ require "json" module Invidious::Videos - struct CaptionMetadata - property name : String - property language_code : String - property base_url : String + module Captions + struct Metadata + property name : String + property language_code : String + property base_url : String - property auto_generated : Bool + property auto_generated : Bool - def initialize(@name, @language_code, @base_url, @auto_generated) - end - - # Parse the JSON structure from Youtube - def self.from_yt_json(container : JSON::Any) : Array(CaptionMetadata) - caption_tracks = container - .dig?("playerCaptionsTracklistRenderer", "captionTracks") - .try &.as_a - - captions_list = [] of CaptionMetadata - return captions_list if caption_tracks.nil? - - caption_tracks.each do |caption| - name = caption["name"]["simpleText"]? || caption["name"]["runs"][0]["text"] - name = name.to_s.split(" - ")[0] - - language_code = caption["languageCode"].to_s - base_url = caption["baseUrl"].to_s - - auto_generated = false - if caption["kind"]? && caption["kind"] == "asr" - auto_generated = true - end - - captions_list << CaptionMetadata.new(name, language_code, base_url, auto_generated) + def initialize(@name, @language_code, @base_url, @auto_generated) end - return captions_list - end + # Parse the JSON structure from Youtube + def self.from_yt_json(container : JSON::Any) : Array(Captions::Metadata) + caption_tracks = container + .dig?("playerCaptionsTracklistRenderer", "captionTracks") + .try &.as_a - def timedtext_to_vtt(timedtext : String, tlang = nil) : String - # In the future, we could just directly work with the url. This is more of a POC - cues = [] of XML::Node - tree = XML.parse(timedtext) - tree = tree.children.first + captions_list = [] of Captions::Metadata + return captions_list if caption_tracks.nil? - tree.children.each do |item| - if item.name == "body" - item.children.each do |cue| - if cue.name == "p" && !(cue.children.size == 1 && cue.children[0].content == "\n") - cues << cue + caption_tracks.each do |caption| + name = caption["name"]["simpleText"]? || caption["name"]["runs"][0]["text"] + name = name.to_s.split(" - ")[0] + + language_code = caption["languageCode"].to_s + base_url = caption["baseUrl"].to_s + + auto_generated = false + if caption["kind"]? && caption["kind"] == "asr" + auto_generated = true + end + + captions_list << Captions::Metadata.new(name, language_code, base_url, auto_generated) + end + + return captions_list + end + + def timedtext_to_vtt(timedtext : String, tlang = nil) : String + # In the future, we could just directly work with the url. This is more of a POC + cues = [] of XML::Node + tree = XML.parse(timedtext) + tree = tree.children.first + + tree.children.each do |item| + if item.name == "body" + item.children.each do |cue| + if cue.name == "p" && !(cue.children.size == 1 && cue.children[0].content == "\n") + cues << cue + end end + break end - break end - end - result = String.build do |result| - result << <<-END_VTT - WEBVTT - Kind: captions - Language: #{tlang || @language_code} + result = String.build do |result| + result << <<-END_VTT + WEBVTT + Kind: captions + Language: #{tlang || @language_code} - END_VTT + END_VTT - result << "\n\n" + result << "\n\n" - cues.each_with_index do |node, i| - start_time = node["t"].to_f.milliseconds + cues.each_with_index do |node, i| + start_time = node["t"].to_f.milliseconds - duration = node["d"]?.try &.to_f.milliseconds + duration = node["d"]?.try &.to_f.milliseconds - duration ||= start_time + duration ||= start_time - if cues.size > i + 1 - end_time = cues[i + 1]["t"].to_f.milliseconds - else - end_time = start_time + duration + if cues.size > i + 1 + end_time = cues[i + 1]["t"].to_f.milliseconds + else + end_time = start_time + duration + end + + # start_time + result << start_time.hours.to_s.rjust(2, '0') + result << ':' << start_time.minutes.to_s.rjust(2, '0') + result << ':' << start_time.seconds.to_s.rjust(2, '0') + result << '.' << start_time.milliseconds.to_s.rjust(3, '0') + + result << " --> " + + # end_time + result << end_time.hours.to_s.rjust(2, '0') + result << ':' << end_time.minutes.to_s.rjust(2, '0') + result << ':' << end_time.seconds.to_s.rjust(2, '0') + result << '.' << end_time.milliseconds.to_s.rjust(3, '0') + + result << "\n" + + node.children.each do |s| + result << s.content + end + result << "\n" + result << "\n" end - - # start_time - result << start_time.hours.to_s.rjust(2, '0') - result << ':' << start_time.minutes.to_s.rjust(2, '0') - result << ':' << start_time.seconds.to_s.rjust(2, '0') - result << '.' << start_time.milliseconds.to_s.rjust(3, '0') - - result << " --> " - - # end_time - result << end_time.hours.to_s.rjust(2, '0') - result << ':' << end_time.minutes.to_s.rjust(2, '0') - result << ':' << end_time.seconds.to_s.rjust(2, '0') - result << '.' << end_time.milliseconds.to_s.rjust(3, '0') - - result << "\n" - - node.children.each do |s| - result << s.content - end - result << "\n" - result << "\n" end + return result end - return result end # List of all caption languages available on Youtube. diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr index ba2728cd..c86b3988 100644 --- a/src/invidious/videos/transcript.cr +++ b/src/invidious/videos/transcript.cr @@ -37,7 +37,7 @@ module Invidious::Videos # Convert into array of TranscriptLine lines = self.parse(initial_data) - # Taken from Invidious::Videos::CaptionMetadata.timedtext_to_vtt() + # Taken from Invidious::Videos::Captions::Metadata.timedtext_to_vtt() vtt = String.build do |vtt| vtt << <<-END_VTT WEBVTT diff --git a/src/invidious/views/user/preferences.ecr b/src/invidious/views/user/preferences.ecr index b1061ee8..55349c5a 100644 --- a/src/invidious/views/user/preferences.ecr +++ b/src/invidious/views/user/preferences.ecr @@ -89,7 +89,7 @@ <% preferences.captions.each_with_index do |caption, index| %>