From f34f06bca57cadd95bb3319d6e1346fd0ff166ba Mon Sep 17 00:00:00 2001 From: 0x24d Date: Fri, 19 Nov 2021 22:41:43 +0000 Subject: [PATCH 1/8] Store continuation so that it can be used on the next matching request. This is done because the continuation created for videos sorted by 'oldest' doesn't work after the first 30 videos. The same 30 videos are returned again. The only way to get the next 30 videos, and onwards, is to use the continuation returned in the initial API call. Storing the returned continuation in the db saves having to request each page from 1 to the currently wanted page each time a page other than the first is wanted. --- config/sql/channel_continuations.sql | 23 ++++++++++++ src/invidious.cr | 1 + src/invidious/channels/channels.cr | 38 ++++++++++++++++++++ src/invidious/channels/videos.cr | 31 ++++++++++++++-- src/invidious/yt_backend/extractors_utils.cr | 10 ++++-- 5 files changed, 98 insertions(+), 5 deletions(-) create mode 100644 config/sql/channel_continuations.sql diff --git a/config/sql/channel_continuations.sql b/config/sql/channel_continuations.sql new file mode 100644 index 00000000..629b2aef --- /dev/null +++ b/config/sql/channel_continuations.sql @@ -0,0 +1,23 @@ +-- Table: public.channel_continuations + +-- DROP TABLE public.channel_continuations; + +CREATE TABLE IF NOT EXISTS public.channel_continuations +( + id text NOT NULL, + page integer, + sort_by text, + continuation text, + CONSTRAINT channel_continuations_id_page_sort_by_key UNIQUE (id, page, sort_by) +); + +GRANT ALL ON TABLE public.channel_continuations TO default_user; + +-- Index: public.channel_continuations_id_idx + +-- DROP INDEX public.channel_continuations_id_idx; + +CREATE INDEX IF NOT EXISTS channel_continuations_id_idx + ON public.channel_continuations + USING btree + (id COLLATE pg_catalog."default"); diff --git a/src/invidious.cr b/src/invidious.cr index 21a12ff2..c4ecc275 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -115,6 +115,7 @@ if 
CONFIG.check_tables check_enum(PG_DB, "privacy", PlaylistPrivacy) check_table(PG_DB, "channels", InvidiousChannel) + check_table(PG_DB, "channel_continuations", ChannelContinuation) check_table(PG_DB, "channel_videos", ChannelVideo) check_table(PG_DB, "playlists", InvidiousPlaylist) check_table(PG_DB, "playlist_videos", PlaylistVideo) diff --git a/src/invidious/channels/channels.cr b/src/invidious/channels/channels.cr index 827b6534..7d89c1a8 100644 --- a/src/invidious/channels/channels.cr +++ b/src/invidious/channels/channels.cr @@ -8,6 +8,23 @@ struct InvidiousChannel property subscribed : Time? end +struct ChannelContinuation + include DB::Serializable + + property id : String + property page : Int32 = 0 + property sort_by : String = "newest" + property continuation : String + + def to_tuple + {% begin %} + { + {{*@type.instance_vars.map(&.name)}} + } + {% end %} + end +end + struct ChannelVideo include DB::Serializable @@ -199,6 +216,18 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) page = 1 + channel_continuation = ChannelContinuation.new({ + id: ucid, + page: page, + sort_by: "newest", + continuation: produce_channel_videos_continuation(ucid, auto_generated: auto_generated, v2: true) + }) + + LOGGER.trace("fetch_channel: #{ucid} : page #{page} : Updating or inserting continuation") + + db.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ + ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple) + LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page") initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated) videos = extract_videos(initial_data, author, ucid) @@ -264,6 +293,15 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated) videos = extract_videos(initial_data, author, ucid) + channel_continuation = ChannelContinuation.new({ + 
id: ucid, + page: page, + sort_by: "newest", + continuation: fetch_continuation_token(initial_data) || "" + }) + db.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ + ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple) + count = videos.size videos = videos.map { |video| ChannelVideo.new({ id: video.id, diff --git a/src/invidious/channels/videos.cr b/src/invidious/channels/videos.cr index 48453bb7..5949345e 100644 --- a/src/invidious/channels/videos.cr +++ b/src/invidious/channels/videos.cr @@ -58,10 +58,35 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so end def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest") - continuation = produce_channel_videos_continuation(ucid, page, - auto_generated: auto_generated, sort_by: sort_by, v2: true) + if channel_continuation = PG_DB.query_one?("SELECT * FROM channel_continuations WHERE id = $1 AND page = $2 AND sort_by = $3", ucid, page, sort_by, as: ChannelContinuation) + continuation = channel_continuation.continuation + else + # Manually create the continuation, and insert it into the table, if one does not already exist. + # This should only the case the first time the first page of each 'sort_by' mode is loaded for each channel, + # as all calls to this function with 'page = 1' will get the continuation for the next page (page 2) from the returned data below. 
+ continuation = produce_channel_videos_continuation(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true) + channel_continuation = ChannelContinuation.new({ + id: ucid, + page: page, + sort_by: sort_by, + continuation: continuation + }) + PG_DB.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ + ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple) + end - return YoutubeAPI.browse(continuation) + initial_data = YoutubeAPI.browse(continuation) + # Store the returned continuation in the table so that it can be used the next time this function is called requesting that page. + channel_continuation = ChannelContinuation.new({ + id: ucid, + page: page + 1, + sort_by: sort_by, + continuation: fetch_continuation_token(initial_data) || "" + }) + PG_DB.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ + ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple) + + return initial_data end def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest") diff --git a/src/invidious/yt_backend/extractors_utils.cr b/src/invidious/yt_backend/extractors_utils.cr index add5f488..400bc62e 100644 --- a/src/invidious/yt_backend/extractors_utils.cr +++ b/src/invidious/yt_backend/extractors_utils.cr @@ -58,10 +58,16 @@ def fetch_continuation_token(initial_data : Hash(String, JSON::Any)) # Fetches the continuation token from initial data if initial_data["onResponseReceivedActions"]? continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"] - else + elsif initial_data["contents"]? 
tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]) continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"] + else + continuation = initial_data["continuationContents"]["gridContinuation"]["continuations"][0]["nextContinuationData"]["continuation"].as_s end - return fetch_continuation_token(continuation_items.as_a) + if continuation_items.nil? + return continuation + else + return fetch_continuation_token(continuation_items.as_a) + end end From 55fc6e7bbe6c1ea58ea8aea3faeebbf31fea0fa9 Mon Sep 17 00:00:00 2001 From: 0x24d Date: Sat, 20 Nov 2021 00:32:25 +0000 Subject: [PATCH 2/8] Handle when there are no continuations returned due to being on the last page. Also tidy up the return statements --- src/invidious/yt_backend/extractors_utils.cr | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/invidious/yt_backend/extractors_utils.cr b/src/invidious/yt_backend/extractors_utils.cr index 400bc62e..be992ecd 100644 --- a/src/invidious/yt_backend/extractors_utils.cr +++ b/src/invidious/yt_backend/extractors_utils.cr @@ -55,19 +55,20 @@ def fetch_continuation_token(items : Array(JSON::Any)) end def fetch_continuation_token(initial_data : Hash(String, JSON::Any)) + continuation = "" # Fetches the continuation token from initial data if initial_data["onResponseReceivedActions"]? continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"] elsif initial_data["contents"]? tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]) continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"] - else + elsif initial_data["continuationContents"]["gridContinuation"]["continuations"]? 
continuation = initial_data["continuationContents"]["gridContinuation"]["continuations"][0]["nextContinuationData"]["continuation"].as_s end - if continuation_items.nil? - return continuation - else - return fetch_continuation_token(continuation_items.as_a) + if !continuation_items.nil? + continuation = fetch_continuation_token(continuation_items.as_a) end + + return continuation end From dce70e2d9307e7e9dc1ac7b94f2c3d4601d353b7 Mon Sep 17 00:00:00 2001 From: 0x24d Date: Sat, 20 Nov 2021 11:57:45 +0000 Subject: [PATCH 3/8] Always generate the first page's continuation, and add fallback logic. The fallback branch iterates from the last known continuation to get the continuation for the wanted page. --- src/invidious/channels/videos.cr | 85 +++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 23 deletions(-) diff --git a/src/invidious/channels/videos.cr b/src/invidious/channels/videos.cr index 5949345e..cdc1a6d3 100644 --- a/src/invidious/channels/videos.cr +++ b/src/invidious/channels/videos.cr @@ -58,33 +58,72 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so end def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest") - if channel_continuation = PG_DB.query_one?("SELECT * FROM channel_continuations WHERE id = $1 AND page = $2 AND sort_by = $3", ucid, page, sort_by, as: ChannelContinuation) + continuation = "" + initial_data = Hash(String, JSON::Any).new + + if page == 1 + # Always manually create the continuation for page 1 as this is likely faster than a db lookup. 
+ continuation = produce_channel_videos_continuation(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true) + elsif channel_continuation = PG_DB.query_one?("SELECT * FROM channel_continuations WHERE id = $1 AND page = $2 AND sort_by = $3", ucid, page, sort_by, as: ChannelContinuation) continuation = channel_continuation.continuation else - # Manually create the continuation, and insert it into the table, if one does not already exist. - # This should only the case the first time the first page of each 'sort_by' mode is loaded for each channel, - # as all calls to this function with 'page = 1' will get the continuation for the next page (page 2) from the returned data below. - continuation = produce_channel_videos_continuation(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true) - channel_continuation = ChannelContinuation.new({ - id: ucid, - page: page, - sort_by: sort_by, - continuation: continuation - }) - PG_DB.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ - ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple) + # This branch should not be needed in normal operation (navigating via the previous/next page buttons). + # This is just here as a fallback in case someone requests, for example, page 3 without previously requesting page 2. + + # Iterate backwards from the page before the wanted one down to page 2 to find the nearest stored continuation (a descending Range yields nothing, so use downto). + start = 1 + (page - 1).downto(2) do |i| + if channel_continuation = PG_DB.query_one?("SELECT * FROM channel_continuations WHERE id = $1 AND page = $2 AND sort_by = $3", ucid, i, sort_by, as: ChannelContinuation) + start = i + continuation = channel_continuation.continuation + break + end + end + + # If a continuation hasn't been found after getting to page 2, manually create the continuation for page 1. 
+ if start == 1 + continuation = produce_channel_videos_continuation(ucid, 1, auto_generated: auto_generated, sort_by: sort_by, v2: true) + end + + # Iterate from the found/created continuation until we have the continuation for the wanted page or there are no more pages. + # Browsing page i - 1 returns the continuation for page i, so store it under page i where the lookup above will find it next time that page is wanted. + ((start + 1)..page).each do |i| + initial_data = YoutubeAPI.browse(continuation) + continuation = fetch_continuation_token(initial_data) + + break if continuation.nil? || continuation.empty? + + channel_continuation = ChannelContinuation.new({ + id: ucid, + page: i, + sort_by: sort_by, + continuation: continuation + }) + PG_DB.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ + ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple) + end end - initial_data = YoutubeAPI.browse(continuation) - # Store the returned continuation in the table so that it can be used the next time this function is called requesting that page. - channel_continuation = ChannelContinuation.new({ - id: ucid, - page: page + 1, - sort_by: sort_by, - continuation: fetch_continuation_token(initial_data) || "" - }) - PG_DB.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ - ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple) + # If we reached the channel's last page in the else loop above return an empty hash. + if continuation.nil? || continuation.empty? + initial_data.clear + else + # Get the wanted page and store the returned continuation for the next page, + # if there is one, so that it can be used the next time this function is called requesting that page. + initial_data = YoutubeAPI.browse(continuation) + continuation = fetch_continuation_token(initial_data) + + if !continuation.nil? && !continuation.empty? 
+ channel_continuation = ChannelContinuation.new({ + id: ucid, + page: page + 1, + sort_by: sort_by, + continuation: continuation + }) + PG_DB.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ + ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple) + end + end return initial_data end From 8e3b8da285dd17594e5df6b765eedf01970b9018 Mon Sep 17 00:00:00 2001 From: 0x24d Date: Sat, 20 Nov 2021 12:10:59 +0000 Subject: [PATCH 4/8] Keep using the existing logic where it already works correctly. These states are: * All pages when sorted by 'newest'. * All pages when sorted by 'popular'. * Page 1 when sorted by 'oldest'. If not in any of these three states, e.g. page 2 when sorted by 'oldest', fall back to using the new db/iteration logic. --- src/invidious/channels/videos.cr | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/invidious/channels/videos.cr b/src/invidious/channels/videos.cr index cdc1a6d3..5ac66e83 100644 --- a/src/invidious/channels/videos.cr +++ b/src/invidious/channels/videos.cr @@ -61,8 +61,9 @@ def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = continuation = "" initial_data = Hash(String, JSON::Any).new - if page == 1 - # Always manually create the continuation for page 1 as this is likely faster than a db lookup. + # Manually generating the continuation works correctly for both 'newest' and 'popular' sort modes, + # and for page 1 when sorting by 'oldest'. So only fallback to using the db if not in either of these states. 
+ if sort_by != "oldest" || page == 1 continuation = produce_channel_videos_continuation(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true) elsif channel_continuation = PG_DB.query_one?("SELECT * FROM channel_continuations WHERE id = $1 AND page = $2 AND sort_by = $3", ucid, page, sort_by, as: ChannelContinuation) continuation = channel_continuation.continuation @@ -111,18 +112,22 @@ def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = # Get the wanted page and store the returned continuation for the next page, # if there is one, so that it can be used the next time this function is called requesting that page. initial_data = YoutubeAPI.browse(continuation) - continuation = fetch_continuation_token(initial_data) - if !continuation.nil? && !continuation.empty? - channel_continuation = ChannelContinuation.new({ - id: ucid, - page: page + 1, - sort_by: sort_by, - continuation: continuation - }) - PG_DB.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ - ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple) - end + # Only get the continuation and store it if the sort mode is 'oldest'. + if sort_by == "oldest" + continuation = fetch_continuation_token(initial_data) + + if !continuation.nil? && !continuation.empty? + channel_continuation = ChannelContinuation.new({ + id: ucid, + page: page + 1, + sort_by: sort_by, + continuation: continuation + }) + PG_DB.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ + ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple) + end + end end return initial_data From 188a7dd9e620d227ece9ef4534bc693b74bb42ba Mon Sep 17 00:00:00 2001 From: 0x24d Date: Sat, 20 Nov 2021 12:47:25 +0000 Subject: [PATCH 5/8] Remove storing continuations when fetching channel. Videos here are sorted by 'newest' and continuations should only be stored when sorting by 'oldest'. 
--- src/invidious/channels/channels.cr | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/invidious/channels/channels.cr b/src/invidious/channels/channels.cr index 7d89c1a8..37868475 100644 --- a/src/invidious/channels/channels.cr +++ b/src/invidious/channels/channels.cr @@ -216,18 +216,6 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) page = 1 - channel_continuation = ChannelContinuation.new({ - id: ucid, - page: page, - sort_by: "newest", - continuation: produce_channel_videos_continuation(ucid, auto_generated: auto_generated, v2: true) - }) - - LOGGER.trace("fetch_channel: #{ucid} : page #{page} : Updating or inserting continuation") - - db.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ - ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple) - LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page") initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated) videos = extract_videos(initial_data, author, ucid) @@ -293,15 +281,6 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated) videos = extract_videos(initial_data, author, ucid) - channel_continuation = ChannelContinuation.new({ - id: ucid, - page: page, - sort_by: "newest", - continuation: fetch_continuation_token(initial_data) || "" - }) - db.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ - ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple) - count = videos.size videos = videos.map { |video| ChannelVideo.new({ id: video.id, From 6467ea14b0f07b8e95e4ff5792406124c5f4a40d Mon Sep 17 00:00:00 2001 From: 0x24d Date: Sat, 20 Nov 2021 12:51:05 +0000 Subject: [PATCH 6/8] Remove default struct properties and set all unique keys to not null. 
--- config/sql/channel_continuations.sql | 4 ++-- src/invidious/channels/channels.cr | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/sql/channel_continuations.sql b/config/sql/channel_continuations.sql index 629b2aef..5ac2facf 100644 --- a/config/sql/channel_continuations.sql +++ b/config/sql/channel_continuations.sql @@ -5,8 +5,8 @@ CREATE TABLE IF NOT EXISTS public.channel_continuations ( id text NOT NULL, - page integer, - sort_by text, + page integer NOT NULL, + sort_by text NOT NULL, continuation text, CONSTRAINT channel_continuations_id_page_sort_by_key UNIQUE (id, page, sort_by) ); diff --git a/src/invidious/channels/channels.cr b/src/invidious/channels/channels.cr index 37868475..a27829db 100644 --- a/src/invidious/channels/channels.cr +++ b/src/invidious/channels/channels.cr @@ -12,8 +12,8 @@ struct ChannelContinuation include DB::Serializable property id : String - property page : Int32 = 0 - property sort_by : String = "newest" + property page : Int32 + property sort_by : String property continuation : String def to_tuple From b16af0da3c9dbb8006e7b696a62b9bad876e8410 Mon Sep 17 00:00:00 2001 From: 0x24d Date: Sat, 20 Nov 2021 13:07:40 +0000 Subject: [PATCH 7/8] Add new db table to init script --- docker/init-invidious-db.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/init-invidious-db.sh b/docker/init-invidious-db.sh index 22b4cc5f..a1e0b51e 100755 --- a/docker/init-invidious-db.sh +++ b/docker/init-invidious-db.sh @@ -3,6 +3,7 @@ set -eou pipefail psql --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" < config/sql/channels.sql psql --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" < config/sql/videos.sql +psql --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" < config/sql/channel_continuations.sql psql --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" < config/sql/channel_videos.sql psql --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" < config/sql/users.sql psql --username "$POSTGRES_USER" --dbname 
"$POSTGRES_DB" < config/sql/session_ids.sql From 1e925e97d87ac1a80650bdf02deb44aeca215698 Mon Sep 17 00:00:00 2001 From: 0x24d Date: Sat, 20 Nov 2021 13:08:42 +0000 Subject: [PATCH 8/8] Format new logic --- src/invidious/channels/videos.cr | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/invidious/channels/videos.cr b/src/invidious/channels/videos.cr index 5ac66e83..88669754 100644 --- a/src/invidious/channels/videos.cr +++ b/src/invidious/channels/videos.cr @@ -95,10 +95,10 @@ def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = break if continuation.nil? || continuation.empty? channel_continuation = ChannelContinuation.new({ - id: ucid, - page: i, - sort_by: sort_by, - continuation: continuation + id: ucid, + page: i, + sort_by: sort_by, + continuation: continuation, }) PG_DB.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple) @@ -119,10 +119,10 @@ def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = if !continuation.nil? && !continuation.empty? channel_continuation = ChannelContinuation.new({ - id: ucid, - page: page + 1, - sort_by: sort_by, - continuation: continuation + id: ucid, + page: page + 1, + sort_by: sort_by, + continuation: continuation, }) PG_DB.exec("INSERT INTO channel_continuations VALUES ($1, $2, $3, $4) \ ON CONFLICT (id, page, sort_by) DO UPDATE SET continuation = $4", *channel_continuation.to_tuple)