From 920a93983dd92ea7625d1da30d2e0bd5bdaed190 Mon Sep 17 00:00:00 2001 From: Dessalines Date: Thu, 8 Jun 2023 16:15:15 -0400 Subject: [PATCH] Adding hot_rank columns in place of function sorting. (#2952) * Adding hot_rank columns in place of function sorting. - Creates hot_rank columns for post, comment, and community. - Fixes #2932 * Updating all hot ranks on startup. * Fixing post.url migration. * Removing update_instance_software from startup. * Adding post_rank query --- crates/db_schema/src/aggregates/structs.rs | 4 + crates/db_schema/src/schema.rs | 6 +- crates/db_views/src/comment_report_view.rs | 1 + crates/db_views/src/comment_view.rs | 7 +- crates/db_views/src/post_report_view.rs | 2 + crates/db_views/src/post_view.rs | 18 +-- .../db_views_actor/src/comment_reply_view.rs | 12 +- crates/db_views_actor/src/community_view.rs | 2 +- .../db_views_actor/src/person_mention_view.rs | 6 +- .../2023-06-06-104440_index_post_url/down.sql | 4 + .../2023-06-06-104440_index_post_url/up.sql | 8 ++ .../down.sql | 35 ++++++ .../up.sql | 51 ++++++++ scripts/query_testing/post_query_hot_rank.sh | 3 + src/scheduled_tasks.rs | 110 +++++++++++------- 15 files changed, 194 insertions(+), 75 deletions(-) create mode 100644 migrations/2023-06-07-105918_add_hot_rank_columns/down.sql create mode 100644 migrations/2023-06-07-105918_add_hot_rank_columns/up.sql create mode 100755 scripts/query_testing/post_query_hot_rank.sh diff --git a/crates/db_schema/src/aggregates/structs.rs b/crates/db_schema/src/aggregates/structs.rs index d52b5bb41..8b387fa51 100644 --- a/crates/db_schema/src/aggregates/structs.rs +++ b/crates/db_schema/src/aggregates/structs.rs @@ -22,6 +22,7 @@ pub struct CommentAggregates { pub downvotes: i64, pub published: chrono::NaiveDateTime, pub child_count: i32, + pub hot_rank: i32, } #[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)] @@ -42,6 +43,7 @@ pub struct CommunityAggregates { pub users_active_week: i64, pub users_active_month: i64, pub users_active_half_year: i64, + pub hot_rank: i32, } #[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone, Default)] @@ -73,6 +75,8 @@ pub struct PostAggregates { pub newest_comment_time: chrono::NaiveDateTime, pub featured_community: bool, pub featured_local: bool, + pub hot_rank: i32, + pub hot_rank_active: i32, } #[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)] diff --git a/crates/db_schema/src/schema.rs b/crates/db_schema/src/schema.rs index 5d3d101b9..3f4b8a6ff 100644 --- a/crates/db_schema/src/schema.rs +++ b/crates/db_schema/src/schema.rs @@ -39,7 +39,8 @@ table! { upvotes -> Int8, downvotes -> Int8, published -> Timestamp, - child_count -> Int4, + child_count -> Int4, + hot_rank -> Int4, } } @@ -118,6 +119,7 @@ table! { users_active_week -> Int8, users_active_month -> Int8, users_active_half_year -> Int8, + hot_rank -> Int4, } } @@ -409,6 +411,8 @@ table! { newest_comment_time -> Timestamp, featured_community -> Bool, featured_local -> Bool, + hot_rank -> Int4, + hot_rank_active -> Int4, } } diff --git a/crates/db_views/src/comment_report_view.rs b/crates/db_views/src/comment_report_view.rs index f9d2077fb..510f4432e 100644 --- a/crates/db_views/src/comment_report_view.rs +++ b/crates/db_views/src/comment_report_view.rs @@ -504,6 +504,7 @@ mod tests { downvotes: 0, published: agg.published, child_count: 0, + hot_rank: 1728, }, my_vote: None, resolver: None, diff --git a/crates/db_views/src/comment_view.rs b/crates/db_views/src/comment_view.rs index b46dd3103..2216575dc 100644 --- a/crates/db_views/src/comment_view.rs +++ b/crates/db_views/src/comment_view.rs @@ -37,7 +37,7 @@ use lemmy_db_schema::{ post::Post, }, traits::{ToSafe, ViewToVec}, - utils::{functions::hot_rank, fuzzy_search, get_conn, limit_and_offset_unlimited, DbPool}, + utils::{fuzzy_search, get_conn, limit_and_offset_unlimited, DbPool}, CommentSortType, ListingType, }; @@ -355,9 +355,7 @@ impl<'a> CommentQuery<'a> { }; query = match self.sort.unwrap_or(CommentSortType::Hot) { - CommentSortType::Hot => query - .then_order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc()) - .then_order_by(comment_aggregates::published.desc()), + CommentSortType::Hot => query.then_order_by(comment_aggregates::hot_rank.desc()), CommentSortType::New => query.then_order_by(comment::published.desc()), CommentSortType::Old => query.then_order_by(comment::published.asc()), CommentSortType::Top => query.order_by(comment_aggregates::score.desc()), @@ -910,6 +908,7 @@ mod tests { downvotes: 0, published: agg.published, child_count: 5, + hot_rank: 1728, }, } } diff --git a/crates/db_views/src/post_report_view.rs b/crates/db_views/src/post_report_view.rs index e2661d6d6..28a999dea 100644 --- a/crates/db_views/src/post_report_view.rs +++ b/crates/db_views/src/post_report_view.rs @@ -476,6 +476,8 @@ mod tests { newest_comment_time: inserted_post.published, featured_community: false, featured_local: false, + hot_rank: 1728, + hot_rank_active: 1728, }, resolver: None, }; diff --git a/crates/db_views/src/post_view.rs b/crates/db_views/src/post_view.rs index f1dbb4356..868087f3d 100644 --- a/crates/db_views/src/post_view.rs +++ b/crates/db_views/src/post_view.rs @@ -40,7 +40,7 @@ use lemmy_db_schema::{ post::{Post, PostRead, PostSaved}, }, traits::{ToSafe, ViewToVec}, - utils::{functions::hot_rank, fuzzy_search, get_conn, limit_and_offset, DbPool}, + utils::{fuzzy_search, get_conn, limit_and_offset, DbPool}, ListingType, SortType, }; @@ -378,18 +378,8 @@ impl<'a> PostQuery<'a> { } query = match self.sort.unwrap_or(SortType::Hot) { - SortType::Active => query - .then_order_by( - hot_rank( - post_aggregates::score, - post_aggregates::newest_comment_time_necro, - ) - .desc(), - ) - .then_order_by(post_aggregates::newest_comment_time_necro.desc()), - SortType::Hot => query - .then_order_by(hot_rank(post_aggregates::score, post_aggregates::published).desc()) - .then_order_by(post_aggregates::published.desc()), + SortType::Active => query.then_order_by(post_aggregates::hot_rank_active.desc()), + SortType::Hot => query.then_order_by(post_aggregates::hot_rank.desc()), SortType::New => query.then_order_by(post_aggregates::published.desc()), SortType::Old => query.then_order_by(post_aggregates::published.asc()), SortType::NewComments => query.then_order_by(post_aggregates::newest_comment_time.desc()), @@ -929,6 +919,8 @@ mod tests { newest_comment_time: inserted_post.published, featured_community: false, featured_local: false, + hot_rank: 1728, + hot_rank_active: 1728, }, subscribed: SubscribedType::NotSubscribed, read: false, diff --git a/crates/db_views_actor/src/comment_reply_view.rs b/crates/db_views_actor/src/comment_reply_view.rs index 90bcdbc9f..244434b1a 100644 --- a/crates/db_views_actor/src/comment_reply_view.rs +++ b/crates/db_views_actor/src/comment_reply_view.rs @@ -34,7 +34,7 @@ use lemmy_db_schema::{ post::Post, }, traits::{ToSafe, ViewToVec}, - utils::{functions::hot_rank, get_conn, limit_and_offset, DbPool}, + utils::{get_conn, limit_and_offset, DbPool}, CommentSortType, }; use typed_builder::TypedBuilder; @@ -276,12 +276,10 @@ impl<'a> CommentReplyQuery<'a> { query = query.filter(person::bot_account.eq(false)); }; - query = match self.sort.unwrap_or(CommentSortType::Hot) { - CommentSortType::Hot => query - .then_order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc()) - .then_order_by(comment_aggregates::published.desc()), - CommentSortType::New => query.then_order_by(comment::published.desc()), - CommentSortType::Old => query.then_order_by(comment::published.asc()), + query = match self.sort.unwrap_or(CommentSortType::New) { + CommentSortType::Hot => query.then_order_by(comment_aggregates::hot_rank.desc()), + CommentSortType::New => query.then_order_by(comment_reply::published.desc()), + CommentSortType::Old => query.then_order_by(comment_reply::published.asc()), CommentSortType::Top => query.order_by(comment_aggregates::score.desc()), }; diff --git a/crates/db_views_actor/src/community_view.rs b/crates/db_views_actor/src/community_view.rs index e59025a5f..55a994e12 100644 --- a/crates/db_views_actor/src/community_view.rs +++ b/crates/db_views_actor/src/community_view.rs @@ -164,7 +164,7 @@ impl<'a> CommunityQuery<'a> { SortType::TopAll => query = query.order_by(community_aggregates::subscribers.desc()), SortType::TopMonth => query = query.order_by(community_aggregates::users_active_month.desc()), SortType::Hot => { - query = query.order_by(community_aggregates::users_active_month.desc()); + query = query.order_by(community_aggregates::hot_rank.desc()); // Don't show hidden communities in Hot (trending) query = query.filter( community::hidden diff --git a/crates/db_views_actor/src/person_mention_view.rs b/crates/db_views_actor/src/person_mention_view.rs index 16c53e6ef..672317ab8 100644 --- a/crates/db_views_actor/src/person_mention_view.rs +++ b/crates/db_views_actor/src/person_mention_view.rs @@ -34,7 +34,7 @@ use lemmy_db_schema::{ post::Post, }, traits::{ToSafe, ViewToVec}, - utils::{functions::hot_rank, get_conn, limit_and_offset, DbPool}, + utils::{get_conn, limit_and_offset, DbPool}, CommentSortType, }; use typed_builder::TypedBuilder; @@ -276,9 +276,7 @@ impl<'a> PersonMentionQuery<'a> { }; query = match self.sort.unwrap_or(CommentSortType::Hot) { - CommentSortType::Hot => query - .then_order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc()) - .then_order_by(comment_aggregates::published.desc()), + CommentSortType::Hot => query.then_order_by(comment_aggregates::hot_rank.desc()), CommentSortType::New => query.then_order_by(comment::published.desc()), CommentSortType::Old => query.then_order_by(comment::published.asc()), CommentSortType::Top => query.order_by(comment_aggregates::score.desc()), diff --git a/migrations/2023-06-06-104440_index_post_url/down.sql b/migrations/2023-06-06-104440_index_post_url/down.sql index 5a60b131b..ccc9e938f 100644 --- a/migrations/2023-06-06-104440_index_post_url/down.sql +++ b/migrations/2023-06-06-104440_index_post_url/down.sql @@ -1 +1,5 @@ +-- Change back the column type +alter table post alter column url type text; + +-- Drop the index drop index idx_post_url; diff --git a/migrations/2023-06-06-104440_index_post_url/up.sql b/migrations/2023-06-06-104440_index_post_url/up.sql index 4f8ea66ff..ce3532887 100644 --- a/migrations/2023-06-06-104440_index_post_url/up.sql +++ b/migrations/2023-06-06-104440_index_post_url/up.sql @@ -1 +1,9 @@ +-- Make a hard limit of 512 for the post.url column +-- Truncate existing long rows. +update post set url = left(url, 512) where length(url) > 512; + +-- Enforce the limit +alter table post alter column url type varchar (512); + +-- Add the index create index idx_post_url on post(url); diff --git a/migrations/2023-06-07-105918_add_hot_rank_columns/down.sql b/migrations/2023-06-07-105918_add_hot_rank_columns/down.sql new file mode 100644 index 000000000..71529bdf0 --- /dev/null +++ b/migrations/2023-06-07-105918_add_hot_rank_columns/down.sql @@ -0,0 +1,35 @@ + +-- Remove the new columns + +alter table post_aggregates drop column hot_rank; +alter table post_aggregates drop column hot_rank_active; + +alter table comment_aggregates drop column hot_rank; + +alter table community_aggregates drop column hot_rank; + +-- Drop some new indexes +drop index idx_post_aggregates_score; +drop index idx_post_aggregates_published; +drop index idx_post_aggregates_newest_comment_time; +drop index idx_post_aggregates_newest_comment_time_necro; +drop index idx_post_aggregates_featured_community; +drop index idx_post_aggregates_featured_local; + +-- Recreate the old indexes +CREATE INDEX idx_post_aggregates_featured_local_newest_comment_time ON public.post_aggregates USING btree (featured_local DESC, newest_comment_time DESC); +CREATE INDEX idx_post_aggregates_featured_community_newest_comment_time ON public.post_aggregates USING btree (featured_community DESC, newest_comment_time DESC); +CREATE INDEX idx_post_aggregates_featured_local_comments ON public.post_aggregates USING btree (featured_local DESC, comments DESC); +CREATE INDEX idx_post_aggregates_featured_community_comments ON public.post_aggregates USING btree (featured_community DESC, comments DESC); +CREATE INDEX idx_post_aggregates_featured_local_hot ON public.post_aggregates USING btree (featured_local DESC, hot_rank((score)::numeric, published) DESC, published DESC); +CREATE INDEX idx_post_aggregates_featured_community_hot ON public.post_aggregates USING btree (featured_community DESC, hot_rank((score)::numeric, published) DESC, published DESC); +CREATE INDEX idx_post_aggregates_featured_local_score ON public.post_aggregates USING btree (featured_local DESC, score DESC); +CREATE INDEX idx_post_aggregates_featured_community_score ON public.post_aggregates USING btree (featured_community DESC, score DESC); +CREATE INDEX idx_post_aggregates_featured_local_published ON public.post_aggregates USING btree (featured_local DESC, published DESC); +CREATE INDEX idx_post_aggregates_featured_community_published ON public.post_aggregates USING btree (featured_community DESC, published DESC); +CREATE INDEX idx_post_aggregates_featured_local_active ON public.post_aggregates USING btree (featured_local DESC, hot_rank((score)::numeric, newest_comment_time_necro) DESC, newest_comment_time_necro DESC); +CREATE INDEX idx_post_aggregates_featured_community_active ON public.post_aggregates USING btree (featured_community DESC, hot_rank((score)::numeric, newest_comment_time_necro) DESC, newest_comment_time_necro DESC); + +CREATE INDEX idx_comment_aggregates_hot ON public.comment_aggregates USING btree (hot_rank((score)::numeric, published) DESC, published DESC); + +CREATE INDEX idx_community_aggregates_hot ON public.community_aggregates USING btree (hot_rank((subscribers)::numeric, published) DESC, published DESC); diff --git a/migrations/2023-06-07-105918_add_hot_rank_columns/up.sql b/migrations/2023-06-07-105918_add_hot_rank_columns/up.sql new file mode 100644 index 000000000..92f03e5f3 --- /dev/null +++ b/migrations/2023-06-07-105918_add_hot_rank_columns/up.sql @@ -0,0 +1,51 @@ +-- This converts the old hot_rank functions, to columns + +-- Remove the old compound indexes +DROP INDEX idx_post_aggregates_featured_local_newest_comment_time; +DROP INDEX idx_post_aggregates_featured_community_newest_comment_time; +DROP INDEX idx_post_aggregates_featured_local_comments; +DROP INDEX idx_post_aggregates_featured_community_comments; +DROP INDEX idx_post_aggregates_featured_local_hot; +DROP INDEX idx_post_aggregates_featured_community_hot; +DROP INDEX idx_post_aggregates_featured_local_score; +DROP INDEX idx_post_aggregates_featured_community_score; +DROP INDEX idx_post_aggregates_featured_local_published; +DROP INDEX idx_post_aggregates_featured_community_published; +DROP INDEX idx_post_aggregates_featured_local_active; +DROP INDEX idx_post_aggregates_featured_community_active; + +DROP INDEX idx_comment_aggregates_hot; + +DROP INDEX idx_community_aggregates_hot; + +-- Add the new hot rank columns for post and comment aggregates +-- Note: 1728 is the result of the hot_rank function, with a score of 1, posted now +-- hot_rank = 10000*log10(1 + 3)/Power(2, 1.8) +alter table post_aggregates add column hot_rank integer not null default 1728; +alter table post_aggregates add column hot_rank_active integer not null default 1728; + +alter table comment_aggregates add column hot_rank integer not null default 1728; + +alter table community_aggregates add column hot_rank integer not null default 1728; + +-- Populate them initially +-- Note: After initial population, these are updated in a periodic scheduled job, +-- with only the last week being updated. +update post_aggregates set hot_rank_active = hot_rank(score::numeric, newest_comment_time_necro); +update post_aggregates set hot_rank = hot_rank(score::numeric, published); +update comment_aggregates set hot_rank = hot_rank(score::numeric, published); +update community_aggregates set hot_rank = hot_rank(subscribers::numeric, published); + +-- Create single column indexes +create index idx_post_aggregates_score on post_aggregates (score desc); +create index idx_post_aggregates_published on post_aggregates (published desc); +create index idx_post_aggregates_newest_comment_time on post_aggregates (newest_comment_time desc); +create index idx_post_aggregates_newest_comment_time_necro on post_aggregates (newest_comment_time_necro desc); +create index idx_post_aggregates_featured_community on post_aggregates (featured_community desc); +create index idx_post_aggregates_featured_local on post_aggregates (featured_local desc); +create index idx_post_aggregates_hot on post_aggregates (hot_rank desc); +create index idx_post_aggregates_active on post_aggregates (hot_rank_active desc); + +create index idx_comment_aggregates_hot on comment_aggregates (hot_rank desc); + +create index idx_community_aggregates_hot on community_aggregates (hot_rank desc); diff --git a/scripts/query_testing/post_query_hot_rank.sh b/scripts/query_testing/post_query_hot_rank.sh new file mode 100755 index 000000000..dad746065 --- /dev/null +++ b/scripts/query_testing/post_query_hot_rank.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +sudo docker exec -i docker-postgres-1 psql -Ulemmy -c "EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) SELECT post.id, post.name, post.url, post.body, post.creator_id, post.community_id, post.removed, post.locked, post.published, post.updated, post.deleted, post.nsfw, post.embed_title, post.embed_description, post.embed_video_url, post.thumbnail_url, post.ap_id, post.local, post.language_id, post.featured_community, post.featured_local, person.id, person.name, person.display_name, person.avatar, person.banned, person.published, person.updated, person.actor_id, person.bio, person.local, person.banner, person.deleted, person.inbox_url, person.shared_inbox_url, person.matrix_user_id, person.admin, person.bot_account, person.ban_expires, person.instance_id, community.id, community.name, community.title, community.description, community.removed, community.published, community.updated, community.deleted, community.nsfw, community.actor_id, community.local, community.icon, community.banner, community.hidden, community.posting_restricted_to_mods, community.instance_id, community_person_ban.id, community_person_ban.community_id, community_person_ban.person_id, community_person_ban.published, community_person_ban.expires, post_aggregates.id, post_aggregates.post_id, post_aggregates.comments, post_aggregates.score, post_aggregates.upvotes, post_aggregates.downvotes, post_aggregates.published, post_aggregates.newest_comment_time_necro, post_aggregates.newest_comment_time, post_aggregates.featured_community, post_aggregates.featured_local, community_follower.id, community_follower.community_id, community_follower.person_id, community_follower.published, community_follower.pending, post_saved.id, post_saved.post_id, post_saved.person_id, post_saved.published, post_read.id, post_read.post_id, post_read.person_id, post_read.published, person_block.id, person_block.person_id, person_block.target_id, person_block.published, post_like.score, coalesce((post_aggregates.comments - person_post_aggregates.read_comments), post_aggregates.comments) FROM ((((((((((((post INNER JOIN person ON (post.creator_id = person.id)) INNER JOIN community ON (post.community_id = community.id)) LEFT OUTER JOIN community_person_ban ON (((post.community_id = community_person_ban.community_id) AND (community_person_ban.person_id = post.creator_id)) AND ((community_person_ban.expires IS NULL) OR (community_person_ban.expires > CURRENT_TIMESTAMP)))) INNER JOIN post_aggregates ON (post_aggregates.post_id = post.id)) LEFT OUTER JOIN community_follower ON ((post.community_id = community_follower.community_id) AND (community_follower.person_id = '33517'))) LEFT OUTER JOIN post_saved ON ((post.id = post_saved.post_id) AND (post_saved.person_id = '33517'))) LEFT OUTER JOIN post_read ON ((post.id = post_read.post_id) AND (post_read.person_id = '33517'))) LEFT OUTER JOIN person_block ON ((post.creator_id = person_block.target_id) AND (person_block.person_id = '33517'))) LEFT OUTER JOIN community_block ON ((community.id = community_block.community_id) AND (community_block.person_id = '33517'))) LEFT OUTER JOIN post_like ON ((post.id = post_like.post_id) AND (post_like.person_id = '33517'))) LEFT OUTER JOIN person_post_aggregates ON ((post.id = person_post_aggregates.post_id) AND (person_post_aggregates.person_id = '33517'))) LEFT OUTER JOIN local_user_language ON ((post.language_id = local_user_language.language_id) AND (local_user_language.local_user_id = '11402'))) WHERE ((((((((((community_follower.person_id IS NOT NULL) AND (post.nsfw = 'f')) AND (community.nsfw = 'f')) AND (local_user_language.language_id IS NOT NULL)) AND (community_block.person_id IS NULL)) AND (person_block.person_id IS NULL)) AND (post.removed = 'f')) AND (post.deleted = 'f')) AND (community.removed = 'f')) AND (community.deleted = 'f')) ORDER BY post_aggregates.featured_local DESC , post_aggregates.hot_rank DESC LIMIT '40' OFFSET '0';" > query_results.json diff --git a/src/scheduled_tasks.rs b/src/scheduled_tasks.rs index 578549110..a255f74c2 100644 --- a/src/scheduled_tasks.rs +++ b/src/scheduled_tasks.rs @@ -1,7 +1,11 @@ use clokwerk::{Scheduler, TimeUnits}; +use diesel::{dsl::now, Connection, ExpressionMethods, QueryDsl}; // Import week days and WeekDay use diesel::{sql_query, PgConnection, RunQueryDsl}; -use diesel::{Connection, ExpressionMethods, QueryDsl}; +use lemmy_db_schema::{ + schema::{comment_aggregates, community_aggregates, post_aggregates}, + utils::functions::hot_rank, +}; use lemmy_utils::error::LemmyError; use std::{thread, time::Duration}; use tracing::info; @@ -11,26 +15,26 @@ pub fn setup(db_url: String) -> Result<(), LemmyError> { // Setup the connections let mut scheduler = Scheduler::new(); - let mut conn = PgConnection::establish(&db_url).expect("could not establish connection"); + let mut conn_1 = PgConnection::establish(&db_url).expect("could not establish connection"); + let mut conn_2 = PgConnection::establish(&db_url).expect("could not establish connection"); + let mut conn_3 = PgConnection::establish(&db_url).expect("could not establish connection"); + let mut conn_4 = PgConnection::establish(&db_url).expect("could not establish connection"); - active_counts(&mut conn); - update_banned_when_expired(&mut conn); + active_counts(&mut conn_1); + update_banned_when_expired(&mut conn_1); + update_hot_ranks(&mut conn_1, false); + clear_old_activities(&mut conn_1); - // On startup, reindex the tables non-concurrently - // TODO remove this for now, since it slows down startup a lot on lemmy.ml - reindex_aggregates_tables(&mut conn, true); scheduler.every(1.hour()).run(move || { - let conn = &mut PgConnection::establish(&db_url) - .unwrap_or_else(|_| panic!("Error connecting to {db_url}")); - active_counts(conn); - update_banned_when_expired(conn); - reindex_aggregates_tables(conn, true); - drop_ccnew_indexes(conn); + active_counts(&mut conn_2); + update_banned_when_expired(&mut conn_2); }); - - clear_old_activities(&mut conn); - scheduler.every(1.weeks()).run(move || { - clear_old_activities(&mut conn); + // Clear old activities every week + scheduler.every(TimeUnits::weeks(1)).run(move || { + clear_old_activities(&mut conn_3); + }); + scheduler.every(TimeUnits::minutes(5)).run(move || { + update_hot_ranks(&mut conn_4, true); }); // Manually run the scheduler in an event loop @@ -40,30 +44,56 @@ pub fn setup(db_url: String) -> Result<(), LemmyError> { } } -/// Reindex the aggregates tables every one hour -/// This is necessary because hot_rank is actually a mutable function: -/// https://dba.stackexchange.com/questions/284052/how-to-create-an-index-based-on-a-time-based-function-in-postgres?noredirect=1#comment555727_284052 -fn reindex_aggregates_tables(conn: &mut PgConnection, concurrently: bool) { - for table_name in &[ - "post_aggregates", - "comment_aggregates", - "community_aggregates", - ] { - reindex_table(conn, table_name, concurrently); - } -} +/// Update the hot_rank columns for the aggregates tables +fn update_hot_ranks(conn: &mut PgConnection, last_week_only: bool) { + let mut post_update = diesel::update(post_aggregates::table).into_boxed(); + let mut comment_update = diesel::update(comment_aggregates::table).into_boxed(); + let mut community_update = diesel::update(community_aggregates::table).into_boxed(); -fn reindex_table(conn: &mut PgConnection, table_name: &str, concurrently: bool) { - let concurrently_str = if concurrently { "concurrently" } else { "" }; - info!("Reindexing table {} {} ...", concurrently_str, table_name); - let query = format!("reindex table {concurrently_str} {table_name}"); - sql_query(query).execute(conn).expect("reindex table"); + // Only update for the last week of content + if last_week_only { + info!("Updating hot ranks for last week..."); + let last_week = now - diesel::dsl::IntervalDsl::weeks(1); + + post_update = post_update.filter(post_aggregates::published.gt(last_week)); + comment_update = comment_update.filter(comment_aggregates::published.gt(last_week)); + community_update = community_update.filter(community_aggregates::published.gt(last_week)); + } else { + info!("Updating hot ranks for all history..."); + } + + post_update + .set(( + post_aggregates::hot_rank.eq(hot_rank(post_aggregates::score, post_aggregates::published)), + post_aggregates::hot_rank_active.eq(hot_rank( + post_aggregates::score, + post_aggregates::newest_comment_time_necro, + )), + )) + .execute(conn) + .expect("update post_aggregate hot_ranks"); + + comment_update + .set(comment_aggregates::hot_rank.eq(hot_rank( + comment_aggregates::score, + comment_aggregates::published, + ))) + .execute(conn) + .expect("update comment_aggregate hot_ranks"); + + community_update + .set(community_aggregates::hot_rank.eq(hot_rank( + community_aggregates::subscribers, + community_aggregates::published, + ))) + .execute(conn) + .expect("update community_aggregate hot_ranks"); info!("Done."); } /// Clear old activities (this table gets very large) fn clear_old_activities(conn: &mut PgConnection) { - use diesel::dsl::{now, IntervalDsl}; + use diesel::dsl::IntervalDsl; use lemmy_db_schema::schema::activity::dsl::{activity, published}; info!("Clearing old activities..."); diesel::delete(activity.filter(published.lt(now - 6.months()))) @@ -110,13 +140,3 @@ fn update_banned_when_expired(conn: &mut PgConnection) { .execute(conn) .expect("update banned when expires"); } - -/// Drops the phantom CCNEW indexes created by postgres -/// https://github.com/LemmyNet/lemmy/issues/2431 -fn drop_ccnew_indexes(conn: &mut PgConnection) { - info!("Dropping phantom ccnew indexes..."); - let drop_stmt = "select drop_ccnew_indexes()"; - sql_query(drop_stmt) - .execute(conn) - .expect("drop ccnew indexes"); -}