From 88703a909c6cbe518d8a6a2b01823f89b8c9e074 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Tue, 10 Sep 2024 14:25:10 +0200 Subject: [PATCH 1/8] Avoid stack overflow when fetching deeply nested comments --- Cargo.lock | 3 +-- Cargo.toml | 2 +- crates/apub/src/objects/comment.rs | 22 ++++++++++++++++------ crates/apub/src/protocol/objects/note.rs | 4 ++-- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5dcdd045c..a41b48b94 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,8 +17,7 @@ checksum = "8f27d075294830fcab6f66e320dab524bc6d048f4a151698e153205559113772" [[package]] name = "activitypub_federation" version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b86eea7a032da501fe07b04a83c10716ea732c45e6943d7f045bc053aca03f2a" +source = "git+https://github.com/LemmyNet/activitypub-federation-rust.git?branch=nested-comments-stack-overflow#4b94af9ea0580226aaa10a4abb004d0b8ca10eae" dependencies = [ "activitystreams-kinds", "actix-web", diff --git a/Cargo.toml b/Cargo.toml index 38d4adc5f..5c74e7cad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,7 +100,7 @@ lemmy_db_views = { version = "=0.19.6-beta.7", path = "./crates/db_views" } lemmy_db_views_actor = { version = "=0.19.6-beta.7", path = "./crates/db_views_actor" } lemmy_db_views_moderator = { version = "=0.19.6-beta.7", path = "./crates/db_views_moderator" } lemmy_federate = { version = "=0.19.6-beta.7", path = "./crates/federate" } -activitypub_federation = { version = "0.5.8", default-features = false, features = [ +activitypub_federation = { git = "https://github.com/LemmyNet/activitypub-federation-rust.git", branch = "nested-comments-stack-overflow", default-features = false, features = [ "actix-web", ] } diesel = "2.1.6" diff --git a/crates/apub/src/objects/comment.rs b/crates/apub/src/objects/comment.rs index 466094b7f..c28da5dae 100644 --- a/crates/apub/src/objects/comment.rs +++ b/crates/apub/src/objects/comment.rs @@ -145,15 +145,25 @@ impl Object for ApubComment { verify_domains_match(note.id.inner(), expected_domain)?; verify_domains_match(note.attributed_to.inner(), note.id.inner())?; verify_is_public(¬e.to, ¬e.cc)?; - let community = note.community(context).await?; + let community = Box::pin(note.community(context)).await?; - check_apub_id_valid_with_strictness(note.id.inner(), community.local, context).await?; + Box::pin(check_apub_id_valid_with_strictness( + note.id.inner(), + community.local, + context, + )) + .await?; verify_is_remote_object(¬e.id, context)?; - verify_person_in_community(¬e.attributed_to, &community, context).await?; + Box::pin(verify_person_in_community( + ¬e.attributed_to, + &community, + context, + )) + .await?; - let (post, _) = note.get_parents(context).await?; - let creator = note.attributed_to.dereference(context).await?; - let is_mod_or_admin = is_mod_or_admin(&mut context.pool(), &creator, community.id) + let (post, _) = Box::pin(note.get_parents(context)).await?; + let creator = Box::pin(note.attributed_to.dereference(context)).await?; + let is_mod_or_admin = Box::pin(is_mod_or_admin(&mut context.pool(), &creator, community.id)) .await .is_ok(); if post.locked && !is_mod_or_admin { diff --git a/crates/apub/src/protocol/objects/note.rs b/crates/apub/src/protocol/objects/note.rs index b0ae00037..f22de5972 100644 --- a/crates/apub/src/protocol/objects/note.rs +++ b/crates/apub/src/protocol/objects/note.rs @@ -59,8 +59,8 @@ impl Note { context: &Data, ) -> LemmyResult<(ApubPost, Option)> { // Fetch parent comment chain in a box, otherwise it can cause a stack overflow. - let parent = Box::pin(self.in_reply_to.dereference(context).await?); - match parent.deref() { + let parent = self.in_reply_to.dereference(context).await?; + match parent { PostOrComment::Post(p) => Ok((p.clone(), None)), PostOrComment::Comment(c) => { let post_id = c.post_id; From 3f8c9981701adc853f54ce17fc9304d842a91e25 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Tue, 10 Sep 2024 16:44:37 +0200 Subject: [PATCH 2/8] add test case --- api_tests/src/comment.spec.ts | 26 ++++++++++++++++++++++++ crates/apub/src/objects/comment.rs | 4 +++- crates/apub/src/protocol/objects/note.rs | 1 - 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/api_tests/src/comment.spec.ts b/api_tests/src/comment.spec.ts index 8c3a23ab5..76afbc006 100644 --- a/api_tests/src/comment.spec.ts +++ b/api_tests/src/comment.spec.ts @@ -858,3 +858,29 @@ test("Dont send a comment reply to a blocked community", async () => { blockRes = await blockCommunity(beta, newCommunityId, false); expect(blockRes.blocked).toBe(false); }); + +/// Fetching a deeply nested comment can lead to stack overflow as all parent comments are also +/// fetched recursively. Ensure that it works properly. +test("Fetch a deeply nested comment", async () => { + let lastComment; + for (let i = 0; i < 100; i++) { + let parent_id = lastComment?.comment_view.comment.id; + + let commentRes = await createComment( + alpha, + postOnAlphaRes.post_view.post.id, + parent_id, + ); + expect(commentRes.comment_view.comment).toBeDefined(); + lastComment = commentRes; + } + + let betaComment = await resolveComment( + beta, + lastComment!.comment_view.comment, + ); + + console.log(betaComment!.comment!.comment.path); + expect(betaComment!.comment!.comment).toBeDefined(); + expect(betaComment?.comment?.post).toBeDefined(); +}); diff --git a/crates/apub/src/objects/comment.rs b/crates/apub/src/objects/comment.rs index c28da5dae..0a1e8d9f9 100644 --- a/crates/apub/src/objects/comment.rs +++ b/crates/apub/src/objects/comment.rs @@ -136,6 +136,8 @@ impl Object for ApubComment { Ok(note) } + /// Recursively fetches all parent comments. This can lead to a stack overflow so we need to + /// Box::pin all large futures on the heap. #[tracing::instrument(skip_all)] async fn verify( note: &Note, @@ -163,7 +165,7 @@ impl Object for ApubComment { let (post, _) = Box::pin(note.get_parents(context)).await?; let creator = Box::pin(note.attributed_to.dereference(context)).await?; - let is_mod_or_admin = Box::pin(is_mod_or_admin(&mut context.pool(), &creator, community.id)) + let is_mod_or_admin = is_mod_or_admin(&mut context.pool(), &creator, community.id) .await .is_ok(); if post.locked && !is_mod_or_admin { diff --git a/crates/apub/src/protocol/objects/note.rs b/crates/apub/src/protocol/objects/note.rs index f22de5972..3be80e481 100644 --- a/crates/apub/src/protocol/objects/note.rs +++ b/crates/apub/src/protocol/objects/note.rs @@ -23,7 +23,6 @@ use lemmy_db_schema::{ use lemmy_utils::{error::LemmyResult, LemmyErrorType}; use serde::{Deserialize, Serialize}; use serde_with::skip_serializing_none; -use std::ops::Deref; use url::Url; #[skip_serializing_none] From e0687bd58582162b420cc6c9fb5df5344063a033 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Tue, 10 Sep 2024 17:20:12 +0200 Subject: [PATCH 3/8] reduce comment depth, add docs --- Cargo.lock | 3 ++- Cargo.toml | 2 +- api_tests/src/comment.spec.ts | 11 +++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a41b48b94..5dcdd045c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,7 +17,8 @@ checksum = "8f27d075294830fcab6f66e320dab524bc6d048f4a151698e153205559113772" [[package]] name = "activitypub_federation" version = "0.5.8" -source = "git+https://github.com/LemmyNet/activitypub-federation-rust.git?branch=nested-comments-stack-overflow#4b94af9ea0580226aaa10a4abb004d0b8ca10eae" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86eea7a032da501fe07b04a83c10716ea732c45e6943d7f045bc053aca03f2a" dependencies = [ "activitystreams-kinds", "actix-web", diff --git a/Cargo.toml b/Cargo.toml index 5c74e7cad..38d4adc5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,7 +100,7 @@ lemmy_db_views = { version = "=0.19.6-beta.7", path = "./crates/db_views" } lemmy_db_views_actor = { version = "=0.19.6-beta.7", path = "./crates/db_views_actor" } lemmy_db_views_moderator = { version = "=0.19.6-beta.7", path = "./crates/db_views_moderator" } lemmy_federate = { version = "=0.19.6-beta.7", path = "./crates/federate" } -activitypub_federation = { git = "https://github.com/LemmyNet/activitypub-federation-rust.git", branch = "nested-comments-stack-overflow", default-features = false, features = [ +activitypub_federation = { version = "0.5.8", default-features = false, features = [ "actix-web", ] } diesel = "2.1.6" diff --git a/api_tests/src/comment.spec.ts b/api_tests/src/comment.spec.ts index 76afbc006..a3e796414 100644 --- a/api_tests/src/comment.spec.ts +++ b/api_tests/src/comment.spec.ts @@ -860,16 +860,16 @@ test("Dont send a comment reply to a blocked community", async () => { }); /// Fetching a deeply nested comment can lead to stack overflow as all parent comments are also -/// fetched recursively. Ensure that it works properly. +/// fetched recursively. Ensure that it works properly. Stack size depends on the OS so this +/// may fail in some cases. In release builds Lemmy is more optimized and can handle the max depth +/// of 100. test("Fetch a deeply nested comment", async () => { let lastComment; - for (let i = 0; i < 100; i++) { - let parent_id = lastComment?.comment_view.comment.id; - + for (let i = 0; i < 80; i++) { let commentRes = await createComment( alpha, postOnAlphaRes.post_view.post.id, - parent_id, + lastComment?.comment_view.comment.id, ); expect(commentRes.comment_view.comment).toBeDefined(); lastComment = commentRes; @@ -880,7 +880,6 @@ test("Fetch a deeply nested comment", async () => { lastComment!.comment_view.comment, ); - console.log(betaComment!.comment!.comment.path); expect(betaComment!.comment!.comment).toBeDefined(); expect(betaComment?.comment?.post).toBeDefined(); }); From 5a4d348bcb173638872e04534ab443a2ca3b82e5 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Tue, 10 Sep 2024 18:05:35 +0200 Subject: [PATCH 4/8] decrease --- api_tests/src/comment.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api_tests/src/comment.spec.ts b/api_tests/src/comment.spec.ts index a3e796414..77b9c3a20 100644 --- a/api_tests/src/comment.spec.ts +++ b/api_tests/src/comment.spec.ts @@ -865,7 +865,7 @@ test("Dont send a comment reply to a blocked community", async () => { /// of 100. test("Fetch a deeply nested comment", async () => { let lastComment; - for (let i = 0; i < 80; i++) { + for (let i = 0; i < 50; i++) { let commentRes = await createComment( alpha, postOnAlphaRes.post_view.post.id, From c2d813f3ec755765df7983a77617c115b6fc3dbb Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Wed, 11 Sep 2024 13:24:54 +0200 Subject: [PATCH 5/8] reduce max comment depth to 50 --- Cargo.lock | 3 +-- Cargo.toml | 2 +- api_tests/src/comment.spec.ts | 4 +--- crates/api_crud/src/comment/create.rs | 4 +--- crates/apub/src/protocol/objects/note.rs | 17 ++++++++++++++--- crates/utils/src/lib.rs | 2 ++ 6 files changed, 20 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5dcdd045c..a41b48b94 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,8 +17,7 @@ checksum = "8f27d075294830fcab6f66e320dab524bc6d048f4a151698e153205559113772" [[package]] name = "activitypub_federation" version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b86eea7a032da501fe07b04a83c10716ea732c45e6943d7f045bc053aca03f2a" +source = "git+https://github.com/LemmyNet/activitypub-federation-rust.git?branch=nested-comments-stack-overflow#4b94af9ea0580226aaa10a4abb004d0b8ca10eae" dependencies = [ "activitystreams-kinds", "actix-web", diff --git a/Cargo.toml b/Cargo.toml index 38d4adc5f..5c74e7cad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,7 +100,7 @@ lemmy_db_views = { version = "=0.19.6-beta.7", path = "./crates/db_views" } lemmy_db_views_actor = { version = "=0.19.6-beta.7", path = "./crates/db_views_actor" } lemmy_db_views_moderator = { version = "=0.19.6-beta.7", path = "./crates/db_views_moderator" } lemmy_federate = { version = "=0.19.6-beta.7", path = "./crates/federate" } -activitypub_federation = { version = "0.5.8", default-features = false, features = [ +activitypub_federation = { git = "https://github.com/LemmyNet/activitypub-federation-rust.git", branch = "nested-comments-stack-overflow", default-features = false, features = [ "actix-web", ] } diesel = "2.1.6" diff --git a/api_tests/src/comment.spec.ts b/api_tests/src/comment.spec.ts index 77b9c3a20..54c3c8bc4 100644 --- a/api_tests/src/comment.spec.ts +++ b/api_tests/src/comment.spec.ts @@ -860,9 +860,7 @@ test("Dont send a comment reply to a blocked community", async () => { }); /// Fetching a deeply nested comment can lead to stack overflow as all parent comments are also -/// fetched recursively. Ensure that it works properly. Stack size depends on the OS so this -/// may fail in some cases. In release builds Lemmy is more optimized and can handle the max depth -/// of 100. +/// fetched recursively. Ensure that it works properly. test("Fetch a deeply nested comment", async () => { let lastComment; for (let i = 0; i < 50; i++) { diff --git a/crates/api_crud/src/comment/create.rs b/crates/api_crud/src/comment/create.rs index 49de9d5de..b6bd2ccc3 100644 --- a/crates/api_crud/src/comment/create.rs +++ b/crates/api_crud/src/comment/create.rs @@ -29,11 +29,9 @@ use lemmy_db_schema::{ use lemmy_db_views::structs::{LocalUserView, PostView}; use lemmy_utils::{ error::{LemmyErrorExt, LemmyErrorType, LemmyResult}, - utils::{mention::scrape_text_for_mentions, validation::is_valid_body_field}, + utils::{mention::scrape_text_for_mentions, validation::is_valid_body_field}, MAX_COMMENT_DEPTH_LIMIT, }; -const MAX_COMMENT_DEPTH_LIMIT: usize = 100; - #[tracing::instrument(skip(context))] pub async fn create_comment( data: Json, diff --git a/crates/apub/src/protocol/objects/note.rs b/crates/apub/src/protocol/objects/note.rs index 3be80e481..f823b0496 100644 --- a/crates/apub/src/protocol/objects/note.rs +++ b/crates/apub/src/protocol/objects/note.rs @@ -20,7 +20,7 @@ use lemmy_db_schema::{ source::{community::Community, post::Post}, traits::Crud, }; -use lemmy_utils::{error::LemmyResult, LemmyErrorType}; +use lemmy_utils::{error::{LemmyError, LemmyResult}, LemmyErrorType, MAX_COMMENT_DEPTH_LIMIT}; use serde::{Deserialize, Serialize}; use serde_with::skip_serializing_none; use url::Url; @@ -57,13 +57,24 @@ impl Note { &self, context: &Data, ) -> LemmyResult<(ApubPost, Option)> { - // Fetch parent comment chain in a box, otherwise it can cause a stack overflow. + // We use recursion here to fetch the entire comment chain up to the top-level parent. This is + // necessary because we need to know the post and parent comment in order to insert a new + // comment. However it can also lead to stack overflow when fetching many comments recursively. + // To avoid this we check the request count against max comment depth, which based on testing + // can be handled without risking stack overflow. This is not a perfect solution, because in + // some cases we have to fetch user profiles too, and reach the limit after only 25 comments + // or so. + // A cleaner solution would be converting the recursion into a loop, but that is tricky. + // Use a lower request limit here. Otherwise we can run into stack overflow due to recursion. + if context.request_count() > MAX_COMMENT_DEPTH_LIMIT as u32 { + Err(LemmyErrorType::MaxCommentDepthReached)?; + } let parent = self.in_reply_to.dereference(context).await?; match parent { PostOrComment::Post(p) => Ok((p.clone(), None)), PostOrComment::Comment(c) => { let post_id = c.post_id; - let post = Post::read(&mut context.pool(), post_id) + let post = Box::pin(Post::read(&mut context.pool(), post_id)) .await? .ok_or(LemmyErrorType::CouldntFindPost)?; Ok((post.into(), Some(c.clone()))) diff --git a/crates/utils/src/lib.rs b/crates/utils/src/lib.rs index 5a5e76d2a..7f0691496 100644 --- a/crates/utils/src/lib.rs +++ b/crates/utils/src/lib.rs @@ -29,6 +29,8 @@ pub const CACHE_DURATION_FEDERATION: Duration = Duration::from_secs(60); pub const CACHE_DURATION_API: Duration = Duration::from_secs(1); +pub const MAX_COMMENT_DEPTH_LIMIT: usize = 50; + #[macro_export] macro_rules! location_info { () => { From 6ca4f10b30d607bd87d843fde476fc67f9601386 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Wed, 11 Sep 2024 13:26:33 +0200 Subject: [PATCH 6/8] fmt --- crates/api_crud/src/comment/create.rs | 3 ++- crates/apub/src/protocol/objects/note.rs | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/api_crud/src/comment/create.rs b/crates/api_crud/src/comment/create.rs index b6bd2ccc3..ed9172c7b 100644 --- a/crates/api_crud/src/comment/create.rs +++ b/crates/api_crud/src/comment/create.rs @@ -29,7 +29,8 @@ use lemmy_db_schema::{ use lemmy_db_views::structs::{LocalUserView, PostView}; use lemmy_utils::{ error::{LemmyErrorExt, LemmyErrorType, LemmyResult}, - utils::{mention::scrape_text_for_mentions, validation::is_valid_body_field}, MAX_COMMENT_DEPTH_LIMIT, + utils::{mention::scrape_text_for_mentions, validation::is_valid_body_field}, + MAX_COMMENT_DEPTH_LIMIT, }; #[tracing::instrument(skip(context))] diff --git a/crates/apub/src/protocol/objects/note.rs b/crates/apub/src/protocol/objects/note.rs index f823b0496..0a442187d 100644 --- a/crates/apub/src/protocol/objects/note.rs +++ b/crates/apub/src/protocol/objects/note.rs @@ -20,7 +20,11 @@ use lemmy_db_schema::{ source::{community::Community, post::Post}, traits::Crud, }; -use lemmy_utils::{error::{LemmyError, LemmyResult}, LemmyErrorType, MAX_COMMENT_DEPTH_LIMIT}; +use lemmy_utils::{ + error::{LemmyError, LemmyResult}, + LemmyErrorType, + MAX_COMMENT_DEPTH_LIMIT, +}; use serde::{Deserialize, Serialize}; use serde_with::skip_serializing_none; use url::Url; From 9a2f0a1f78aa1015be117e08b00b0aa746f42d3a Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Wed, 11 Sep 2024 13:34:49 +0200 Subject: [PATCH 7/8] clippy --- crates/apub/src/protocol/objects/note.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/crates/apub/src/protocol/objects/note.rs b/crates/apub/src/protocol/objects/note.rs index 0a442187d..8cbe08530 100644 --- a/crates/apub/src/protocol/objects/note.rs +++ b/crates/apub/src/protocol/objects/note.rs @@ -20,11 +20,7 @@ use lemmy_db_schema::{ source::{community::Community, post::Post}, traits::Crud, }; -use lemmy_utils::{ - error::{LemmyError, LemmyResult}, - LemmyErrorType, - MAX_COMMENT_DEPTH_LIMIT, -}; +use lemmy_utils::{error::LemmyResult, LemmyErrorType, MAX_COMMENT_DEPTH_LIMIT}; use serde::{Deserialize, Serialize}; use serde_with::skip_serializing_none; use url::Url; From 8f19ba365eb3e78d80ff1781066fab937b1e62a2 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Mon, 30 Sep 2024 16:11:43 +0200 Subject: [PATCH 8/8] cleanup --- crates/apub/src/objects/comment.rs | 22 +++++----------------- crates/apub/src/protocol/objects/note.rs | 1 - 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/crates/apub/src/objects/comment.rs b/crates/apub/src/objects/comment.rs index 2658ab305..ad7d84a6f 100644 --- a/crates/apub/src/objects/comment.rs +++ b/crates/apub/src/objects/comment.rs @@ -128,8 +128,6 @@ impl Object for ApubComment { Ok(note) } - /// Recursively fetches all parent comments. This can lead to a stack overflow so we need to - /// Box::pin all large futures on the heap. #[tracing::instrument(skip_all)] async fn verify( note: &Note, @@ -139,24 +137,14 @@ impl Object for ApubComment { verify_domains_match(note.id.inner(), expected_domain)?; verify_domains_match(note.attributed_to.inner(), note.id.inner())?; verify_is_public(¬e.to, ¬e.cc)?; - let community = Box::pin(note.community(context)).await?; + let community = note.community(context).await?; - Box::pin(check_apub_id_valid_with_strictness( - note.id.inner(), - community.local, - context, - )) - .await?; + check_apub_id_valid_with_strictness(note.id.inner(), community.local, context).await?; verify_is_remote_object(¬e.id, context)?; - Box::pin(verify_person_in_community( - ¬e.attributed_to, - &community, - context, - )) - .await?; + verify_person_in_community(¬e.attributed_to, &community, context).await?; - let (post, _) = Box::pin(note.get_parents(context)).await?; - let creator = Box::pin(note.attributed_to.dereference(context)).await?; + let (post, _) = note.get_parents(context).await?; + let creator = note.attributed_to.dereference(context).await?; let is_mod_or_admin = is_mod_or_admin(&mut context.pool(), &creator, community.id) .await .is_ok(); diff --git a/crates/apub/src/protocol/objects/note.rs b/crates/apub/src/protocol/objects/note.rs index 2f97c6abc..e3e204254 100644 --- a/crates/apub/src/protocol/objects/note.rs +++ b/crates/apub/src/protocol/objects/note.rs @@ -65,7 +65,6 @@ impl Note { // some cases we have to fetch user profiles too, and reach the limit after only 25 comments // or so. // A cleaner solution would be converting the recursion into a loop, but that is tricky. - // Use a lower request limit here. Otherwise we can run into stack overflow due to recursion. if context.request_count() > MAX_COMMENT_DEPTH_LIMIT as u32 { Err(LemmyErrorType::MaxCommentDepthReached)?; }