diff --git a/crates/api_common/src/context.rs b/crates/api_common/src/context.rs index 334983b20..bff956937 100644 --- a/crates/api_common/src/context.rs +++ b/crates/api_common/src/context.rs @@ -76,7 +76,7 @@ impl LemmyContext { .app_data(context) .debug(true) // Dont allow any network fetches - .http_fetch_limit(0) + .http_fetch_limit(10) .build() .await .expect("build federation config") diff --git a/crates/api_common/src/utils.rs b/crates/api_common/src/utils.rs index b83b9b582..e4d2e1820 100644 --- a/crates/api_common/src/utils.rs +++ b/crates/api_common/src/utils.rs @@ -50,7 +50,7 @@ use lemmy_utils::{ rate_limit::{ActionType, BucketConfig}, settings::structs::{PictrsImageMode, Settings}, utils::{ - markdown::{markdown_check_for_blocked_urls, markdown_rewrite_image_links}, + markdown::{image_links::markdown_rewrite_image_links, markdown_check_for_blocked_urls}, slurs::{build_slur_regex, remove_slurs}, validation::clean_urls_in_text, }, diff --git a/crates/apub/src/lib.rs b/crates/apub/src/lib.rs index c8506da52..cef100f01 100644 --- a/crates/apub/src/lib.rs +++ b/crates/apub/src/lib.rs @@ -26,6 +26,7 @@ pub mod fetcher; pub mod http; pub(crate) mod mentions; pub mod objects; +mod post_links; pub mod protocol; /// Maximum number of outgoing HTTP requests to fetch a single object. Needs to be high enough diff --git a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index dfc9d79f9..0056a105c 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -3,6 +3,7 @@ use crate::{ check_apub_id_valid_with_strictness, local_site_data_cached, objects::{read_from_string_or_source_opt, verify_is_remote_object}, + post_links::markdown_rewrite_remote_post_links_opt, protocol::{ objects::{ page::{Attachment, AttributedTo, Hashtag, HashtagType, Page, PageType}, @@ -237,6 +238,7 @@ impl Object for ApubPost { let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source); let body = process_markdown_opt(&body, slur_regex, &url_blocklist, context).await?; + let body = markdown_rewrite_remote_post_links_opt(body, context).await; let language_id = LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?; diff --git a/crates/utils/src/utils/markdown/image_links.rs b/crates/utils/src/utils/markdown/image_links.rs index 05ba64f30..656e89f20 100644 --- a/crates/utils/src/utils/markdown/image_links.rs +++ b/crates/utils/src/utils/markdown/image_links.rs @@ -1,46 +1,17 @@ -use super::MARKDOWN_PARSER; +use super::{link_rule::Link, MARKDOWN_PARSER}; use crate::settings::SETTINGS; -use markdown_it::plugins::cmark::inline::image::Image; +use markdown_it::{plugins::cmark::inline::image::Image, NodeValue}; use url::Url; use urlencoding::encode; /// Rewrites all links to remote domains in markdown, so they go through `/api/v3/image_proxy`. pub fn markdown_rewrite_image_links(mut src: String) -> (String, Vec) { - let ast = MARKDOWN_PARSER.parse(&src); - let mut links_offsets = vec![]; - - // Walk the syntax tree to find positions of image links - ast.walk(|node, _depth| { - if let Some(image) = node.cast::() { - // srcmap is always present for image - // https://github.com/markdown-it-rust/markdown-it/issues/36#issuecomment-1777844387 - let node_offsets = node.srcmap.expect("srcmap is none").get_byte_offsets(); - // necessary for custom emojis which look like `![name](url "title")` - let start_offset = node_offsets.1 - - image.url.len() - - 1 - - image - .title - .as_ref() - .map(|t| t.len() + 3) - .unwrap_or_default(); - let end_offset = node_offsets.1 - 1; - - links_offsets.push((start_offset, end_offset)); - } - }); + let links_offsets = find_urls::(&src); let mut links = vec![]; // Go through the collected links in reverse order - while let Some((start, end)) = links_offsets.pop() { - let content = src.get(start..end).unwrap_or_default(); - // necessary for custom emojis which look like `![name](url "title")` - let (url, extra) = if content.contains(' ') { - let split = content.split_once(' ').expect("split is valid"); - (split.0, Some(split.1)) - } else { - (content, None) - }; + for (start, end) in links_offsets.into_iter().rev() { + let (url, extra) = markdown_handle_title(&src, start, end); match Url::parse(url) { Ok(parsed) => { links.push(parsed.clone()); @@ -68,6 +39,61 @@ pub fn markdown_rewrite_image_links(mut src: String) -> (String, Vec) { (src, links) } +pub fn markdown_handle_title(src: &String, start: usize, end: usize) -> (&str, Option<&str>) { + let content = src.get(start..end).unwrap_or_default(); + // necessary for custom emojis which look like `![name](url "title")` + let (url, extra) = if content.contains(' ') { + let split = content.split_once(' ').expect("split is valid"); + (split.0, Some(split.1)) + } else { + (content, None) + }; + (url, extra) +} + +pub fn markdown_find_links(src: &str) -> Vec<(usize, usize)> { + find_urls::(src) +} + +// Walk the syntax tree to find positions of image or link urls +fn find_urls(src: &str) -> Vec<(usize, usize)> { + let ast = MARKDOWN_PARSER.parse(src); + let mut links_offsets = vec![]; + ast.walk(|node, _depth| { + if let Some(image) = node.cast::() { + let node_offsets = node.srcmap.expect("srcmap is none").get_byte_offsets(); + let start_offset = node_offsets.1 - image.url_len() - 1 - image.title_len(); + let end_offset = node_offsets.1 - 1; + + links_offsets.push((start_offset, end_offset)); + } + }); + links_offsets +} + +pub trait UrlAndTitle { + fn url_len(&self) -> usize; + fn title_len(&self) -> usize; +} + +impl UrlAndTitle for Image { + fn url_len(&self) -> usize { + self.url.len() + } + + fn title_len(&self) -> usize { + self.title.as_ref().map(|t| t.len() + 3).unwrap_or_default() + } +} +impl UrlAndTitle for Link { + fn url_len(&self) -> usize { + self.url.len() + } + fn title_len(&self) -> usize { + self.title.as_ref().map(|t| t.len() + 3).unwrap_or_default() + } +} + #[cfg(test)] #[expect(clippy::unwrap_used)] mod tests { @@ -75,6 +101,15 @@ mod tests { use super::*; use pretty_assertions::assert_eq; + #[test] + fn test_find_links() { + let links = markdown_find_links("[test](https://example.com)"); + assert_eq!(vec![(7, 26)], links); + + let links = find_urls::("![test](https://example.com)"); + assert_eq!(vec![(8, 27)], links); + } + #[test] fn test_markdown_proxy_images() { let tests: Vec<_> = diff --git a/crates/utils/src/utils/markdown/mod.rs b/crates/utils/src/utils/markdown/mod.rs index 241052a91..7f21112fc 100644 --- a/crates/utils/src/utils/markdown/mod.rs +++ b/crates/utils/src/utils/markdown/mod.rs @@ -3,7 +3,7 @@ use markdown_it::MarkdownIt; use regex::RegexSet; use std::sync::LazyLock; -mod image_links; +pub mod image_links; mod link_rule; mod spoiler_rule;