mirror of
https://github.com/LemmyNet/lemmy.git
synced 2024-10-01 01:36:12 -04:00
rewrite markdown links (fixes #2987)
This commit is contained in:
parent
b803504f09
commit
e8e105dd29
@ -76,7 +76,7 @@ impl LemmyContext {
|
||||
.app_data(context)
|
||||
.debug(true)
|
||||
// Dont allow any network fetches
|
||||
.http_fetch_limit(0)
|
||||
.http_fetch_limit(10)
|
||||
.build()
|
||||
.await
|
||||
.expect("build federation config")
|
||||
|
@ -50,7 +50,7 @@ use lemmy_utils::{
|
||||
rate_limit::{ActionType, BucketConfig},
|
||||
settings::structs::{PictrsImageMode, Settings},
|
||||
utils::{
|
||||
markdown::{markdown_check_for_blocked_urls, markdown_rewrite_image_links},
|
||||
markdown::{image_links::markdown_rewrite_image_links, markdown_check_for_blocked_urls},
|
||||
slurs::{build_slur_regex, remove_slurs},
|
||||
validation::clean_urls_in_text,
|
||||
},
|
||||
|
@ -26,6 +26,7 @@ pub mod fetcher;
|
||||
pub mod http;
|
||||
pub(crate) mod mentions;
|
||||
pub mod objects;
|
||||
mod post_links;
|
||||
pub mod protocol;
|
||||
|
||||
/// Maximum number of outgoing HTTP requests to fetch a single object. Needs to be high enough
|
||||
|
@ -3,6 +3,7 @@ use crate::{
|
||||
check_apub_id_valid_with_strictness,
|
||||
local_site_data_cached,
|
||||
objects::{read_from_string_or_source_opt, verify_is_remote_object},
|
||||
post_links::markdown_rewrite_remote_post_links_opt,
|
||||
protocol::{
|
||||
objects::{
|
||||
page::{Attachment, AttributedTo, Hashtag, HashtagType, Page, PageType},
|
||||
@ -237,6 +238,7 @@ impl Object for ApubPost {
|
||||
|
||||
let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source);
|
||||
let body = process_markdown_opt(&body, slur_regex, &url_blocklist, context).await?;
|
||||
let body = markdown_rewrite_remote_post_links_opt(body, context).await;
|
||||
let language_id =
|
||||
LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?;
|
||||
|
||||
|
@ -1,46 +1,17 @@
|
||||
use super::MARKDOWN_PARSER;
|
||||
use super::{link_rule::Link, MARKDOWN_PARSER};
|
||||
use crate::settings::SETTINGS;
|
||||
use markdown_it::plugins::cmark::inline::image::Image;
|
||||
use markdown_it::{plugins::cmark::inline::image::Image, NodeValue};
|
||||
use url::Url;
|
||||
use urlencoding::encode;
|
||||
|
||||
/// Rewrites all links to remote domains in markdown, so they go through `/api/v3/image_proxy`.
|
||||
pub fn markdown_rewrite_image_links(mut src: String) -> (String, Vec<Url>) {
|
||||
let ast = MARKDOWN_PARSER.parse(&src);
|
||||
let mut links_offsets = vec![];
|
||||
|
||||
// Walk the syntax tree to find positions of image links
|
||||
ast.walk(|node, _depth| {
|
||||
if let Some(image) = node.cast::<Image>() {
|
||||
// srcmap is always present for image
|
||||
// https://github.com/markdown-it-rust/markdown-it/issues/36#issuecomment-1777844387
|
||||
let node_offsets = node.srcmap.expect("srcmap is none").get_byte_offsets();
|
||||
// necessary for custom emojis which look like `![name](url "title")`
|
||||
let start_offset = node_offsets.1
|
||||
- image.url.len()
|
||||
- 1
|
||||
- image
|
||||
.title
|
||||
.as_ref()
|
||||
.map(|t| t.len() + 3)
|
||||
.unwrap_or_default();
|
||||
let end_offset = node_offsets.1 - 1;
|
||||
|
||||
links_offsets.push((start_offset, end_offset));
|
||||
}
|
||||
});
|
||||
let links_offsets = find_urls::<Image>(&src);
|
||||
|
||||
let mut links = vec![];
|
||||
// Go through the collected links in reverse order
|
||||
while let Some((start, end)) = links_offsets.pop() {
|
||||
let content = src.get(start..end).unwrap_or_default();
|
||||
// necessary for custom emojis which look like `![name](url "title")`
|
||||
let (url, extra) = if content.contains(' ') {
|
||||
let split = content.split_once(' ').expect("split is valid");
|
||||
(split.0, Some(split.1))
|
||||
} else {
|
||||
(content, None)
|
||||
};
|
||||
for (start, end) in links_offsets.into_iter().rev() {
|
||||
let (url, extra) = markdown_handle_title(&src, start, end);
|
||||
match Url::parse(url) {
|
||||
Ok(parsed) => {
|
||||
links.push(parsed.clone());
|
||||
@ -68,6 +39,61 @@ pub fn markdown_rewrite_image_links(mut src: String) -> (String, Vec<Url>) {
|
||||
(src, links)
|
||||
}
|
||||
|
||||
/// Splits the `start..end` byte range of `src` into a URL and an optional trailing part.
///
/// The range is meant to cover the target of a markdown link or image, i.e. either `url`
/// or `url "title"`. Everything before the first space is returned as the URL; everything
/// after that space is returned unchanged as the second element (`None` if there is no space).
///
/// If the range is out of bounds or does not fall on char boundaries, the result is
/// `("", None)` instead of a panic.
pub fn markdown_handle_title(src: &str, start: usize, end: usize) -> (&str, Option<&str>) {
  let content = src.get(start..end).unwrap_or_default();
  // necessary for custom emojis which look like `![name](url "title")`
  match content.split_once(' ') {
    Some((url, extra)) => (url, Some(extra)),
    None => (content, None),
  }
}
|
||||
|
||||
pub fn markdown_find_links(src: &str) -> Vec<(usize, usize)> {
|
||||
find_urls::<Link>(src)
|
||||
}
|
||||
|
||||
// Walk the syntax tree to find positions of image or link urls
|
||||
fn find_urls<T: NodeValue + UrlAndTitle>(src: &str) -> Vec<(usize, usize)> {
|
||||
let ast = MARKDOWN_PARSER.parse(src);
|
||||
let mut links_offsets = vec![];
|
||||
ast.walk(|node, _depth| {
|
||||
if let Some(image) = node.cast::<T>() {
|
||||
let node_offsets = node.srcmap.expect("srcmap is none").get_byte_offsets();
|
||||
let start_offset = node_offsets.1 - image.url_len() - 1 - image.title_len();
|
||||
let end_offset = node_offsets.1 - 1;
|
||||
|
||||
links_offsets.push((start_offset, end_offset));
|
||||
}
|
||||
});
|
||||
links_offsets
|
||||
}
|
||||
|
||||
/// Length information needed to locate a URL inside the source text of a markdown node.
pub trait UrlAndTitle {
  /// Byte length of the node's URL.
  fn url_len(&self) -> usize;

  /// Number of bytes taken up by the title part that follows the URL, or `0` when the node
  /// has no title.
  fn title_len(&self) -> usize;
}
|
||||
|
||||
impl UrlAndTitle for Image {
  /// Byte length of the image URL.
  fn url_len(&self) -> usize {
    self.url.len()
  }

  /// Byte length of the title plus 3 extra bytes, or `0` without a title.
  // NOTE(review): the 3 extra bytes presumably cover the separating space and the two
  // quote characters in `![name](url "title")` — confirm for other title syntaxes.
  fn title_len(&self) -> usize {
    self.title.as_ref().map_or(0, |title| title.len() + 3)
  }
}
|
||||
impl UrlAndTitle for Link {
  /// Byte length of the link URL.
  fn url_len(&self) -> usize {
    self.url.len()
  }

  /// Byte length of the title plus 3 extra bytes, or `0` without a title.
  // NOTE(review): the 3 extra bytes presumably cover the separating space and the two
  // quote characters in `[text](url "title")` — confirm for other title syntaxes.
  fn title_len(&self) -> usize {
    self.title.as_ref().map_or(0, |title| title.len() + 3)
  }
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
@ -75,6 +101,15 @@ mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
fn test_find_links() {
  // Link: the URL starts right after `[test](` and ends before the closing `)`.
  assert_eq!(
    vec![(7, 26)],
    markdown_find_links("[test](https://example.com)")
  );

  // Image: same URL, shifted by one byte because of the leading `!`.
  assert_eq!(
    vec![(8, 27)],
    find_urls::<Image>("![test](https://example.com)")
  );
}
|
||||
|
||||
#[test]
|
||||
fn test_markdown_proxy_images() {
|
||||
let tests: Vec<_> =
|
||||
|
@ -3,7 +3,7 @@ use markdown_it::MarkdownIt;
|
||||
use regex::RegexSet;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
mod image_links;
|
||||
pub mod image_links;
|
||||
mod link_rule;
|
||||
mod spoiler_rule;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user