This commit is contained in:
Nutomic 2024-09-30 15:01:44 +02:00 committed by GitHub
commit 144e18f136
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 412 additions and 182 deletions

View File

@ -50,7 +50,7 @@ use lemmy_utils::{
rate_limit::{ActionType, BucketConfig},
settings::structs::{PictrsImageMode, Settings},
utils::{
markdown::{markdown_check_for_blocked_urls, markdown_rewrite_image_links},
markdown::{image_links::markdown_rewrite_image_links, markdown_check_for_blocked_urls},
slurs::{build_slur_regex, remove_slurs},
validation::clean_urls_in_text,
},

View File

@ -1,4 +1,5 @@
use crate::fetcher::{
post_or_comment::PostOrComment,
search::{search_query_to_object_id, search_query_to_object_id_local, SearchableObjects},
user_or_community::UserOrCommunity,
};
@ -46,21 +47,22 @@ async fn convert_response(
local_user_view: Option<LocalUserView>,
pool: &mut DbPool<'_>,
) -> LemmyResult<Json<ResolveObjectResponse>> {
use SearchableObjects::*;
let removed_or_deleted;
let mut res = ResolveObjectResponse::default();
let local_user = local_user_view.map(|l| l.local_user);
match object {
Post(p) => {
SearchableObjects::PostOrComment(pc) => match *pc {
PostOrComment::Post(p) => {
removed_or_deleted = p.deleted || p.removed;
res.post = Some(PostView::read(pool, p.id, local_user.as_ref(), false).await?)
}
Comment(c) => {
PostOrComment::Comment(c) => {
removed_or_deleted = c.deleted || c.removed;
res.comment = Some(CommentView::read(pool, c.id, local_user.as_ref()).await?)
}
PersonOrCommunity(p) => match *p {
},
SearchableObjects::PersonOrCommunity(pc) => match *pc {
UserOrCommunity::User(u) => {
removed_or_deleted = u.deleted;
res.person = Some(PersonView::read(pool, u.id).await?)

View File

@ -310,7 +310,7 @@ where
#[cfg(test)]
#[expect(clippy::indexing_slicing)]
mod tests {
pub(crate) mod tests {
use crate::api::user_settings_backup::{export_settings, import_settings, UserSettingsBackup};
use activitypub_federation::config::Data;
@ -332,7 +332,7 @@ mod tests {
use std::time::Duration;
use tokio::time::sleep;
async fn create_user(
pub(crate) async fn create_user(
name: String,
bio: Option<String>,
context: &Data<LemmyContext>,

View File

@ -0,0 +1,184 @@
use super::{search::SearchableObjects, user_or_community::UserOrCommunity};
use crate::fetcher::post_or_comment::PostOrComment;
use activitypub_federation::{config::Data, fetch::object_id::ObjectId};
use lemmy_api_common::{
context::LemmyContext,
utils::{generate_local_apub_endpoint, EndpointType},
};
use lemmy_db_schema::{newtypes::InstanceId, source::instance::Instance};
use lemmy_utils::{
error::LemmyResult,
utils::markdown::image_links::{markdown_find_links, markdown_handle_title},
};
use url::Url;
/// Convenience wrapper around [`markdown_rewrite_remote_links`] for optional markdown fields.
///
/// Returns `None` when `src` is `None`, otherwise the rewritten text wrapped in `Some`.
pub async fn markdown_rewrite_remote_links_opt(
  src: Option<String>,
  context: &Data<LemmyContext>,
) -> Option<String> {
  // Absent text needs no rewriting
  let text = src?;
  Some(markdown_rewrite_remote_links(text, context).await)
}
/// Rewrites remote markdown links to local ones wherever the link target can be resolved as an
/// Activitypub object, so that the object can be viewed without leaving the local instance.
/// Links which cannot be resolved are left untouched.
///
/// Resolution goes through `ObjectId::dereference` (see [`to_local_url`]), so this must only be
/// used for incoming federated objects, never for content submitted through the API.
pub async fn markdown_rewrite_remote_links(
  mut src: String,
  context: &Data<LemmyContext>,
) -> String {
  // Work from the last link towards the first one, so that a replacement never shifts the
  // offsets of links which still have to be processed.
  for (start, end) in markdown_find_links(&src).into_iter().rev() {
    let (url, title) = markdown_handle_title(&src, start, end);

    let Some(local_url) = to_local_url(url, context).await else {
      continue;
    };

    // Put an optional title back behind the rewritten url
    let replacement = match title {
      Some(title) => format!("{local_url} {title}"),
      None => local_url.to_string(),
    };
    src.replace_range(start..end, &replacement);
  }

  src
}
/// Attempts to resolve `url` as an Activitypub object and returns the url under which the same
/// object can be viewed on the local instance.
///
/// Returns `None` if `url` is not a valid url, if it already points to the local instance, if
/// the object cannot be dereferenced, or if no local url can be built for it.
pub(crate) async fn to_local_url(url: &str, context: &Data<LemmyContext>) -> Option<Url> {
  let local_domain = &context.settings().get_protocol_and_hostname();
  let object_id = ObjectId::<SearchableObjects>::parse(url).ok()?;
  // `Url::domain()` returns the bare host without protocol, so it has to be compared with the
  // configured hostname. Comparing with protocol + hostname would never match, and local links
  // would needlessly go through dereferencing.
  if object_id.inner().domain() == Some(context.settings().hostname.as_str()) {
    return None;
  }
  let dereferenced = object_id.dereference(context).await.ok()?;
  match dereferenced {
    // Posts and comments are addressed by their local database id
    SearchableObjects::PostOrComment(pc) => match *pc {
      PostOrComment::Post(post) => {
        generate_local_apub_endpoint(EndpointType::Post, &post.id.to_string(), local_domain)
      }
      PostOrComment::Comment(comment) => {
        generate_local_apub_endpoint(EndpointType::Comment, &comment.id.to_string(), local_domain)
      }
    }
    .ok()
    .map(Into::into),
    // Users and communities are addressed by name, see `format_actor_url`
    SearchableObjects::PersonOrCommunity(pc) => match *pc {
      UserOrCommunity::User(user) => {
        format_actor_url(&user.name, "u", user.instance_id, context).await
      }
      UserOrCommunity::Community(community) => {
        format_actor_url(&community.name, "c", community.instance_id, context).await
      }
    }
    .ok(),
  }
}
/// Builds the local url for viewing an actor: `{local}/{kind}/{name}` if the actor's instance
/// domain equals the local hostname, `{local}/{kind}/{name}@{domain}` otherwise.
///
/// Fails if the instance cannot be read from the database or the resulting url is invalid.
async fn format_actor_url(
  name: &str,
  kind: &str,
  instance_id: InstanceId,
  context: &LemmyContext,
) -> LemmyResult<Url> {
  let settings = context.settings();
  let local_protocol_and_hostname = settings.get_protocol_and_hostname();
  let instance = Instance::read(&mut context.pool(), instance_id).await?;

  let formatted = if instance.domain == settings.hostname {
    format!("{local_protocol_and_hostname}/{kind}/{name}")
  } else {
    // Actors from other instances carry their home domain as suffix
    format!(
      "{local_protocol_and_hostname}/{kind}/{name}@{}",
      instance.domain
    )
  };

  let parsed = Url::parse(&formatted)?;
  Ok(parsed)
}
#[cfg(test)]
mod tests {

  use super::*;
  use crate::api::user_settings_backup::tests::create_user;
  use lemmy_db_schema::{
    source::{
      community::{Community, CommunityInsertForm},
      post::{Post, PostInsertForm},
    },
    traits::Crud,
  };
  use pretty_assertions::assert_eq;

  /// Creates a community and a post in the test database and checks which markdown links get
  /// rewritten to local urls by `markdown_rewrite_remote_links`.
  #[tokio::test]
  async fn test_markdown_rewrite_remote_links() -> LemmyResult<()> {
    let context = LemmyContext::init_test_context().await;
    let instance = Instance::read_or_create(&mut context.pool(), "example.com".to_string()).await?;
    let community = Community::create(
      &mut context.pool(),
      &CommunityInsertForm::new(
        instance.id,
        "my_community".to_string(),
        "My Community".to_string(),
        "pubkey".to_string(),
      ),
    )
    .await?;
    let user = create_user("john".to_string(), None, &context).await?;
    let post_form = PostInsertForm::new("My post".to_string(), user.person.id, community.id);
    let post = Post::create(&mut context.pool(), &post_form).await?;

    let tests: Vec<_> = vec![
      (
        "rewrite remote link",
        format!("[link]({})", post.ap_id),
        // Use the id assigned by the database instead of assuming that it is 1, so the test
        // does not depend on the state of the id sequence.
        format!("[link](https://lemmy-alpha/post/{})", post.id),
      ),
      (
        "rewrite community link",
        format!("[link]({})", community.actor_id),
        "[link](https://lemmy-alpha/c/my_community@example.com)".to_string(),
      ),
      (
        "dont rewrite local post link",
        "[link](https://lemmy-alpha/post/2)".to_string(),
        "[link](https://lemmy-alpha/post/2)".to_string(),
      ),
      (
        "dont rewrite local community link",
        "[link](https://lemmy-alpha/c/test)".to_string(),
        "[link](https://lemmy-alpha/c/test)".to_string(),
      ),
      (
        "dont rewrite non-fediverse link",
        "[link](https://example.com/)".to_string(),
        "[link](https://example.com/)".to_string(),
      ),
      (
        "dont rewrite invalid url",
        "[link](example-com)".to_string(),
        "[link](example-com)".to_string(),
      ),
    ];

    // The context created above is reused, there is no need to initialize a second one
    for (msg, input, expected) in &tests {
      let result = markdown_rewrite_remote_links(input.to_string(), &context).await;

      assert_eq!(
        &result, expected,
        "Testing {}, with original input '{}'",
        msg, input
      );
    }

    Ok(())
  }
}

View File

@ -10,6 +10,7 @@ use lemmy_db_schema::traits::ApubActor;
use lemmy_db_views::structs::LocalUserView;
use lemmy_utils::error::{LemmyError, LemmyResult};
pub(crate) mod markdown_links;
pub mod post_or_comment;
pub mod search;
pub mod site_or_community_or_user;

View File

@ -1,8 +1,5 @@
use crate::{
fetcher::user_or_community::{PersonOrGroup, UserOrCommunity},
objects::{comment::ApubComment, community::ApubCommunity, person::ApubPerson, post::ApubPost},
protocol::objects::{note::Note, page::Page},
};
use super::post_or_comment::{PageOrNote, PostOrComment};
use crate::fetcher::user_or_community::{PersonOrGroup, UserOrCommunity};
use activitypub_federation::{
config::Data,
fetch::{object_id::ObjectId, webfinger::webfinger_resolve_actor},
@ -54,16 +51,14 @@ pub(crate) async fn search_query_to_object_id_local(
/// The types of ActivityPub objects that can be fetched directly by searching for their ID.
#[derive(Debug)]
pub(crate) enum SearchableObjects {
Post(ApubPost),
Comment(ApubComment),
PostOrComment(Box<PostOrComment>),
PersonOrCommunity(Box<UserOrCommunity>),
}
#[derive(Deserialize)]
#[serde(untagged)]
pub(crate) enum SearchableKinds {
Page(Box<Page>),
Note(Note),
PageOrNote(Box<PageOrNote>),
PersonOrGroup(Box<PersonOrGroup>),
}
@ -75,8 +70,7 @@ impl Object for SearchableObjects {
fn last_refreshed_at(&self) -> Option<DateTime<Utc>> {
match self {
SearchableObjects::Post(p) => p.last_refreshed_at(),
SearchableObjects::Comment(c) => c.last_refreshed_at(),
SearchableObjects::PostOrComment(p) => p.last_refreshed_at(),
SearchableObjects::PersonOrCommunity(p) => p.last_refreshed_at(),
}
}
@ -95,13 +89,9 @@ impl Object for SearchableObjects {
if let Some(uc) = uc {
return Ok(Some(SearchableObjects::PersonOrCommunity(Box::new(uc))));
}
let p = ApubPost::read_from_id(object_id.clone(), context).await?;
if let Some(p) = p {
return Ok(Some(SearchableObjects::Post(p)));
}
let c = ApubComment::read_from_id(object_id, context).await?;
if let Some(c) = c {
return Ok(Some(SearchableObjects::Comment(c)));
let pc = PostOrComment::read_from_id(object_id.clone(), context).await?;
if let Some(pc) = pc {
return Ok(Some(SearchableObjects::PostOrComment(Box::new(pc))));
}
Ok(None)
}
@ -109,25 +99,16 @@ impl Object for SearchableObjects {
#[tracing::instrument(skip_all)]
async fn delete(self, data: &Data<Self::DataType>) -> LemmyResult<()> {
match self {
SearchableObjects::Post(p) => p.delete(data).await,
SearchableObjects::Comment(c) => c.delete(data).await,
SearchableObjects::PersonOrCommunity(pc) => match *pc {
UserOrCommunity::User(p) => p.delete(data).await,
UserOrCommunity::Community(c) => c.delete(data).await,
},
SearchableObjects::PostOrComment(pc) => pc.delete(data).await,
SearchableObjects::PersonOrCommunity(pc) => pc.delete(data).await,
}
}
async fn into_json(self, data: &Data<Self::DataType>) -> LemmyResult<Self::Kind> {
use SearchableObjects::*;
Ok(match self {
SearchableObjects::Post(p) => SearchableKinds::Page(Box::new(p.into_json(data).await?)),
SearchableObjects::Comment(c) => SearchableKinds::Note(c.into_json(data).await?),
SearchableObjects::PersonOrCommunity(pc) => {
SearchableKinds::PersonOrGroup(Box::new(match *pc {
UserOrCommunity::User(p) => PersonOrGroup::Person(p.into_json(data).await?),
UserOrCommunity::Community(c) => PersonOrGroup::Group(c.into_json(data).await?),
}))
}
PostOrComment(pc) => SearchableKinds::PageOrNote(Box::new(pc.into_json(data).await?)),
PersonOrCommunity(pc) => SearchableKinds::PersonOrGroup(Box::new(pc.into_json(data).await?)),
})
}
@ -137,24 +118,20 @@ impl Object for SearchableObjects {
expected_domain: &Url,
data: &Data<Self::DataType>,
) -> LemmyResult<()> {
use SearchableKinds::*;
match apub {
SearchableKinds::Page(a) => ApubPost::verify(a, expected_domain, data).await,
SearchableKinds::Note(a) => ApubComment::verify(a, expected_domain, data).await,
SearchableKinds::PersonOrGroup(pg) => match pg.as_ref() {
PersonOrGroup::Person(a) => ApubPerson::verify(a, expected_domain, data).await,
PersonOrGroup::Group(a) => ApubCommunity::verify(a, expected_domain, data).await,
},
PageOrNote(pn) => PostOrComment::verify(pn, expected_domain, data).await,
PersonOrGroup(pg) => UserOrCommunity::verify(pg, expected_domain, data).await,
}
}
#[tracing::instrument(skip_all)]
async fn from_json(apub: Self::Kind, context: &Data<LemmyContext>) -> LemmyResult<Self> {
use SearchableKinds as SAT;
use SearchableKinds::*;
use SearchableObjects as SO;
Ok(match apub {
SAT::Page(p) => SO::Post(ApubPost::from_json(*p, context).await?),
SAT::Note(n) => SO::Comment(ApubComment::from_json(n, context).await?),
SAT::PersonOrGroup(pg) => {
PageOrNote(pg) => SO::PostOrComment(Box::new(PostOrComment::from_json(*pg, context).await?)),
PersonOrGroup(pg) => {
SO::PersonOrCommunity(Box::new(UserOrCommunity::from_json(*pg, context).await?))
}
})

View File

@ -1,6 +1,7 @@
use crate::{
activities::{verify_is_public, verify_person_in_community},
check_apub_id_valid_with_strictness,
fetcher::markdown_links::markdown_rewrite_remote_links,
mentions::collect_non_local_mentions,
objects::{read_from_string_or_source, verify_is_remote_object},
protocol::{
@ -169,6 +170,7 @@ impl Object for ApubComment {
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?;
let content = process_markdown(&content, slur_regex, &url_blocklist, context).await?;
let content = markdown_rewrite_remote_links(content, context).await;
let language_id =
LanguageTag::to_language_id_single(note.language, &mut context.pool()).await?;

View File

@ -1,6 +1,7 @@
use crate::{
activities::GetActorType,
check_apub_id_valid,
fetcher::markdown_links::markdown_rewrite_remote_links_opt,
local_site_data_cached,
objects::{instance::fetch_instance_actor_for_object, read_from_string_or_source_opt},
protocol::{
@ -148,6 +149,7 @@ impl Object for ApubCommunity {
let description = read_from_string_or_source_opt(&group.summary, &None, &group.source);
let description =
process_markdown_opt(&description, slur_regex, &url_blocklist, context).await?;
let description = markdown_rewrite_remote_links_opt(description, context).await;
let icon = proxy_image_link_opt_apub(group.icon.map(|i| i.url), context).await?;
let banner = proxy_image_link_opt_apub(group.image.map(|i| i.url), context).await?;

View File

@ -2,6 +2,7 @@ use super::verify_is_remote_object;
use crate::{
activities::GetActorType,
check_apub_id_valid_with_strictness,
fetcher::markdown_links::markdown_rewrite_remote_links_opt,
local_site_data_cached,
objects::read_from_string_or_source_opt,
protocol::{
@ -151,6 +152,7 @@ impl Object for ApubSite {
let url_blocklist = get_url_blocklist(context).await?;
let sidebar = read_from_string_or_source_opt(&apub.content, &None, &apub.source);
let sidebar = process_markdown_opt(&sidebar, slur_regex, &url_blocklist, context).await?;
let sidebar = markdown_rewrite_remote_links_opt(sidebar, context).await;
let icon = proxy_image_link_opt_apub(apub.icon.map(|i| i.url), context).await?;
let banner = proxy_image_link_opt_apub(apub.image.map(|i| i.url), context).await?;

View File

@ -2,6 +2,7 @@ use super::verify_is_remote_object;
use crate::{
activities::GetActorType,
check_apub_id_valid_with_strictness,
fetcher::markdown_links::markdown_rewrite_remote_links_opt,
local_site_data_cached,
objects::{instance::fetch_instance_actor_for_object, read_from_string_or_source_opt},
protocol::{
@ -156,6 +157,7 @@ impl Object for ApubPerson {
let url_blocklist = get_url_blocklist(context).await?;
let bio = read_from_string_or_source_opt(&person.summary, &None, &person.source);
let bio = process_markdown_opt(&bio, slur_regex, &url_blocklist, context).await?;
let bio = markdown_rewrite_remote_links_opt(bio, context).await;
let avatar = proxy_image_link_opt_apub(person.icon.map(|i| i.url), context).await?;
let banner = proxy_image_link_opt_apub(person.image.map(|i| i.url), context).await?;

View File

@ -1,6 +1,7 @@
use crate::{
activities::{verify_is_public, verify_person_in_community},
check_apub_id_valid_with_strictness,
fetcher::markdown_links::{markdown_rewrite_remote_links_opt, to_local_url},
local_site_data_cached,
objects::{read_from_string_or_source_opt, verify_is_remote_object},
protocol::{
@ -215,7 +216,7 @@ impl Object for ApubPost {
let first_attachment = page.attachment.first();
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let url = if let Some(attachment) = first_attachment.cloned() {
let mut url = if let Some(attachment) = first_attachment.cloned() {
Some(attachment.url())
} else if page.kind == PageType::Video {
// we cant display videos directly, so insert a link to external video page
@ -226,9 +227,12 @@ impl Object for ApubPost {
let url_blocklist = get_url_blocklist(context).await?;
if let Some(url) = &url {
if let Some(ref mut url) = url {
is_url_blocked(url, &url_blocklist)?;
is_valid_url(url)?;
if let Some(local_url) = to_local_url(url.as_str(), context).await {
*url = local_url;
}
}
let alt_text = first_attachment.cloned().and_then(Attachment::alt_text);
@ -237,6 +241,7 @@ impl Object for ApubPost {
let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source);
let body = process_markdown_opt(&body, slur_regex, &url_blocklist, context).await?;
let body = markdown_rewrite_remote_links_opt(body, context).await;
let language_id =
LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?;

View File

@ -1,6 +1,7 @@
use super::verify_is_remote_object;
use crate::{
check_apub_id_valid_with_strictness,
fetcher::markdown_links::markdown_rewrite_remote_links,
objects::read_from_string_or_source,
protocol::{
objects::chat_message::{ChatMessage, ChatMessageType},
@ -134,6 +135,7 @@ impl Object for ApubPrivateMessage {
let url_blocklist = get_url_blocklist(context).await?;
let content = read_from_string_or_source(&note.content, &None, &note.source);
let content = process_markdown(&content, slur_regex, &url_blocklist, context).await?;
let content = markdown_rewrite_remote_links(content, context).await;
let form = PrivateMessageInsertForm {
creator_id: creator.id,

View File

@ -67,6 +67,11 @@ impl Instance {
}
}
}
/// Loads the instance with the given id from the database.
///
/// Returns an error if no connection can be obtained or the query yields no row.
pub async fn read(pool: &mut DbPool<'_>, instance_id: InstanceId) -> Result<Self, Error> {
  let mut conn = get_conn(pool).await?;
  instance::table.find(instance_id).first(&mut conn).await
}
pub async fn update(
pool: &mut DbPool<'_>,
instance_id: InstanceId,

View File

@ -0,0 +1,168 @@
use super::{link_rule::Link, MARKDOWN_PARSER};
use crate::settings::SETTINGS;
use markdown_it::{plugins::cmark::inline::image::Image, NodeValue};
use url::Url;
use urlencoding::encode;
/// Rewrites all image links to remote domains in markdown, so they go through
/// `/api/v3/image_proxy`.
///
/// Returns the rewritten markdown together with all image urls which could be parsed (in
/// reverse order of appearance, local ones included). Image links which are not valid urls are
/// replaced with empty text.
pub fn markdown_rewrite_image_links(mut src: String) -> (String, Vec<Url>) {
  let links_offsets = find_urls::<Image>(&src);

  let mut links = vec![];
  // Go through the collected links in reverse order, so that replacements don't invalidate the
  // offsets of links which are still to be processed
  for (start, end) in links_offsets.into_iter().rev() {
    // `String::replace_range` panics if the range is out of bounds or doesn't lie on char
    // boundaries. Such a range doesn't describe a url, so leave the text alone instead of
    // crashing on user-provided markdown.
    if src.get(start..end).is_none() {
      continue;
    }

    let (url, extra) = markdown_handle_title(&src, start, end);
    match Url::parse(url) {
      Ok(parsed) => {
        // If link points to remote domain, replace with proxied link
        if parsed.domain() != Some(&SETTINGS.hostname) {
          let mut proxied = format!(
            "{}/api/v3/image_proxy?url={}",
            SETTINGS.get_protocol_and_hostname(),
            encode(url),
          );
          // restore custom emoji format
          if let Some(extra) = extra {
            proxied = format!("{proxied} {extra}");
          }
          src.replace_range(start..end, &proxied);
        }
        links.push(parsed);
      }
      Err(_) => {
        // If its not a valid url, replace with empty text
        src.replace_range(start..end, "");
      }
    }
  }

  (src, links)
}
/// Splits the text at `src[start..end]` into the url and an optional remainder behind the first
/// space.
///
/// The remainder is necessary for custom emojis which look like `![name](url "title")`. If the
/// range is not a valid slice of `src`, an empty url and no remainder are returned.
pub fn markdown_handle_title(src: &str, start: usize, end: usize) -> (&str, Option<&str>) {
  let content = src.get(start..end).unwrap_or_default();
  match content.split_once(' ') {
    Some((url, extra)) => (url, Some(extra)),
    None => (content, None),
  }
}
/// Returns the `(start, end)` byte offsets of the url part (including an optional title) of
/// every `Link` node found in the markdown `src`.
pub fn markdown_find_links(src: &str) -> Vec<(usize, usize)> {
find_urls::<Link>(src)
}
// Walk the syntax tree to find positions of image or link urls. Each returned pair holds the
// byte offsets of the text between the parentheses, meaning the url plus an optional title.
fn find_urls<T: NodeValue + UrlAndTitle>(src: &str) -> Vec<(usize, usize)> {
  let ast = MARKDOWN_PARSER.parse(src);
  let mut links_offsets = vec![];
  ast.walk(|node, _depth| {
    if let Some(target) = node.cast::<T>() {
      let (_, node_end) = node.srcmap.expect("srcmap is none").get_byte_offsets();

      // The url and title sit directly in front of the closing parenthesis, which is the last
      // byte of the node. The lengths come from the parsed node and may not match the source
      // text, so use checked arithmetic and skip the node rather than underflow.
      let offsets = node_end.checked_sub(1).and_then(|end_offset| {
        let start_offset = end_offset
          .checked_sub(target.url_len())?
          .checked_sub(target.title_len())?;
        Some((start_offset, end_offset))
      });

      if let Some(offsets) = offsets {
        links_offsets.push(offsets);
      }
    }
  });
  links_offsets
}
/// Lengths of the url and title of a markdown node, used by `find_urls` to compute the position
/// of the url inside the source text from the end offset of the node.
pub trait UrlAndTitle {
/// Length in bytes of the node's url.
fn url_len(&self) -> usize;
/// Number of bytes the title occupies behind the url, or 0 if there is no title.
fn title_len(&self) -> usize;
}
impl UrlAndTitle for Image {
  fn url_len(&self) -> usize {
    self.url.len()
  }

  /// Title length plus 3 extra bytes, presumably for the separating space and the two quotes
  /// in `![name](url "title")`. Returns 0 without title.
  fn title_len(&self) -> usize {
    match &self.title {
      Some(title) => title.len() + 3,
      None => 0,
    }
  }
}
impl UrlAndTitle for Link {
  fn url_len(&self) -> usize {
    self.url.len()
  }

  /// Title length plus 3 extra bytes, presumably for the separating space and the two quotes
  /// in `[name](url "title")`. Returns 0 without title.
  fn title_len(&self) -> usize {
    match &self.title {
      Some(title) => title.len() + 3,
      None => 0,
    }
  }
}
#[cfg(test)]
mod tests {

  use super::*;
  use pretty_assertions::assert_eq;

  /// Offsets cover the url between the parentheses, for links as well as images.
  #[test]
  fn test_find_links() {
    let link_offsets = markdown_find_links("[test](https://example.com)");
    assert_eq!(vec![(7, 26)], link_offsets);

    let image_offsets = find_urls::<Image>("![test](https://example.com)");
    assert_eq!(vec![(8, 27)], image_offsets);
  }

  /// Remote images are proxied, local ones are kept and invalid ones are emptied.
  #[test]
  fn test_markdown_proxy_images() {
    let tests: Vec<_> =
      vec![
        (
          "remote image proxied",
          "![link](http://example.com/image.jpg)",
          "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)",
        ),
        (
          "local image unproxied",
          "![link](http://lemmy-alpha/image.jpg)",
          "![link](http://lemmy-alpha/image.jpg)",
        ),
        (
          "multiple image links",
          "![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)",
          "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)",
        ),
        (
          "empty link handled",
          "![image]()",
          "![image]()"
        ),
        (
          "empty label handled",
          "![](http://example.com/image.jpg)",
          "![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)"
        ),
        (
          "invalid image link removed",
          "![image](http-not-a-link)",
          "![image]()"
        ),
        (
          "label with nested markdown handled",
          "![a *b* c](http://example.com/image.jpg)",
          "![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)"
        ),
        (
          "custom emoji support",
          r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#,
          r#"![party-blob](https://lemmy-alpha/api/v3/image_proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#
        )
      ];

    for (msg, input, expected) in tests {
      let (rewritten, _links) = markdown_rewrite_image_links(input.to_string());

      assert_eq!(
        rewritten, expected,
        "Testing {}, with original input '{}'",
        msg, input
      );
    }
  }
}

View File

@ -1,10 +1,9 @@
use crate::{error::LemmyResult, settings::SETTINGS, LemmyErrorType};
use markdown_it::{plugins::cmark::inline::image::Image, MarkdownIt};
use crate::{error::LemmyResult, LemmyErrorType};
use markdown_it::MarkdownIt;
use regex::RegexSet;
use std::sync::LazyLock;
use url::Url;
use urlencoding::encode;
pub mod image_links;
mod link_rule;
mod spoiler_rule;
@ -35,70 +34,6 @@ pub fn markdown_to_html(text: &str) -> String {
MARKDOWN_PARSER.parse(text).xrender()
}
/// Rewrites all links to remote domains in markdown, so they go through `/api/v3/image_proxy`.
pub fn markdown_rewrite_image_links(mut src: String) -> (String, Vec<Url>) {
let ast = MARKDOWN_PARSER.parse(&src);
let mut links_offsets = vec![];
// Walk the syntax tree to find positions of image links
ast.walk(|node, _depth| {
if let Some(image) = node.cast::<Image>() {
// srcmap is always present for image
// https://github.com/markdown-it-rust/markdown-it/issues/36#issuecomment-1777844387
let node_offsets = node.srcmap.expect("srcmap is none").get_byte_offsets();
// necessary for custom emojis which look like `![name](url "title")`
let start_offset = node_offsets.1
- image.url.len()
- 1
- image
.title
.as_ref()
.map(|t| t.len() + 3)
.unwrap_or_default();
let end_offset = node_offsets.1 - 1;
links_offsets.push((start_offset, end_offset));
}
});
let mut links = vec![];
// Go through the collected links in reverse order
while let Some((start, end)) = links_offsets.pop() {
let content = src.get(start..end).unwrap_or_default();
// necessary for custom emojis which look like `![name](url "title")`
let (url, extra) = if content.contains(' ') {
let split = content.split_once(' ').expect("split is valid");
(split.0, Some(split.1))
} else {
(content, None)
};
match Url::parse(url) {
Ok(parsed) => {
links.push(parsed.clone());
// If link points to remote domain, replace with proxied link
if parsed.domain() != Some(&SETTINGS.hostname) {
let mut proxied = format!(
"{}/api/v3/image_proxy?url={}",
SETTINGS.get_protocol_and_hostname(),
encode(url),
);
// restore custom emoji format
if let Some(extra) = extra {
proxied = format!("{proxied} {extra}");
}
src.replace_range(start..end, &proxied);
}
}
Err(_) => {
// If its not a valid url, replace with empty text
src.replace_range(start..end, "");
}
}
}
(src, links)
}
pub fn markdown_check_for_blocked_urls(text: &str, blocklist: &RegexSet) -> LemmyResult<()> {
if blocklist.is_match(text) {
Err(LemmyErrorType::BlockedUrl)?
@ -187,63 +122,6 @@ mod tests {
});
}
#[test]
fn test_markdown_proxy_images() {
let tests: Vec<_> =
vec![
(
"remote image proxied",
"![link](http://example.com/image.jpg)",
"![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)",
),
(
"local image unproxied",
"![link](http://lemmy-alpha/image.jpg)",
"![link](http://lemmy-alpha/image.jpg)",
),
(
"multiple image links",
"![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)",
"![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)",
),
(
"empty link handled",
"![image]()",
"![image]()"
),
(
"empty label handled",
"![](http://example.com/image.jpg)",
"![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)"
),
(
"invalid image link removed",
"![image](http-not-a-link)",
"![image]()"
),
(
"label with nested markdown handled",
"![a *b* c](http://example.com/image.jpg)",
"![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)"
),
(
"custom emoji support",
r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#,
r#"![party-blob](https://lemmy-alpha/api/v3/image_proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#
)
];
tests.iter().for_each(|&(msg, input, expected)| {
let result = markdown_rewrite_image_links(input.to_string());
assert_eq!(
result.0, expected,
"Testing {}, with original input '{}'",
msg, input
);
});
}
#[test]
fn test_url_blocking() {
let set = RegexSet::new(vec![r"(https://)?example\.com/?"]).unwrap();