From e8746760faacc76e6f054f9e3ebc65cf784a9cd6 Mon Sep 17 00:00:00 2001
From: Trash Panda
Date: Fri, 27 Feb 2026 23:41:52 -0700
Subject: [PATCH] feat(url-preview): Optionally download audio/video files for url preview requests

---
 src/core/config/mod.rs       |   5 ++
 src/service/globals/mod.rs   |   4 +
 src/service/media/data.rs    |  70 ++++++++++++++++
 src/service/media/preview.rs | 169 +++++++++++++++++++++++++++++------
 4 files changed, 224 insertions(+), 24 deletions(-)

diff --git a/src/core/config/mod.rs b/src/core/config/mod.rs
index fb2c9375..a642f5b7 100644
--- a/src/core/config/mod.rs
+++ b/src/core/config/mod.rs
@@ -1735,6 +1735,11 @@ pub struct Config {
 	/// default: "continuwuity/<version> (bot; +https://continuwuity.org)"
 	pub url_preview_user_agent: Option<String>,
 
+	/// Determines whether audio and video files will be downloaded for URL
+	/// previews.
+	#[serde(default)]
+	pub url_preview_allow_audio_video: bool,
+
 	/// List of forbidden room aliases and room IDs as strings of regex
 	/// patterns.
 	///
diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs
index 938bfcc5..3930042d 100644
--- a/src/service/globals/mod.rs
+++ b/src/service/globals/mod.rs
@@ -142,6 +142,10 @@ impl Service {
 		self.server.config.url_preview_check_root_domain
 	}
 
+	pub fn url_preview_allow_audio_video(&self) -> bool {
+		self.server.config.url_preview_allow_audio_video
+	}
+
 	pub fn forbidden_alias_names(&self) -> &RegexSet { &self.server.config.forbidden_alias_names }
 
 	pub fn forbidden_usernames(&self) -> &RegexSet { &self.server.config.forbidden_usernames }
diff --git a/src/service/media/data.rs b/src/service/media/data.rs
index 0703a476..0bc355cd 100644
--- a/src/service/media/data.rs
+++ b/src/service/media/data.rs
@@ -207,6 +207,28 @@ impl Data {
 		value.extend_from_slice(&data.image_width.unwrap_or(0).to_be_bytes());
 		value.push(0xFF);
 		value.extend_from_slice(&data.image_height.unwrap_or(0).to_be_bytes());
+		value.push(0xFF);
+		value.extend_from_slice(
+			data.video
+				.as_ref()
+				.map(String::as_bytes)
+				.unwrap_or_default(),
+		);
+		value.push(0xFF);
+		value.extend_from_slice(&data.video_size.unwrap_or(0).to_be_bytes());
+		value.push(0xFF);
+		value.extend_from_slice(&data.video_width.unwrap_or(0).to_be_bytes());
+		value.push(0xFF);
+		value.extend_from_slice(&data.video_height.unwrap_or(0).to_be_bytes());
+		value.push(0xFF);
+		value.extend_from_slice(
+			data.audio
+				.as_ref()
+				.map(String::as_bytes)
+				.unwrap_or_default(),
+		);
+		value.push(0xFF);
+		value.extend_from_slice(&data.audio_size.unwrap_or(0).to_be_bytes());
 
 		self.url_previews.insert(url.as_bytes(), &value);
 
@@ -267,6 +289,48 @@ impl Data {
 			| Some(0) => None,
 			| x => x,
 		};
+		let video = match values
+			.next()
+			.and_then(|b| String::from_utf8(b.to_vec()).ok())
+		{
+			| Some(s) if s.is_empty() => None,
+			| x => x,
+		};
+		let video_size = match values
+			.next()
+			.map(|b| usize::from_be_bytes(b.try_into().unwrap_or_default()))
+		{
+			| Some(0) => None,
+			| x => x,
+		};
+		let video_width = match values
+			.next()
+			.map(|b| u32::from_be_bytes(b.try_into().unwrap_or_default()))
+		{
+			| Some(0) => None,
+			| x => x,
+		};
+		let video_height = match values
+			.next()
+			.map(|b| u32::from_be_bytes(b.try_into().unwrap_or_default()))
+		{
+			| Some(0) => None,
+			| x => x,
+		};
+		let audio = match values
+			.next()
+			.and_then(|b| String::from_utf8(b.to_vec()).ok())
+		{
+			| Some(s) if s.is_empty() => None,
+			| x => x,
+		};
+		let audio_size = match values
+			.next()
+			.map(|b| usize::from_be_bytes(b.try_into().unwrap_or_default()))
+		{
+			| Some(0) => None,
+			| x => x,
+		};
 
 		Ok(UrlPreviewData {
 			title,
@@ -275,6 +339,12 @@ impl Data {
 			image_size,
 			image_width,
 			image_height,
+			video,
+			video_size,
+			video_width,
+			video_height,
+			audio,
+			audio_size,
 		})
 	}
 }
diff --git a/src/service/media/preview.rs b/src/service/media/preview.rs
index e5a22e27..783269f1 100644
--- a/src/service/media/preview.rs
+++ b/src/service/media/preview.rs
@@ -29,6 +29,18 @@ pub struct UrlPreviewData {
 	pub image_width: Option<u32>,
 	#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:image:height"))]
 	pub image_height: Option<u32>,
+	#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:video"))]
+	pub video: Option<String>,
+	#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "matrix:video:size"))]
+	pub video_size: Option<usize>,
+	#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:video:width"))]
+	pub video_width: Option<u32>,
+	#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:video:height"))]
+	pub video_height: Option<u32>,
+	#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:audio"))]
+	pub audio: Option<String>,
+	#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "matrix:audio:size"))]
+	pub audio_size: Option<usize>,
 }
 
 #[implement(Service)]
@@ -96,7 +108,9 @@ async fn request_url_preview(&self, url: &Url) -> Result<UrlPreviewData> {
 
 	let data = match content_type {
 		| html if html.starts_with("text/html") => self.download_html(url.as_str()).await?,
-		| img if img.starts_with("image/") => self.download_image(url.as_str()).await?,
+		| img if img.starts_with("image/") => self.download_image(url.as_str(), None).await?,
+		| video if video.starts_with("video/") => self.download_video(url.as_str(), None).await?,
+		| audio if audio.starts_with("audio/") => self.download_audio(url.as_str(), None).await?,
 		| _ => return Err!(Request(Unknown("Unsupported Content-Type"))),
 	};
 
@@ -107,11 +121,17 @@ async fn request_url_preview(&self, url: &Url) -> Result<UrlPreviewData> {
 
 #[cfg(feature = "url_preview")]
 #[implement(Service)]
-pub async fn download_image(&self, url: &str) -> Result<UrlPreviewData> {
+pub async fn download_image(
+	&self,
+	url: &str,
+	preview_data: Option<UrlPreviewData>,
+) -> Result<UrlPreviewData> {
 	use conduwuit::utils::random_string;
 	use image::ImageReader;
 	use ruma::Mxc;
 
+	let mut preview_data = preview_data.unwrap_or_default();
+
 	let image = self
 		.services
 		.client
@@ -128,6 +148,7 @@ pub async fn download_image(&self, url: &str) -> Result<UrlPreviewData> {
 				.expect("u64 should fit in usize"),
 		)
 		.await?;
+
 	let mxc = Mxc {
 		server_name: self.services.globals.server_name(),
 		media_id: &random_string(super::MXC_LENGTH),
@@ -135,27 +156,118 @@ pub async fn download_image(&self, url: &str) -> Result<UrlPreviewData> {
 
 	self.create(&mxc, None, None, None, &image).await?;
 
-	let cursor = std::io::Cursor::new(&image);
-	let (width, height) = match ImageReader::new(cursor).with_guessed_format() {
-		| Err(_) => (None, None),
-		| Ok(reader) => match reader.into_dimensions() {
+	preview_data.image = Some(mxc.to_string());
+	preview_data.image_size = Some(image.len());
+	if preview_data.image_height.is_none() || preview_data.image_width.is_none() {
+		let cursor = std::io::Cursor::new(&image);
+		let (width, height) = match ImageReader::new(cursor).with_guessed_format() {
 			| Err(_) => (None, None),
-			| Ok((width, height)) => (Some(width), Some(height)),
-		},
-	};
+			| Ok(reader) => match reader.into_dimensions() {
+				| Err(_) => (None, None),
+				| Ok((width, height)) => (Some(width), Some(height)),
+			},
+		};
 
-	Ok(UrlPreviewData {
-		image: Some(mxc.to_string()),
-		image_size: Some(image.len()),
-		image_width: width,
-		image_height: height,
-		..Default::default()
-	})
+		preview_data.image_width = width;
+		preview_data.image_height = height;
+	}
+
+	Ok(preview_data)
+}
+
+/// Downloads the video at `url` into the media store and records its MXC URI
+/// and byte size in `preview_data`. The preview data is returned unchanged
+/// when `url_preview_allow_audio_video` is disabled.
+#[cfg(feature = "url_preview")]
+#[implement(Service)]
+pub async fn download_video(
+	&self,
+	url: &str,
+	preview_data: Option<UrlPreviewData>,
+) -> Result<UrlPreviewData> {
+	use conduwuit::utils::random_string;
+	use ruma::Mxc;
+
+	let mut preview_data = preview_data.unwrap_or_default();
+
+	if self.services.globals.url_preview_allow_audio_video() {
+		// NOTE(review): the whole body is buffered with no size cap; consider bounding it.
+		let video = self.services.client.url_preview.get(url).send().await?;
+		let video = video.bytes().await?;
+		let mxc = Mxc {
+			server_name: self.services.globals.server_name(),
+			media_id: &random_string(super::MXC_LENGTH),
+		};
+
+		self.create(&mxc, None, None, None, &video).await?;
+
+		preview_data.video = Some(mxc.to_string());
+		preview_data.video_size = Some(video.len());
+	}
+
+	Ok(preview_data)
+}
+
+/// Downloads the audio at `url` into the media store and records its MXC URI
+/// and byte size in `preview_data`. The preview data is returned unchanged
+/// when `url_preview_allow_audio_video` is disabled.
+#[cfg(feature = "url_preview")]
+#[implement(Service)]
+pub async fn download_audio(
+	&self,
+	url: &str,
+	preview_data: Option<UrlPreviewData>,
+) -> Result<UrlPreviewData> {
+	use conduwuit::utils::random_string;
+	use ruma::Mxc;
+
+	let mut preview_data = preview_data.unwrap_or_default();
+
+	if self.services.globals.url_preview_allow_audio_video() {
+		// NOTE(review): the whole body is buffered with no size cap; consider bounding it.
+		let audio = self.services.client.url_preview.get(url).send().await?;
+		let audio = audio.bytes().await?;
+		let mxc = Mxc {
+			server_name: self.services.globals.server_name(),
+			media_id: &random_string(super::MXC_LENGTH),
+		};
+
+		self.create(&mxc, None, None, None, &audio).await?;
+
+		preview_data.audio = Some(mxc.to_string());
+		preview_data.audio_size = Some(audio.len());
+	}
+
+	Ok(preview_data)
 }
 
 #[cfg(not(feature = "url_preview"))]
 #[implement(Service)]
-pub async fn download_image(&self, _url: &str) -> Result<UrlPreviewData> {
+pub async fn download_image(
+	&self,
+	_url: &str,
+	_preview_data: Option<UrlPreviewData>,
+) -> Result<UrlPreviewData> {
+	Err!(FeatureDisabled("url_preview"))
+}
+
+#[cfg(not(feature = "url_preview"))]
+#[implement(Service)]
+pub async fn download_video(
+	&self,
+	_url: &str,
+	_preview_data: Option<UrlPreviewData>,
+) -> Result<UrlPreviewData> {
+	Err!(FeatureDisabled("url_preview"))
+}
+
+#[cfg(not(feature = "url_preview"))]
+#[implement(Service)]
+pub async fn download_audio(
+	&self,
+	_url: &str,
+	_preview_data: Option<UrlPreviewData>,
+) -> Result<UrlPreviewData> {
 	Err!(FeatureDisabled("url_preview"))
 }
 
@@ -182,18 +294,27 @@ async fn download_html(&self, url: &str) -> Result<UrlPreviewData> {
 		return Err!(Request(Unknown("Failed to parse HTML")));
 	};
 
-	let mut data = match html.opengraph.images.first() {
-		| None => UrlPreviewData::default(),
-		| Some(obj) => self.download_image(&obj.url).await?,
-	};
+	let mut preview_data = UrlPreviewData::default();
+
+	if let Some(obj) = html.opengraph.images.first() {
+		preview_data = self.download_image(&obj.url, Some(preview_data)).await?;
+	}
+
+	if let Some(obj) = html.opengraph.videos.first() {
+		preview_data = self.download_video(&obj.url, Some(preview_data)).await?;
+	}
+
+	if let Some(obj) = html.opengraph.audios.first() {
+		preview_data = self.download_audio(&obj.url, Some(preview_data)).await?;
+	}
 
 	let props = html.opengraph.properties;
 
 	/* use OpenGraph title/description, but fall back to HTML if not available */
-	data.title = props.get("title").cloned().or(html.title);
-	data.description = props.get("description").cloned().or(html.description);
+	preview_data.title = props.get("title").cloned().or(html.title);
+	preview_data.description = props.get("description").cloned().or(html.description);
 
-	Ok(data)
+	Ok(preview_data)
 }
 
 #[cfg(not(feature = "url_preview"))]