feat(url-preview): Optionally download audio/video files for url preview requests

This commit is contained in:
Trash Panda 2026-02-27 23:41:52 -07:00 committed by Ellis Git
parent 9dbd75e740
commit e8746760fa
4 changed files with 213 additions and 24 deletions

View file

@ -1735,6 +1735,11 @@ pub struct Config {
/// default: "continuwuity/<version> (bot; +https://continuwuity.org)"
pub url_preview_user_agent: Option<String>,
/// Determines whether audio and video files will be downloaded for URL
/// previews.
#[serde(default)]
pub url_preview_allow_audio_video: bool,
/// List of forbidden room aliases and room IDs as strings of regex
/// patterns.
///

View file

@ -142,6 +142,10 @@ impl Service {
self.server.config.url_preview_check_root_domain
}
/// Returns the `url_preview_allow_audio_video` config flag, which controls
/// whether audio and video files are downloaded for URL previews.
pub fn url_preview_allow_audio_video(&self) -> bool {
	let config = &self.server.config;
	config.url_preview_allow_audio_video
}
pub fn forbidden_alias_names(&self) -> &RegexSet { &self.server.config.forbidden_alias_names }
pub fn forbidden_usernames(&self) -> &RegexSet { &self.server.config.forbidden_usernames }

View file

@ -207,6 +207,28 @@ impl Data {
value.extend_from_slice(&data.image_width.unwrap_or(0).to_be_bytes());
value.push(0xFF);
value.extend_from_slice(&data.image_height.unwrap_or(0).to_be_bytes());
value.push(0xFF);
value.extend_from_slice(
data.video
.as_ref()
.map(String::as_bytes)
.unwrap_or_default(),
);
value.push(0xFF);
value.extend_from_slice(&data.video_size.unwrap_or(0).to_be_bytes());
value.push(0xFF);
value.extend_from_slice(&data.video_width.unwrap_or(0).to_be_bytes());
value.push(0xFF);
value.extend_from_slice(&data.video_height.unwrap_or(0).to_be_bytes());
value.push(0xFF);
value.extend_from_slice(
data.audio
.as_ref()
.map(String::as_bytes)
.unwrap_or_default(),
);
value.push(0xFF);
value.extend_from_slice(&data.audio_size.unwrap_or(0).to_be_bytes());
self.url_previews.insert(url.as_bytes(), &value);
@ -267,6 +289,48 @@ impl Data {
| Some(0) => None,
| x => x,
};
let video = match values
.next()
.and_then(|b| String::from_utf8(b.to_vec()).ok())
{
| Some(s) if s.is_empty() => None,
| x => x,
};
let video_size = match values
.next()
.map(|b| usize::from_be_bytes(b.try_into().unwrap_or_default()))
{
| Some(0) => None,
| x => x,
};
let video_width = match values
.next()
.map(|b| u32::from_be_bytes(b.try_into().unwrap_or_default()))
{
| Some(0) => None,
| x => x,
};
let video_height = match values
.next()
.map(|b| u32::from_be_bytes(b.try_into().unwrap_or_default()))
{
| Some(0) => None,
| x => x,
};
let audio = match values
.next()
.and_then(|b| String::from_utf8(b.to_vec()).ok())
{
| Some(s) if s.is_empty() => None,
| x => x,
};
let audio_size = match values
.next()
.map(|b| usize::from_be_bytes(b.try_into().unwrap_or_default()))
{
| Some(0) => None,
| x => x,
};
Ok(UrlPreviewData {
title,
@ -275,6 +339,12 @@ impl Data {
image_size,
image_width,
image_height,
video,
video_size,
video_width,
video_height,
audio,
audio_size,
})
}
}

View file

@ -29,6 +29,18 @@ pub struct UrlPreviewData {
pub image_width: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:image:height"))]
pub image_height: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:video"))]
pub video: Option<String>,
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "matrix:video:size"))]
pub video_size: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:video:width"))]
pub video_width: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:video:height"))]
pub video_height: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:audio"))]
pub audio: Option<String>,
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "matrix:audio:size"))]
pub audio_size: Option<usize>,
}
#[implement(Service)]
@ -96,7 +108,9 @@ async fn request_url_preview(&self, url: &Url) -> Result<UrlPreviewData> {
let data = match content_type {
| html if html.starts_with("text/html") => self.download_html(url.as_str()).await?,
| img if img.starts_with("image/") => self.download_image(url.as_str()).await?,
| img if img.starts_with("image/") => self.download_image(url.as_str(), None).await?,
| video if video.starts_with("video/") => self.download_video(url.as_str(), None).await?,
| audio if audio.starts_with("audio/") => self.download_audio(url.as_str(), None).await?,
| _ => return Err!(Request(Unknown("Unsupported Content-Type"))),
};
@ -107,11 +121,17 @@ async fn request_url_preview(&self, url: &Url) -> Result<UrlPreviewData> {
#[cfg(feature = "url_preview")]
#[implement(Service)]
pub async fn download_image(&self, url: &str) -> Result<UrlPreviewData> {
pub async fn download_image(
&self,
url: &str,
preview_data: Option<UrlPreviewData>,
) -> Result<UrlPreviewData> {
use conduwuit::utils::random_string;
use image::ImageReader;
use ruma::Mxc;
let mut preview_data = preview_data.unwrap_or_default();
let image = self
.services
.client
@ -128,6 +148,7 @@ pub async fn download_image(&self, url: &str) -> Result<UrlPreviewData> {
.expect("u64 should fit in usize"),
)
.await?;
let mxc = Mxc {
server_name: self.services.globals.server_name(),
media_id: &random_string(super::MXC_LENGTH),
@ -135,27 +156,107 @@ pub async fn download_image(&self, url: &str) -> Result<UrlPreviewData> {
self.create(&mxc, None, None, None, &image).await?;
let cursor = std::io::Cursor::new(&image);
let (width, height) = match ImageReader::new(cursor).with_guessed_format() {
| Err(_) => (None, None),
| Ok(reader) => match reader.into_dimensions() {
preview_data.image = Some(mxc.to_string());
if preview_data.image_height.is_none() || preview_data.image_width.is_none() {
let cursor = std::io::Cursor::new(&image);
let (width, height) = match ImageReader::new(cursor).with_guessed_format() {
| Err(_) => (None, None),
| Ok((width, height)) => (Some(width), Some(height)),
},
};
| Ok(reader) => match reader.into_dimensions() {
| Err(_) => (None, None),
| Ok((width, height)) => (Some(width), Some(height)),
},
};
Ok(UrlPreviewData {
image: Some(mxc.to_string()),
image_size: Some(image.len()),
image_width: width,
image_height: height,
..Default::default()
})
preview_data.image_width = width;
preview_data.image_height = height;
}
Ok(preview_data)
}
#[cfg(feature = "url_preview")]
#[implement(Service)]
/// Downloads the video at `url`, stores it as local media and records the
/// resulting MXC URI and byte size in the preview data.
///
/// `preview_data` is the preview built so far; when `None`, a default
/// (empty) preview is used. Fields unrelated to the video are left untouched.
///
/// If `url_preview_allow_audio_video` is disabled, nothing is fetched and the
/// preview data is returned unchanged.
///
/// # Errors
///
/// Returns an error if the request fails, the body cannot be read, or the
/// media cannot be stored.
pub async fn download_video(
	&self,
	url: &str,
	preview_data: Option<UrlPreviewData>,
) -> Result<UrlPreviewData> {
	use conduwuit::utils::random_string;
	use ruma::Mxc;

	let mut preview_data = preview_data.unwrap_or_default();

	// Audio/video downloads are opt-in; without the flag this is a no-op.
	if !self.services.globals.url_preview_allow_audio_video() {
		return Ok(preview_data);
	}

	// NOTE(review): the whole response body is buffered in memory and no size
	// cap is visible here - confirm the client or caller enforces one.
	let response = self.services.client.url_preview.get(url).send().await?;
	let video = response.bytes().await?;

	let mxc = Mxc {
		server_name: self.services.globals.server_name(),
		media_id: &random_string(super::MXC_LENGTH),
	};

	self.create(&mxc, None, None, None, &video).await?;

	preview_data.video = Some(mxc.to_string());
	// Report the stored size (`matrix:video:size`) alongside the MXC URI.
	preview_data.video_size = Some(video.len());

	Ok(preview_data)
}
#[cfg(feature = "url_preview")]
#[implement(Service)]
/// Downloads the audio file at `url`, stores it as local media and records
/// the resulting MXC URI and byte size in the preview data.
///
/// `preview_data` is the preview built so far; when `None`, a default
/// (empty) preview is used. Fields unrelated to the audio are left untouched,
/// including any previously recorded video.
///
/// If `url_preview_allow_audio_video` is disabled, nothing is fetched and the
/// preview data is returned unchanged.
///
/// # Errors
///
/// Returns an error if the request fails, the body cannot be read, or the
/// media cannot be stored.
pub async fn download_audio(
	&self,
	url: &str,
	preview_data: Option<UrlPreviewData>,
) -> Result<UrlPreviewData> {
	use conduwuit::utils::random_string;
	use ruma::Mxc;

	let mut preview_data = preview_data.unwrap_or_default();

	// Audio/video downloads are opt-in; without the flag this is a no-op.
	if !self.services.globals.url_preview_allow_audio_video() {
		return Ok(preview_data);
	}

	// NOTE(review): the whole response body is buffered in memory and no size
	// cap is visible here - confirm the client or caller enforces one.
	let response = self.services.client.url_preview.get(url).send().await?;
	let audio = response.bytes().await?;

	let mxc = Mxc {
		server_name: self.services.globals.server_name(),
		media_id: &random_string(super::MXC_LENGTH),
	};

	self.create(&mxc, None, None, None, &audio).await?;

	// Store the URI in the audio field; writing it to `video` would leave
	// `og:audio` empty and overwrite a video recorded earlier.
	preview_data.audio = Some(mxc.to_string());
	// Report the stored size (`matrix:audio:size`) alongside the MXC URI.
	preview_data.audio_size = Some(audio.len());

	Ok(preview_data)
}
#[cfg(not(feature = "url_preview"))]
#[implement(Service)]
pub async fn download_image(&self, _url: &str) -> Result<UrlPreviewData> {
pub async fn download_image(
&self,
_url: &str,
_preview_data: Option<UrlPreviewData>,
) -> Result<UrlPreviewData> {
Err!(FeatureDisabled("url_preview"))
}
#[cfg(not(feature = "url_preview"))]
#[implement(Service)]
/// Stub compiled when the `url_preview` feature is disabled.
///
/// Ignores both arguments and always returns a `FeatureDisabled` error
/// naming the `url_preview` feature.
pub async fn download_video(
&self,
_url: &str,
_preview_data: Option<UrlPreviewData>,
) -> Result<UrlPreviewData> {
Err!(FeatureDisabled("url_preview"))
}
#[cfg(not(feature = "url_preview"))]
#[implement(Service)]
/// Stub compiled when the `url_preview` feature is disabled.
///
/// Ignores both arguments and always returns a `FeatureDisabled` error
/// naming the `url_preview` feature.
pub async fn download_audio(
&self,
_url: &str,
_preview_data: Option<UrlPreviewData>,
) -> Result<UrlPreviewData> {
Err!(FeatureDisabled("url_preview"))
}
@ -182,18 +283,27 @@ async fn download_html(&self, url: &str) -> Result<UrlPreviewData> {
return Err!(Request(Unknown("Failed to parse HTML")));
};
let mut data = match html.opengraph.images.first() {
| None => UrlPreviewData::default(),
| Some(obj) => self.download_image(&obj.url).await?,
};
let mut preview_data = UrlPreviewData::default();
if let Some(obj) = html.opengraph.images.first() {
preview_data = self.download_image(&obj.url, Some(preview_data)).await?;
}
if let Some(obj) = html.opengraph.videos.first() {
preview_data = self.download_video(&obj.url, Some(preview_data)).await?;
}
if let Some(obj) = html.opengraph.audios.first() {
preview_data = self.download_audio(&obj.url, Some(preview_data)).await?;
}
let props = html.opengraph.properties;
/* use OpenGraph title/description, but fall back to HTML if not available */
data.title = props.get("title").cloned().or(html.title);
data.description = props.get("description").cloned().or(html.description);
preview_data.title = props.get("title").cloned().or(html.title);
preview_data.description = props.get("description").cloned().or(html.description);
Ok(data)
Ok(preview_data)
}
#[cfg(not(feature = "url_preview"))]