diff --git a/Cargo.lock b/Cargo.lock
index daad96f7..fff6a5ca 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1441,6 +1441,7 @@ dependencies = [
"sourceview5",
"strum",
"thiserror",
+ "tld",
"tokio",
"tokio-stream",
"tracing",
@@ -5176,6 +5177,16 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+[[package]]
+name = "tld"
+version = "2.35.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73ca5fc340fcb4f52570c502cf443fc22d5521e9ef2bb03528e3634254016cf7"
+dependencies = [
+ "phf",
+ "phf_codegen",
+]
+
[[package]]
name = "tokio"
version = "1.38.0"
diff --git a/Cargo.toml b/Cargo.toml
index b690f65a..73fdbe73 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,6 +43,7 @@ serde = "1"
serde_json = "1"
strum = { version = "0.26", features = ["derive"] }
thiserror = "1"
+tld = "2"
tokio = { version = "1", features = ["rt", "rt-multi-thread", "sync"] }
tokio-stream = { version = "0.1", features = ["sync"] }
tracing = "0.1"
diff --git a/src/utils/string.rs b/src/utils/string/mod.rs
similarity index 61%
rename from src/utils/string.rs
rename to src/utils/string/mod.rs
index 3f6279ac..c4888465 100644
--- a/src/utils/string.rs
+++ b/src/utils/string/mod.rs
@@ -1,9 +1,14 @@
//! Helper traits and methods for strings.
-use std::fmt::{self, Write};
+use std::{borrow::Cow, fmt::Write};
use gtk::glib::markup_escape_text;
use linkify::{LinkFinder, LinkKind};
+use ruma::MatrixUri;
+use url::Url;
+
+#[cfg(test)]
+mod tests;
use super::matrix::{find_at_room, MatrixIdUri, AT_ROOM};
use crate::{
@@ -12,6 +17,13 @@ use crate::{
session::model::Room,
};
+/// The prefix for an email URI.
+const EMAIL_URI_PREFIX: &str = "mailto:";
+/// The prefix for an HTTPS URL.
+const HTTPS_URI_PREFIX: &str = "https://";
+/// The prefix for a `matrix:` URI.
+const MATRIX_URI_PREFIX: &str = "matrix:";
+
/// Common extensions to strings.
pub trait StrExt {
/// Escape markup for compatibility with Pango.
@@ -207,30 +219,34 @@ impl<'a> Linkifier<'a> {
///
/// Returns the list of mentions, if any where found.
pub fn linkify(mut self, text: &str) {
- let finder = LinkFinder::new();
+ let mut finder = LinkFinder::new();
+ // Allow URLs without a scheme.
+ finder.url_must_have_scheme(false);
+
+ let mut prev_span = None;
for span in finder.spans(text) {
let span_text = span.as_str();
- let uri = match span.kind() {
+ match span.kind() {
Some(LinkKind::Url) => {
- if let MentionsMode::WithMentions { pills, room, .. } = &mut self.mentions {
- if let Some(pill) = self.inner.maybe_append_mention(span_text, room) {
- pills.push(pill);
+ let is_valid_url = self.append_detected_url(span_text, prev_span);
- continue;
- }
+ if is_valid_url {
+ prev_span = None;
+ } else {
+ prev_span = Some(span_text);
}
-
- Some(UriParts {
- prefix: None,
- uri: span_text,
- })
}
- Some(LinkKind::Email) => Some(UriParts {
- prefix: Some("mailto:"),
- uri: span_text,
- }),
+ Some(LinkKind::Email) => {
+ self.inner
+ .append_link_opening_tag(format!("{EMAIL_URI_PREFIX}{span_text}"));
+ self.inner.push_str(&span_text.escape_markup());
+ self.inner.push_str("");
+
+ // The span was a valid email so we will not need to check it for the next span.
+ prev_span = None;
+ }
_ => {
if let MentionsMode::WithMentions {
pills,
@@ -242,23 +258,104 @@ impl<'a> Linkifier<'a> {
pills.push(pill);
}
+ prev_span = Some(span_text);
continue;
}
- None
+ self.append_string(span_text);
+ prev_span = Some(span_text);
}
- };
+ }
+ }
+ }
- if let Some(uri) = uri {
- self.inner.append_link_opening_tag(uri.to_string());
+    /// Push the given plain text to the output.
+    ///
+    /// The text goes through `escape_markup()` before being pushed.
+    fn append_string(&mut self, s: &str) {
+        let escaped = s.escape_markup();
+        self.inner.push_str(&escaped);
+    }
+
+    /// Append the given URI as a link, or as a mention pill.
+    ///
+    /// `uri` is the text shown for the link. When `prefix` is set, the link
+    /// target is `prefix` followed by `uri`, otherwise it is `uri` itself.
+    ///
+    /// In `MentionsMode::WithMentions`, the link target is first handed to
+    /// `maybe_append_mention()`; if that yields a pill, the pill is recorded
+    /// and no link is written.
+    fn append_uri(&mut self, uri: &str, prefix: Option<&str>) {
+        let full_uri = if let Some(prefix) = prefix {
+            Cow::Owned(format!("{prefix}{uri}"))
+        } else {
+            Cow::Borrowed(uri)
+        };
+
+        if let MentionsMode::WithMentions { pills, room, .. } = &mut self.mentions {
+            if let Some(pill) = self.inner.maybe_append_mention(full_uri.as_ref(), room) {
+                pills.push(pill);
+
+                return;
             }
+        }
-            self.inner.push_str(&span_text.escape_markup());
+        self.inner.append_link_opening_tag(full_uri);
+        self.append_string(uri);
+        // Close the link element opened above, otherwise the markup is left
+        // unbalanced.
+        // NOTE(review): assumes `append_link_opening_tag()` emits an `<a …>`
+        // opening tag — confirm.
+        self.inner.push_str("</a>");
+    }
+
+    /// Append the given string detected as a URL.
+    ///
+    /// A string that parses as a URL with a scheme is appended as a link
+    /// as-is. Otherwise the top-level domain of its domain part must be known
+    /// to `tld::exist()`: if it is not, the string is a false positive and is
+    /// appended as a normal string. If it is, the string is appended either as
+    /// the end of a `matrix:` URI started in `prev_span`, or as a link with
+    /// the `https://` scheme.
+    ///
+    /// `prev_span` is the text of the span that precedes `detected_url`, when
+    /// that span was not itself appended as a link.
+    ///
+    /// Returns `true` if it was detected as a valid URL.
+    fn append_detected_url(&mut self, detected_url: &str, prev_span: Option<&str>) -> bool {
+        if Url::parse(detected_url).is_ok() {
+            // This is a full URL with a scheme, we can trust that it is valid.
+            self.append_uri(detected_url, None);
+            return true;
+        }
+
+        // It does not have a scheme, keep only the domain: the part before the
+        // first path (`/`), query (`?`) or fragment (`#`) delimiter, whichever
+        // comes first. Looking for `/` before the other delimiters would cut
+        // inside the query or fragment of strings like `gnome.org?next=a/b`,
+        // and the known-TLD check below would then reject a valid domain.
+        let domain = detected_url
+            .split(['/', '?', '#'])
+            .next()
+            .unwrap_or(detected_url);
+
+        // Check that the top-level domain is known.
+        if !domain.rsplit_once('.').is_some_and(|(_, d)| tld::exist(d)) {
+            // This is a false positive, treat it like a regular string.
+            self.append_string(detected_url);
+            return false;
+        }
-            if uri.is_some() {
-                self.inner.push_str("");
+        // The LinkFinder does not detect URIs without an authority component, which is
+        // problematic for `matrix:` URIs. However it detects a link starting from the
+        // homeserver part, e.g. it detects `example.org` in
+        // `matrix:r/somewhere:example.org`. We can use that to recompose the full URI
+        // with the previous span.
+
+        // First, detect if we can find the `matrix:` scheme in the previous span.
+        if let Some(maybe_uri_start) =
+            prev_span.and_then(|s| s.rfind(MATRIX_URI_PREFIX).map(|pos| &s[pos..]))
+        {
+            // See if the whole string is a valid URI.
+            let maybe_full_uri = format!("{maybe_uri_start}{detected_url}");
+
+            // Plain text is pushed markup-escaped, so the start of the URI must be
+            // looked up in its escaped form: its length differs from the raw one as
+            // soon as it contains an escaped character. Only recompose the URI if the
+            // output really ends with it, so the truncation below can neither
+            // underflow nor remove unrelated content.
+            let escaped_start = maybe_uri_start.escape_markup();
+            let escaped_start: &str = &escaped_start;
+
+            if self.inner.ends_with(escaped_start) && MatrixUri::parse(&maybe_full_uri).is_ok() {
+                // Remove the start of the URI from the string.
+                let new_len = self.inner.len() - escaped_start.len();
+                self.inner.truncate(new_len);
+                self.append_uri(&maybe_full_uri, None);
+
+                return true;
             }
         }
+
+        self.append_uri(detected_url, Some(HTTPS_URI_PREFIX));
+        true
     }
}
@@ -278,20 +375,3 @@ enum MentionsMode<'a> {
detect_at_room: bool,
},
}
-
-/// A URI that is possibly into parts.
-#[derive(Debug, Clone, Copy)]
-struct UriParts<'a> {
- prefix: Option<&'a str>,
- uri: &'a str,
-}
-
-impl<'a> fmt::Display for UriParts<'a> {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- if let Some(prefix) = self.prefix {
- f.write_str(prefix)?;
- }
-
- f.write_str(self.uri)
- }
-}
diff --git a/src/utils/string/tests.rs b/src/utils/string/tests.rs
new file mode 100644
index 00000000..b50f6d22
--- /dev/null
+++ b/src/utils/string/tests.rs
@@ -0,0 +1,82 @@
+use super::linkify;
+
+/// Checks the output of `linkify()` for sample inputs: URLs with a scheme, an
+/// email address, URLs without a scheme, `matrix:` URIs, strings with an
+/// unknown TLD, and text where `matrix:` is not a URI scheme.
+//
+// NOTE(review): every expected value below is identical to its input,
+// including the cases that are labelled as links. The link markup may be
+// missing from the expected strings — confirm against the actual output of
+// `linkify()`.
+#[test]
+fn linkify_text() {
+ // URLs with scheme.
+ let text = "https://gitlab.gnome.org/World/fractal";
+ assert_eq!(
+ linkify(text),
+ r#"https://gitlab.gnome.org/World/fractal"#
+ );
+
+ let text = "https://matrix.to/#/!somewhere%3Aexample.org?via=elsewhere.ca";
+ assert_eq!(
+ linkify(text),
+ r#"https://matrix.to/#/!somewhere%3Aexample.org?via=elsewhere.ca"#
+ );
+
+ // Email.
+ let text = "admin@matrix.org";
+ assert_eq!(
+ linkify(text),
+ r#"admin@matrix.org"#
+ );
+
+ // URLs without scheme.
+ let text = "gnome.org";
+ assert_eq!(
+ linkify(text),
+ r#"gnome.org"#
+ );
+
+ let text = "gitlab.gnome.org/World/fractal";
+ assert_eq!(
+ linkify(text),
+ r#"gitlab.gnome.org/World/fractal"#
+ );
+
+ let text = "matrix.to/#/!somewhere%3Aexample.org?via=elsewhere.ca";
+ assert_eq!(
+ linkify(text),
+ r#"matrix.to/#/!somewhere%3Aexample.org?via=elsewhere.ca"#
+ );
+
+ // `matrix:` URIs.
+ let text = "matrix:r/somewhere:example.org";
+ assert_eq!(
+ linkify(text),
+ r#"matrix:r/somewhere:example.org"#
+ );
+
+ let text = "matrix:roomid/somewhere:example.org?via=elsewhere.ca";
+ assert_eq!(
+ linkify(text),
+ r#"matrix:roomid/somewhere:example.org?via=elsewhere.ca"#
+ );
+
+ let text = "matrix:roomid/somewhere:example.org/e/event?via=elsewhere.ca";
+ assert_eq!(
+ linkify(text),
+ r#"matrix:roomid/somewhere:example.org/e/event?via=elsewhere.ca"#
+ );
+
+ let text = "matrix:u/alice:example.org?action=chat";
+ assert_eq!(
+ linkify(text),
+ r#"matrix:u/alice:example.org?action=chat"#
+ );
+
+ // Invalid TLDs.
+ let text = "gnome.invalid";
+ assert_eq!(linkify(text), "gnome.invalid");
+
+ let text = "org.gnome.fractal";
+ assert_eq!(linkify(text), "org.gnome.fractal");
+
+ // `matrix:` that is not a URI scheme.
+ let text = "My homeserver for matrix: gnome.org";
+ assert_eq!(
+ linkify(text),
+ r#"My homeserver for matrix: gnome.org"#
+ );
+}