summaryrefslogtreecommitdiff
path: root/src/channel
diff options
context:
space:
mode:
authorOwen Jacobson <owen@grimoire.ca>2024-10-21 00:36:44 -0400
committerOwen Jacobson <owen@grimoire.ca>2024-10-22 10:58:11 -0400
commit3f9648eed48cd8b6cd35d0ae2ee5bbe25fa735ac (patch)
tree8ecdd86cd9e09d8a3bd55ec1f72888a81498cc38 /src/channel
parent379e97c2cb145bc3a495aa14746273d83b508214 (diff)
Canonicalize login and channel names.
Canonicalization does two things: * It prevents duplicate names that differ only by case or only by normalization/encoding sequence; and * It makes certain name-based comparisons "case-insensitive" (generalizing via Unicode's case-folding rules). This change is complicated, as it means that every name now needs to be stored in two forms. Unfortunately, this is _very likely_ a breaking schema change. The migrations in this commit perform a best-effort attempt to canonicalize existing channel or login names, but it's likely any existing channels or logins with non-ASCII characters will not be canonicalize correctly. Since clients look at all channel names and all login names on boot, and since the code in this commit verifies canonicalization when reading from the database, this will effectively make the server un-usuable until any incorrectly-canonicalized values are either manually canonicalized, or removed It might be possible to do better with [the `icu` sqlite3 extension][icu], but (a) I'm not convinced of that and (b) this commit is already huge; adding database extension support would make it far larger. [icu]: https://sqlite.org/src/dir/ext/icu For some references on why it's worth storing usernames this way, see <https://www.b-list.org/weblog/2018/nov/26/case/> and the refernced talk, as well as <https://www.b-list.org/weblog/2018/feb/11/usernames/>. Bennett's treatment of this issue is, to my eye, much more readable than the referenced Unicode technical reports, and I'm inclined to trust his opinion given that he maintains a widely-used, internet-facing user registration library for Django.
Diffstat (limited to 'src/channel')
-rw-r--r--src/channel/app.rs49
-rw-r--r--src/channel/mod.rs5
-rw-r--r--src/channel/name.rs30
-rw-r--r--src/channel/repo.rs212
-rw-r--r--src/channel/routes/post.rs3
-rw-r--r--src/channel/snapshot.rs4
6 files changed, 185 insertions, 118 deletions
diff --git a/src/channel/app.rs b/src/channel/app.rs
index ea60943..b8ceeb0 100644
--- a/src/channel/app.rs
+++ b/src/channel/app.rs
@@ -2,12 +2,16 @@ use chrono::TimeDelta;
use itertools::Itertools;
use sqlx::sqlite::SqlitePool;
-use super::{repo::Provider as _, Channel, History, Id, Name};
+use super::{
+ repo::{LoadError, Provider as _},
+ Channel, History, Id,
+};
use crate::{
clock::DateTime,
db::{Duplicate as _, NotFound as _},
event::{repo::Provider as _, Broadcaster, Event, Sequence},
message::repo::Provider as _,
+ name::{self, Name},
};
pub struct Channels<'a> {
@@ -38,7 +42,7 @@ impl<'a> Channels<'a> {
// This function is careless with respect to time, and gets you the channel as
// it exists in the specific moment when you call it.
- pub async fn get(&self, channel: &Id) -> Result<Option<Channel>, sqlx::Error> {
+ pub async fn get(&self, channel: &Id) -> Result<Option<Channel>, Error> {
let mut tx = self.db.begin().await?;
let channel = tx.channels().by_id(channel).await.optional()?;
tx.commit().await?;
@@ -88,7 +92,7 @@ impl<'a> Channels<'a> {
Ok(())
}
- pub async fn expire(&self, relative_to: &DateTime) -> Result<(), sqlx::Error> {
+ pub async fn expire(&self, relative_to: &DateTime) -> Result<(), ExpireError> {
// Somewhat arbitrarily, expire after 90 days.
let expire_at = relative_to.to_owned() - TimeDelta::days(90);
@@ -137,6 +141,17 @@ pub enum CreateError {
DuplicateName(Name),
#[error(transparent)]
Database(#[from] sqlx::Error),
+ #[error(transparent)]
+ Name(#[from] name::Error),
+}
+
+impl From<LoadError> for CreateError {
+ fn from(error: LoadError) -> Self {
+ match error {
+ LoadError::Database(error) => error.into(),
+ LoadError::Name(error) => error.into(),
+ }
+ }
}
#[derive(Debug, thiserror::Error)]
@@ -147,4 +162,32 @@ pub enum Error {
Deleted(Id),
#[error(transparent)]
Database(#[from] sqlx::Error),
+ #[error(transparent)]
+ Name(#[from] name::Error),
+}
+
+impl From<LoadError> for Error {
+ fn from(error: LoadError) -> Self {
+ match error {
+ LoadError::Database(error) => error.into(),
+ LoadError::Name(error) => error.into(),
+ }
+ }
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum ExpireError {
+ #[error(transparent)]
+ Database(#[from] sqlx::Error),
+ #[error(transparent)]
+ Name(#[from] name::Error),
+}
+
+impl From<LoadError> for ExpireError {
+ fn from(error: LoadError) -> Self {
+ match error {
+ LoadError::Database(error) => error.into(),
+ LoadError::Name(error) => error.into(),
+ }
+ }
}
diff --git a/src/channel/mod.rs b/src/channel/mod.rs
index fb13e92..eb8200b 100644
--- a/src/channel/mod.rs
+++ b/src/channel/mod.rs
@@ -2,11 +2,8 @@ pub mod app;
pub mod event;
mod history;
mod id;
-mod name;
pub mod repo;
mod routes;
mod snapshot;
-pub use self::{
- event::Event, history::History, id::Id, name::Name, routes::router, snapshot::Channel,
-};
+pub use self::{event::Event, history::History, id::Id, routes::router, snapshot::Channel};
diff --git a/src/channel/name.rs b/src/channel/name.rs
deleted file mode 100644
index fc82dec..0000000
--- a/src/channel/name.rs
+++ /dev/null
@@ -1,30 +0,0 @@
-use std::fmt;
-
-use crate::nfc;
-
-#[derive(
- Clone, Debug, Default, Eq, PartialEq, serde::Deserialize, serde::Serialize, sqlx::Type,
-)]
-#[serde(transparent)]
-#[sqlx(transparent)]
-pub struct Name(nfc::String);
-
-impl fmt::Display for Name {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- let Self(name) = self;
- name.fmt(f)
- }
-}
-
-impl From<String> for Name {
- fn from(name: String) -> Self {
- Self(name.into())
- }
-}
-
-impl From<Name> for String {
- fn from(name: Name) -> Self {
- let Name(name) = name;
- name.into()
- }
-}
diff --git a/src/channel/repo.rs b/src/channel/repo.rs
index 3353bfd..4baa95b 100644
--- a/src/channel/repo.rs
+++ b/src/channel/repo.rs
@@ -1,9 +1,12 @@
+use futures::stream::{StreamExt as _, TryStreamExt as _};
use sqlx::{sqlite::Sqlite, SqliteConnection, Transaction};
use crate::{
- channel::{Channel, History, Id, Name},
+ channel::{Channel, History, Id},
clock::DateTime,
+ db::NotFound,
event::{Instant, ResumePoint, Sequence},
+ name::{self, Name},
};
pub trait Provider {
@@ -21,132 +24,160 @@ pub struct Channels<'t>(&'t mut SqliteConnection);
impl<'c> Channels<'c> {
pub async fn create(&mut self, name: &Name, created: &Instant) -> Result<History, sqlx::Error> {
let id = Id::generate();
- let channel = sqlx::query!(
+ let name = name.clone();
+ let display_name = name.display();
+ let canonical_name = name.canonical();
+ let created = *created;
+
+ sqlx::query!(
r#"
insert
- into channel (id, name, created_at, created_sequence)
- values ($1, $2, $3, $4)
- returning
- id as "id: Id",
- name as "name!: Name", -- known non-null as we just set it
- created_at as "created_at: DateTime",
- created_sequence as "created_sequence: Sequence"
+ into channel (id, created_at, created_sequence)
+ values ($1, $2, $3)
"#,
id,
- name,
created.at,
created.sequence,
)
- .map(|row| History {
+ .execute(&mut *self.0)
+ .await?;
+
+ sqlx::query!(
+ r#"
+ insert into channel_name (id, display_name, canonical_name)
+ values ($1, $2, $3)
+ "#,
+ id,
+ display_name,
+ canonical_name,
+ )
+ .execute(&mut *self.0)
+ .await?;
+
+ let channel = History {
channel: Channel {
- id: row.id,
- name: row.name,
+ id,
+ name: name.clone(),
deleted_at: None,
},
- created: Instant::new(row.created_at, row.created_sequence),
+ created,
deleted: None,
- })
- .fetch_one(&mut *self.0)
- .await?;
+ };
Ok(channel)
}
- pub async fn by_id(&mut self, channel: &Id) -> Result<History, sqlx::Error> {
+ pub async fn by_id(&mut self, channel: &Id) -> Result<History, LoadError> {
let channel = sqlx::query!(
r#"
select
id as "id: Id",
- channel.name as "name: Name",
+ name.display_name as "display_name?: String",
+ name.canonical_name as "canonical_name?: String",
channel.created_at as "created_at: DateTime",
channel.created_sequence as "created_sequence: Sequence",
deleted.deleted_at as "deleted_at?: DateTime",
deleted.deleted_sequence as "deleted_sequence?: Sequence"
from channel
+ left join channel_name as name
+ using (id)
left join channel_deleted as deleted
using (id)
where id = $1
"#,
channel,
)
- .map(|row| History {
- channel: Channel {
- id: row.id,
- name: row.name.unwrap_or_default(),
- deleted_at: row.deleted_at,
- },
- created: Instant::new(row.created_at, row.created_sequence),
- deleted: Instant::optional(row.deleted_at, row.deleted_sequence),
+ .map(|row| {
+ Ok::<_, name::Error>(History {
+ channel: Channel {
+ id: row.id,
+ name: Name::optional(row.display_name, row.canonical_name)?.unwrap_or_default(),
+ deleted_at: row.deleted_at,
+ },
+ created: Instant::new(row.created_at, row.created_sequence),
+ deleted: Instant::optional(row.deleted_at, row.deleted_sequence),
+ })
})
.fetch_one(&mut *self.0)
- .await?;
+ .await??;
Ok(channel)
}
- pub async fn all(&mut self, resume_at: ResumePoint) -> Result<Vec<History>, sqlx::Error> {
+ pub async fn all(&mut self, resume_at: ResumePoint) -> Result<Vec<History>, LoadError> {
let channels = sqlx::query!(
r#"
select
id as "id: Id",
- channel.name as "name: Name",
+ name.display_name as "display_name: String",
+ name.canonical_name as "canonical_name: String",
channel.created_at as "created_at: DateTime",
channel.created_sequence as "created_sequence: Sequence",
- deleted.deleted_at as "deleted_at: DateTime",
- deleted.deleted_sequence as "deleted_sequence: Sequence"
+ deleted.deleted_at as "deleted_at?: DateTime",
+ deleted.deleted_sequence as "deleted_sequence?: Sequence"
from channel
+ left join channel_name as name
+ using (id)
left join channel_deleted as deleted
using (id)
where coalesce(channel.created_sequence <= $1, true)
- order by channel.name
+ order by name.canonical_name
"#,
resume_at,
)
- .map(|row| History {
- channel: Channel {
- id: row.id,
- name: row.name.unwrap_or_default(),
- deleted_at: row.deleted_at,
- },
- created: Instant::new(row.created_at, row.created_sequence),
- deleted: Instant::optional(row.deleted_at, row.deleted_sequence),
+ .map(|row| {
+ Ok::<_, name::Error>(History {
+ channel: Channel {
+ id: row.id,
+ name: Name::optional(row.display_name, row.canonical_name)?.unwrap_or_default(),
+ deleted_at: row.deleted_at,
+ },
+ created: Instant::new(row.created_at, row.created_sequence),
+ deleted: Instant::optional(row.deleted_at, row.deleted_sequence),
+ })
})
- .fetch_all(&mut *self.0)
+ .fetch(&mut *self.0)
+ .map(|res| Ok::<_, LoadError>(res??))
+ .try_collect()
.await?;
Ok(channels)
}
- pub async fn replay(
- &mut self,
- resume_at: Option<Sequence>,
- ) -> Result<Vec<History>, sqlx::Error> {
+ pub async fn replay(&mut self, resume_at: Option<Sequence>) -> Result<Vec<History>, LoadError> {
let channels = sqlx::query!(
r#"
select
id as "id: Id",
- channel.name as "name: Name",
+ name.display_name as "display_name: String",
+ name.canonical_name as "canonical_name: String",
channel.created_at as "created_at: DateTime",
channel.created_sequence as "created_sequence: Sequence",
- deleted.deleted_at as "deleted_at: DateTime",
- deleted.deleted_sequence as "deleted_sequence: Sequence"
+ deleted.deleted_at as "deleted_at?: DateTime",
+ deleted.deleted_sequence as "deleted_sequence?: Sequence"
from channel
+ left join channel_name as name
+ using (id)
left join channel_deleted as deleted
using (id)
where coalesce(channel.created_sequence > $1, true)
"#,
resume_at,
)
- .map(|row| History {
- channel: Channel {
- id: row.id,
- name: row.name.unwrap_or_default(),
- deleted_at: row.deleted_at,
- },
- created: Instant::new(row.created_at, row.created_sequence),
- deleted: Instant::optional(row.deleted_at, row.deleted_sequence),
+ .map(|row| {
+ Ok::<_, name::Error>(History {
+ channel: Channel {
+ id: row.id,
+ name: Name::optional(row.display_name, row.canonical_name)?.unwrap_or_default(),
+ deleted_at: row.deleted_at,
+ },
+ created: Instant::new(row.created_at, row.created_sequence),
+ deleted: Instant::optional(row.deleted_at, row.deleted_sequence),
+ })
})
- .fetch_all(&mut *self.0)
+ .fetch(&mut *self.0)
+ .map(|res| Ok::<_, LoadError>(res??))
+ .try_collect()
.await?;
Ok(channels)
@@ -156,19 +187,18 @@ impl<'c> Channels<'c> {
&mut self,
channel: &History,
deleted: &Instant,
- ) -> Result<History, sqlx::Error> {
+ ) -> Result<History, LoadError> {
let id = channel.id();
- sqlx::query_scalar!(
+ sqlx::query!(
r#"
insert into channel_deleted (id, deleted_at, deleted_sequence)
values ($1, $2, $3)
- returning 1 as "deleted: bool"
"#,
id,
deleted.at,
deleted.sequence,
)
- .fetch_one(&mut *self.0)
+ .execute(&mut *self.0)
.await?;
// Small social responsibility hack here: when a channel is deleted, its name is
@@ -179,16 +209,14 @@ impl<'c> Channels<'c> {
// This also avoids the need for a separate name reservation table to ensure
// that live channels have unique names, since the `channel` table's name field
// is unique over non-null values.
- sqlx::query_scalar!(
+ sqlx::query!(
r#"
- update channel
- set name = null
+ delete from channel_name
where id = $1
- returning 1 as "updated: bool"
"#,
id,
)
- .fetch_one(&mut *self.0)
+ .execute(&mut *self.0)
.await?;
let channel = self.by_id(id).await?;
@@ -230,38 +258,66 @@ impl<'c> Channels<'c> {
Ok(())
}
- pub async fn expired(&mut self, expired_at: &DateTime) -> Result<Vec<History>, sqlx::Error> {
+ pub async fn expired(&mut self, expired_at: &DateTime) -> Result<Vec<History>, LoadError> {
let channels = sqlx::query!(
r#"
select
channel.id as "id: Id",
- channel.name as "name: Name",
+ name.display_name as "display_name: String",
+ name.canonical_name as "canonical_name: String",
channel.created_at as "created_at: DateTime",
channel.created_sequence as "created_sequence: Sequence",
deleted.deleted_at as "deleted_at?: DateTime",
deleted.deleted_sequence as "deleted_sequence?: Sequence"
from channel
+ left join channel_name as name
+ using (id)
left join channel_deleted as deleted
using (id)
left join message
+ on channel.id = message.channel
where channel.created_at < $1
and message.id is null
and deleted.id is null
"#,
expired_at,
)
- .map(|row| History {
- channel: Channel {
- id: row.id,
- name: row.name.unwrap_or_default(),
- deleted_at: row.deleted_at,
- },
- created: Instant::new(row.created_at, row.created_sequence),
- deleted: Instant::optional(row.deleted_at, row.deleted_sequence),
+ .map(|row| {
+ Ok::<_, name::Error>(History {
+ channel: Channel {
+ id: row.id,
+ name: Name::optional(row.display_name, row.canonical_name)?.unwrap_or_default(),
+ deleted_at: row.deleted_at,
+ },
+ created: Instant::new(row.created_at, row.created_sequence),
+ deleted: Instant::optional(row.deleted_at, row.deleted_sequence),
+ })
})
- .fetch_all(&mut *self.0)
+ .fetch(&mut *self.0)
+ .map(|res| Ok::<_, LoadError>(res??))
+ .try_collect()
.await?;
Ok(channels)
}
}
+
+#[derive(Debug, thiserror::Error)]
+#[error(transparent)]
+pub enum LoadError {
+ Database(#[from] sqlx::Error),
+ Name(#[from] name::Error),
+}
+
+impl<T> NotFound for Result<T, LoadError> {
+ type Ok = T;
+ type Error = LoadError;
+
+ fn optional(self) -> Result<Option<T>, LoadError> {
+ match self {
+ Ok(value) => Ok(Some(value)),
+ Err(LoadError::Database(sqlx::Error::RowNotFound)) => Ok(None),
+ Err(other) => Err(other),
+ }
+ }
+}
diff --git a/src/channel/routes/post.rs b/src/channel/routes/post.rs
index d354f79..9781dd7 100644
--- a/src/channel/routes/post.rs
+++ b/src/channel/routes/post.rs
@@ -6,10 +6,11 @@ use axum::{
use crate::{
app::App,
- channel::{app, Channel, Name},
+ channel::{app, Channel},
clock::RequestedAt,
error::Internal,
login::Login,
+ name::Name,
};
pub async fn handler(
diff --git a/src/channel/snapshot.rs b/src/channel/snapshot.rs
index dc2894d..129c0d6 100644
--- a/src/channel/snapshot.rs
+++ b/src/channel/snapshot.rs
@@ -1,8 +1,8 @@
use super::{
event::{Created, Event},
- Id, Name,
+ Id,
};
-use crate::clock::DateTime;
+use crate::{clock::DateTime, name::Name};
#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize)]
pub struct Channel {