summaryrefslogtreecommitdiff
path: root/src/nfc.rs
diff options
context:
space:
mode:
authorOwen Jacobson <owen@grimoire.ca>2024-10-19 01:51:30 -0400
committerOwen Jacobson <owen@grimoire.ca>2024-10-21 00:49:05 -0400
commit379e97c2cb145bc3a495aa14746273d83b508214 (patch)
tree218bbe2572af9dd4b165ff05495d084dc0bd8905 /src/nfc.rs
parent98af8ff80da919a1126ba7c6afa65e6654b5ecde (diff)
Unicode normalization on input.
This normalizes the following values:

* login names
* passwords
* channel names
* message bodies, because why not

The goal here is to have a canonical representation of these values, so that, for example, the service does not inadvertently host two channels whose names are semantically identical but differ in the specifics of how diacritics are encoded, or two users whose names are identical.

Normalization is done on input from the wire, using Serde hooks, and when reading from the database. The `crate::nfc::String` type implements these normalizations (as well as normalizing whenever converted from a `std::string::String` generally).

This change does not cover:

* Trying to cope with passwords that were created as non-normalized strings, which are now non-verifiable as all the paths to verify passwords normalize the input.
* Trying to ensure that non-normalized data in the database compares reasonably to normalized data. Fortunately, we don't _do_ very many string comparisons (I think only login names), so this isn't a huge deal at this stage. Login names will probably have to Get Fixed later on, when we figure out how to handle case folding for login name verification.
Diffstat (limited to 'src/nfc.rs')
-rw-r--r--src/nfc.rs103
1 files changed, 103 insertions, 0 deletions
diff --git a/src/nfc.rs b/src/nfc.rs
new file mode 100644
index 0000000..70e936c
--- /dev/null
+++ b/src/nfc.rs
@@ -0,0 +1,103 @@
+use std::{fmt, string::String as StdString};
+
+use sqlx::{
+ encode::{Encode, IsNull},
+ Database, Decode, Type,
+};
+use unicode_normalization::UnicodeNormalization as _;
+
+/// Owned text that is intended to always be held in Unicode Normalization
+/// Form C (NFC).
+///
+/// The wrapped field is private. Serde is told (via the `from`/`into`
+/// container attributes) to (de)serialize this type by converting to and from
+/// `std::string::String`, so deserialized input goes through the
+/// `From<StdString>` conversion for this type.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+#[derive(serde::Deserialize, serde::Serialize)]
+#[serde(from = "StdString", into = "StdString")]
+pub struct String(StdString);
+
+impl fmt::Display for String {
+    /// Writes the wrapped text by delegating to the inner string's own
+    /// `Display` implementation.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Display::fmt(&self.0, f)
+    }
+}
+
+impl From<StdString> for String {
+    /// Wraps `value` after passing its characters through `nfc()` and
+    /// collecting the result into a fresh string.
+    fn from(value: StdString) -> Self {
+        Self(value.nfc().collect())
+    }
+}
+
+impl From<String> for StdString {
+    /// Unwraps the newtype, moving the inner string out unchanged.
+    fn from(value: String) -> Self {
+        value.0
+    }
+}
+
+impl std::ops::Deref for String {
+    type Target = StdString;
+
+    /// Gives shared (read-only) access to the wrapped string.
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+// `Type` is written by hand rather than derived so that `Decode` can also be
+// written by hand and normalize on read. Apart from that, this mirrors what
+// `#[derive(sqlx::Type)]` with `#[sqlx(transparent)]` would produce: both
+// methods simply forward to the `std::string::String` implementation.
+impl<DB: Database> Type<DB> for String
+where
+    StdString: Type<DB>,
+{
+    fn type_info() -> DB::TypeInfo {
+        <StdString as Type<DB>>::type_info()
+    }
+
+    fn compatible(ty: &DB::TypeInfo) -> bool {
+        <StdString as Type<DB>>::compatible(ty)
+    }
+}
+
+impl<'r, DB: Database> Decode<'r, DB> for String
+where
+    StdString: Decode<'r, DB>,
+{
+    /// Decodes the column as a `std::string::String`, then normalizes it via
+    /// the `From<StdString>` conversion so values read from the database get
+    /// the same treatment as values converted anywhere else.
+    ///
+    /// Any error from the underlying string decode is returned as-is.
+    fn decode(value: DB::ValueRef<'r>) -> Result<Self, sqlx::error::BoxDynError> {
+        let raw = <StdString as Decode<'r, DB>>::decode(value)?;
+        Ok(Self::from(raw))
+    }
+}
+
+// Encoding performs no normalization of its own: every method forwards to the
+// `std::string::String` implementation for the wrapped value.
+impl<'q, DB: Database> Encode<'q, DB> for String
+where
+    StdString: Encode<'q, DB>,
+{
+    fn encode_by_ref(
+        &self,
+        buf: &mut DB::ArgumentBuffer<'q>,
+    ) -> Result<IsNull, sqlx::error::BoxDynError> {
+        <StdString as Encode<'q, DB>>::encode_by_ref(&self.0, buf)
+    }
+
+    fn encode(
+        self,
+        buf: &mut DB::ArgumentBuffer<'q>,
+    ) -> Result<IsNull, sqlx::error::BoxDynError> {
+        <StdString as Encode<'q, DB>>::encode(self.0, buf)
+    }
+
+    fn produces(&self) -> Option<DB::TypeInfo> {
+        <StdString as Encode<'q, DB>>::produces(&self.0)
+    }
+
+    fn size_hint(&self) -> usize {
+        <StdString as Encode<'q, DB>>::size_hint(&self.0)
+    }
+}