diff options
Diffstat (limited to 'src/normalize')
| -rw-r--r-- | src/normalize/mod.rs | 56 | ||||
| -rw-r--r-- | src/normalize/string.rs | 128 |
2 files changed, 184 insertions, 0 deletions
diff --git a/src/normalize/mod.rs b/src/normalize/mod.rs
new file mode 100644
index 0000000..6294201
--- /dev/null
+++ b/src/normalize/mod.rs
@@ -0,0 +1,56 @@
+//! Unicode-normalizing string types.
+//!
+//! Each submodule pairs a marker type implementing `Normalize` with a `String`
+//! alias whose values are normalized by that marker on construction.
+
+mod string;
+
+/// Strings in Unicode Normalization Form C.
+pub mod nfc {
+    use std::string::String as StdString;
+
+    use unicode_normalization::UnicodeNormalization as _;
+
+    /// A string that is converted to NFC when it is constructed.
+    pub type String = super::string::String<Nfc>;
+
+    /// Normalizer that converts text to NFC.
+    #[derive(Clone, Debug, Default, Eq, PartialEq)]
+    pub struct Nfc;
+
+    impl super::string::Normalize for Nfc {
+        fn normalize(&self, value: &str) -> StdString {
+            value.nfc().collect()
+        }
+    }
+}
+
+/// Strings normalized for caseless, compatibility-insensitive comparison.
+pub mod ident {
+    use std::string::String as StdString;
+
+    use unicode_casefold::UnicodeCaseFold as _;
+    use unicode_normalization::UnicodeNormalization as _;
+
+    /// A string that is case-folded and compatibility-normalized when it is
+    /// constructed.
+    pub type String = super::string::String<Ident>;
+
+    /// Normalizer that case-folds text and converts it to NFKC.
+    #[derive(Clone, Debug, Default, Eq, PartialEq)]
+    pub struct Ident;
+
+    impl super::string::Normalize for Ident {
+        fn normalize(&self, value: &str) -> StdString {
+            // Mirrors Unicode compatibility caseless matching (NFD, fold,
+            // NFKD, fold), finishing in the composed form NFKC.
+            //
+            // A single fold-then-NFKC pass is not enough: compatibility
+            // normalization can expand a character into letters that still
+            // need folding (U+2120 SERVICE MARK becomes "SM"), so the result
+            // would not match "sm" and would change when a stored value is
+            // normalized again on read.
+            value.nfd().case_fold().nfkd().case_fold().nfkc().collect()
+        }
+    }
+}
diff --git a/src/normalize/string.rs b/src/normalize/string.rs
new file mode 100644
index 0000000..a0d178c
--- /dev/null
+++ b/src/normalize/string.rs
@@ -0,0 +1,128 @@
+//! A string wrapper that normalizes its contents when it is constructed.
+
+use std::{fmt, string::String as StdString};
+
+use sqlx::{
+    encode::{Encode, IsNull},
+    Database, Decode, Type,
+};
+
+/// A strategy for converting text to a normalized form.
+pub trait Normalize: Clone + Default {
+    /// Returns the normalized form of `value`.
+    fn normalize(&self, value: &str) -> StdString;
+}
+
+/// A string that has been normalized by `N`.
+///
+/// Values created through `From`, serde deserialization, or sqlx decoding are
+/// passed through `N::normalize`. Serialization and encoding emit the stored
+/// string unchanged.
+#[derive(Clone, Debug, Default, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
+#[serde(into = "StdString", from = "StdString")]
+#[serde(bound = "N: Normalize")]
+pub struct String<N>(StdString, N);
+
+impl<N> fmt::Display for String<N> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self(value, _) = self;
+        // Fully qualified so the call does not depend on which formatting
+        // traits happen to be imported.
+        fmt::Display::fmt(value, f)
+    }
+}
+
+/// Normalizes any string-like value with a default-constructed `N`.
+impl<S, N> From<S> for String<N>
+where
+    S: AsRef<str>,
+    N: Normalize,
+{
+    fn from(value: S) -> Self {
+        let normalizer = N::default();
+        let value = normalizer.normalize(value.as_ref());
+
+        Self(value, normalizer)
+    }
+}
+
+/// Unwraps the normalized text, discarding the normalizer.
+impl<N> From<String<N>> for StdString {
+    fn from(value: String<N>) -> Self {
+        let String(value, _) = value;
+        value
+    }
+}
+
+/// Borrows the normalized text as a standard string.
+impl<N> std::ops::Deref for String<N> {
+    type Target = StdString;
+
+    fn deref(&self) -> &Self::Target {
+        let Self(value, _) = self;
+        value
+    }
+}
+
+// Type is manually implemented so that we can implement Decode to do
+// normalization on read. Implementation is otherwise based on
+// `#[derive(sqlx::Type)]` with the `#[sqlx(transparent)]` attribute.
+impl<DB, N> Type<DB> for String<N>
+where
+    DB: Database,
+    StdString: Type<DB>,
+{
+    fn type_info() -> <DB as Database>::TypeInfo {
+        <StdString as Type<DB>>::type_info()
+    }
+
+    fn compatible(ty: &<DB as Database>::TypeInfo) -> bool {
+        <StdString as Type<DB>>::compatible(ty)
+    }
+}
+
+/// Decodes the value as a plain string, then normalizes it.
+impl<'r, DB, N> Decode<'r, DB> for String<N>
+where
+    DB: Database,
+    StdString: Decode<'r, DB>,
+    N: Normalize,
+{
+    fn decode(value: <DB as Database>::ValueRef<'r>) -> Result<Self, sqlx::error::BoxDynError> {
+        let value = StdString::decode(value)?;
+        Ok(Self::from(value))
+    }
+}
+
+/// Encodes exactly as the inner string would; no normalization is repeated.
+impl<'q, DB, N> Encode<'q, DB> for String<N>
+where
+    DB: Database,
+    StdString: Encode<'q, DB>,
+{
+    fn encode_by_ref(
+        &self,
+        buf: &mut <DB as Database>::ArgumentBuffer<'q>,
+    ) -> Result<IsNull, sqlx::error::BoxDynError> {
+        let Self(value, _) = self;
+        value.encode_by_ref(buf)
+    }
+
+    fn encode(
+        self,
+        buf: &mut <DB as Database>::ArgumentBuffer<'q>,
+    ) -> Result<IsNull, sqlx::error::BoxDynError> {
+        let Self(value, _) = self;
+        value.encode(buf)
+    }
+
+    fn produces(&self) -> Option<<DB as Database>::TypeInfo> {
+        let Self(value, _) = self;
+        value.produces()
+    }
+
+    fn size_hint(&self) -> usize {
+        let Self(value, _) = self;
+        value.size_hint()
+    }
+}
