From 3bd63e20777126216777b392615dc8144f21bb9a Mon Sep 17 00:00:00 2001 From: Owen Jacobson Date: Sat, 30 Aug 2025 00:50:24 -0400 Subject: Generate, store, and deliver a VAPID key. VAPID is used to authenticate applications to push brokers, as part of the [Web Push] specification. It's notionally optional, but we believe [Apple requires it][apple], and in any case making it impossible to use subscription URLs without the corresponding private key available, and thus harder to impersonate the server, seems like a good security practice regardless. [Web Push]: https://developer.mozilla.org/en-US/docs/Web/API/Push_API [apple]: https://developer.apple.com/documentation/usernotifications/sending-web-push-notifications-in-web-apps-and-browsers There are several implementations of VAPID for Rust: * [web_push](https://docs.rs/web-push/latest/web_push/) includes an implementation of VAPID but requires callers to provision their own keys. We will likely use this crate for Web Push fulfilment, but we cannot use it for key generation. * [vapid](https://docs.rs/vapid/latest/vapid/) includes an implementation of VAPID key generation. It delegates to `openssl` to handle cryptographic operations. * [p256](https://docs.rs/p256/latest/p256/) implements NIST P-256 in Rust. It's maintained by the RustCrypto team, though as of this writing it is largely written by a single contributor. It isn't specifically designed for use with VAPID. I opted to use p256 for this, as I believe the RustCrypto team are the most likely to produce a correct and secure implementation, and because openssl has consistently left a bad taste in my mouth for years. Because it's a general implementation of the algorithm, I expect that it will require more work for us to adapt it for use with VAPID specifically; I'm willing to chance it and we can swap it out for the vapid crate if it sucks. This has left me with one area of uncertainty: I'm not actually sure I'm using the right parts of p256. The choice of `ecdsa::SigningKey` over `p256::SecretKey` is based on [the MDN docs] using phrases like "This value is part of a signing key pair generated by your application server, and usable with elliptic curve digital signature (ECDSA), over the P-256 curve." and on [RFC 8292]'s "The 'k' parameter includes an ECDSA public key in uncompressed form that is encoded using base64url encoding. However, we won't be able to test my implementation until we implement some other key parts of Web Push, which are out of scope of this commit. [the MDN docs]: https://developer.mozilla.org/en-US/docs/Web/API/PushSubscription/options [RFC 8292]: https://datatracker.ietf.org/doc/html/rfc8292#section-3.2 Following the design used for storing logins and users, VAPID keys are split into a non-synchronized part (consisting of the private key), whose exposure would allow others to impersonate the Pilcrow server, and a synchronized part (consisting of event coordinates and, notionally, the public key), which is non-sensitive and can be safely shared with any user. However, the public key is derived from the stored private key, rather than being stored directly, to minimize redundancy in the stored data. Following the design used for expiring stale entities, the app checks for and creates, or rotates, its VAPID key using middleware that runs before most API requests. If, at that point, the key is either absent, or more than 30 days old, it is replaced. This imposes a small tax on API request latency, which is used to fund prompt and automatic key rotation without the need for an operator-facing key management interface. VAPID keys are delivered to clients via the event stream, as laid out in `docs/api/events.md`. There are a few reasons for this, but the big one is that changing the VAPID key would immediately invalidate push subscriptions: we throw away the private key, so we wouldn't be able to publish to them any longer. Clients must replace their push subscriptions in order to resume delivery, and doing so promptly when notified that the key has changed will minimize the gap. This design is intended to allow for manual key rotation. The key can be rotated "immedately" by emptying the `vapid_key` and `vapid_signing_key` tables (which destroys the rotated kye); the server will generate a new one before it is needed, and will notify clients that the key has been invalidated. This change includes client support for tracking the current VAPID key. The client doesn't _use_ this information anywhere, yet, but it has it. --- src/vapid/app.rs | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 src/vapid/app.rs (limited to 'src/vapid/app.rs') diff --git a/src/vapid/app.rs b/src/vapid/app.rs new file mode 100644 index 0000000..8886c9f --- /dev/null +++ b/src/vapid/app.rs @@ -0,0 +1,89 @@ +use chrono::TimeDelta; +use sqlx::SqlitePool; + +use super::{History, repo, repo::Provider as _}; +use crate::{ + clock::DateTime, + db::NotFound as _, + event::{Broadcaster, Sequence, repo::Provider}, +}; + +pub struct Vapid<'a> { + db: &'a SqlitePool, + events: &'a Broadcaster, +} + +impl<'a> Vapid<'a> { + pub const fn new(db: &'a SqlitePool, events: &'a Broadcaster) -> Self { + Self { db, events } + } + + pub async fn refresh_key(&self, ensure_at: &DateTime) -> Result<(), Error> { + let mut tx = self.db.begin().await?; + let key = tx.vapid().current().await.optional()?; + if key.is_none() { + let changed_at = tx.sequence().next(ensure_at).await?; + let (key, secret) = History::begin(&changed_at); + + tx.vapid().clear().await?; + tx.vapid().store_signing_key(&secret).await?; + + let events = key.events().filter(Sequence::start_from(changed_at)); + tx.vapid().record_events(events.clone()).await?; + + tx.commit().await?; + + self.events.broadcast_from(events); + } else if let Some(key) = key + // Somewhat arbitrarily, rotate keys every 30 days. + && key.older_than(ensure_at.to_owned() - TimeDelta::days(30)) + { + // If you can think of a way to factor out this duplication, be my guest. I tried. + // The only approach I could think of mirrors `crate::user::create::Create`, encoding + // the process in a state machine made of types, and that's a very complex solution + // to a problem that doesn't seem to merit it. -o + let changed_at = tx.sequence().next(ensure_at).await?; + let (key, secret) = key.rotate(&changed_at); + + tx.vapid().clear().await?; + tx.vapid().store_signing_key(&secret).await?; + + // Refactoring constraint: this `events` iterator borrows `key`. Anything that moves + // `key` has to give it back, but it can't give both `key` back and an event iterator + // borrowing from `key` because Rust doesn't support types that borrow from other + // parts of themselves. + let events = key.events().filter(Sequence::start_from(changed_at)); + tx.vapid().record_events(events.clone()).await?; + + // Refactoring constraint: we _really_ want to commit the transaction before we send + // out events, so that anything acting on those events is guaranteed to see the state + // of the service at some point at or after the side effects of this. I'd also prefer + // to keep the commit in the same method that the transaction is begun in, for clarity. + tx.commit().await?; + + self.events.broadcast_from(events); + } + // else, the key exists and is not stale. Don't bother allocating a sequence number, and + // in fact throw away the whole transaction. + + Ok(()) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error(transparent)] + Database(#[from] sqlx::Error), + #[error(transparent)] + Ecdsa(#[from] p256::ecdsa::Error), +} + +impl From for Error { + fn from(error: repo::Error) -> Self { + use repo::Error; + match error { + Error::Database(error) => error.into(), + Error::Ecdsa(error) => error.into(), + } + } +} -- cgit v1.2.3 From 96b62f1018641b3dc28cc189d314a1bff475b751 Mon Sep 17 00:00:00 2001 From: Owen Jacobson Date: Sat, 30 Aug 2025 02:59:51 -0400 Subject: Allow administrators to rotate VAPID keys immediately if needed. In spite of my design preference away from CLI tools, this is a CLI tool: `pilcrow --database-url rotate-vapid-key`. This is something we can implement here and now, which does not require us to confront the long-avoided issue of how to handle the idea that some users are allowed to make server-operational changes and some aren't, by delegating the problem back to the OS. The implementation is a little half-baked to make it easier to rip out later. I would ordinarily prefer to push both `serve` (the default verb, not actually named in this change) and `rotate-vapid-key` into their own, separate CLI submodules, with their own argument structs, but that change is much more intrusive and would make this effectively permanent. This can be yanked out in a few minutes by deleting a few lines of `cli.rs` and inlining the `serve` function. Nonetheless, nothing is as permanent as a temporary solution, so I've written at least some bare-minimum operations documentation on how to use this and what it does. --- docs/ops.md | 14 ++++++++++++++ src/cli.rs | 25 +++++++++++++++++++++++-- src/vapid/app.rs | 17 ++++++++++++++++- 3 files changed, 53 insertions(+), 3 deletions(-) (limited to 'src/vapid/app.rs') diff --git a/docs/ops.md b/docs/ops.md index a622c04..678d2a4 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -21,3 +21,17 @@ By default, the `pilcrow` command will set the process' umask to a value that pr - any octal value corresponding to a valid umask, such as `0027`. Pilcrow does not check or change the permissions of the database after creation. Changing the umask of the server after the database has been created has no effect on the database's filesystem permissions. + +## VAPID keys + +Pilcrow uses [VAPID] to identify itself to public Web Push brokers, which then deliver notifications to Pilcrow's users of interesting events, such as messages. VAPID uses cryptographic signatures to authenticate the server. + +[VAPID]: https://datatracker.ietf.org/doc/html/rfc8292 + +The key is stored in the `pilcrow` database. Pilcrow will create its key automatically, and will rotate the key every 30 days. + +If the `pilcrow` database is accessed inappropriately or leaked, then the key can be used to send push notifications to Pilcrow users as if from the Pilcrow server. If this happens, the key _must_ be rotated to prevent misuse. + +The key can be rotated at any time running `pilcrow […options…] rotate-vapid-key`, as the same user Pilcrow normally runs as. This does not require that the server be shut down or restarted. The `[…options…]` must be set to the same values as used by the running server. + +When the key is rotated, no further push messages will be sent from the Pilcrow server using that key. Unfortunately, the Web Push protocol doesn't allow Pilcrow to proactively invalidate clients' push subscriptions, but Pilcrow will inform clients when the key is rotated so that they can invalidate the affected subscriptions themselves. diff --git a/src/cli.rs b/src/cli.rs index 378686b..263a4fd 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -10,7 +10,8 @@ use axum::{ middleware, response::{IntoResponse, Response}, }; -use clap::{CommandFactory, Parser}; +use chrono::Utc; +use clap::{CommandFactory, Parser, Subcommand}; use sqlx::sqlite::SqlitePool; use tokio::net; @@ -65,6 +66,15 @@ pub struct Args { /// upgrades #[arg(short = 'D', long, env, default_value = "sqlite://pilcrow.db.backup")] backup_database_url: String, + + #[command(subcommand)] + command: Option, +} + +#[derive(Subcommand)] +enum Command { + /// Immediately rotate the server's VAPID (Web Push) application key. + RotateVapidKey, } impl Args { @@ -89,6 +99,16 @@ impl Args { let pool = self.pool().await?; let app = App::from(pool); + + match self.command { + None => self.serve(app).await?, + Some(Command::RotateVapidKey) => app.vapid().rotate_key(&Utc::now()).await?, + } + + Result::<_, Error>::Ok(()) + } + + async fn serve(self, app: App) -> Result<(), Error> { let app = routes::routes(&app) .route_layer(middleware::from_fn(clock::middleware)) .route_layer(middleware::map_response(Self::server_info())) @@ -101,7 +121,7 @@ impl Args { println!("{started_msg}"); serve.await?; - Result::<_, Error>::Ok(()) + Ok(()) } async fn listener(&self) -> io::Result { @@ -140,5 +160,6 @@ fn started_msg(listener: &net::TcpListener) -> io::Result { enum Error { Io(#[from] io::Error), Database(#[from] db::Error), + Sqlx(#[from] sqlx::Error), Umask(#[from] umask::Error), } diff --git a/src/vapid/app.rs b/src/vapid/app.rs index 8886c9f..ddb1f4d 100644 --- a/src/vapid/app.rs +++ b/src/vapid/app.rs @@ -1,4 +1,4 @@ -use chrono::TimeDelta; +use chrono::{TimeDelta, Utc}; use sqlx::SqlitePool; use super::{History, repo, repo::Provider as _}; @@ -18,6 +18,21 @@ impl<'a> Vapid<'a> { Self { db, events } } + pub async fn rotate_key(&self, rotate_at: &DateTime) -> Result<(), sqlx::Error> { + let mut tx = self.db.begin().await?; + // This is called from a separate CLI utility (see `cli.rs`), and we _can't_ deliver events + // to active clients from another process, so don't do anything that would require us to + // send events, like generating a new key. + // + // Instead, the server's next `refresh_key` call will generate a key and notify clients + // of the change. All we have to do is remove the existing key, so that the server can know + // to do so. + tx.vapid().clear().await?; + tx.commit().await?; + + Ok(()) + } + pub async fn refresh_key(&self, ensure_at: &DateTime) -> Result<(), Error> { let mut tx = self.db.begin().await?; let key = tx.vapid().current().await.optional()?; -- cgit v1.2.3