From 70d0197e85ae4ef85202781f6d2dc9e76bd508b3 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Wed, 24 Dec 2025 08:02:12 +0000 Subject: feat(purgatory): add broken purgatory implementation --- src/git/authorization.rs | 113 ++++++++++++++--- src/git/handlers.rs | 321 ++++++++++++++++++++++++++++++++--------------- src/git/mod.rs | 68 ++++++++++ 3 files changed, 380 insertions(+), 122 deletions(-) (limited to 'src/git') diff --git a/src/git/authorization.rs b/src/git/authorization.rs index 4896fc0..fbeaf9e 100644 --- a/src/git/authorization.rs +++ b/src/git/authorization.rs @@ -31,7 +31,7 @@ use anyhow::{anyhow, Result}; use nostr_relay_builder::prelude::*; use nostr_sdk::{EventId, ToBech32}; use std::collections::{HashMap, HashSet}; -use tracing::debug; +use tracing::{debug, info, warn}; use crate::nostr::builder::SharedDatabase; use crate::nostr::events::{ @@ -325,26 +325,31 @@ pub async fn get_authorization_from_db( /// Get the authorization result for a repository scoped to a specific owner /// -/// Unlike `get_authorization_from_db`, this function scopes the authorization -/// to a specific owner's announcement. This is the correct approach for Git push -/// authorization where the URL path specifies the owner. +/// Push authorization checks ONLY purgatory for state events. The database represents +/// the current git state, while purgatory holds the intended future state that pushes +/// should be authorized against. /// /// A push to `alice/my-repo` should only consider authorization from alice's /// announcement, not bob's announcement for the same identifier. /// /// It: -/// 1. Fetches all announcements and states for the identifier -/// 2. Collects authorized maintainers from all announcements (grouped by owner) -/// 3. Looks up the authorized set for the specific owner -/// 4. Finds the latest state event from an authorized maintainer +/// 1. Fetches announcements for the identifier +/// 2. Collects authorized maintainers from owner's announcement +/// 3. Checks purgatory for matching state events from authorized maintainers /// /// Returns an `AuthorizationResult` that indicates whether a push is authorized. -pub async fn get_authorization_for_owner( +pub async fn get_state_authorization_for_specific_owner_repo( database: &SharedDatabase, identifier: &str, owner_pubkey: &str, + purgatory: &std::sync::Arc, + pushed_refs: &[(String, String, String)], + repo_path: &std::path::Path, ) -> Result { - // Fetch all repository data with a single query + use crate::git::list_refs; + use crate::purgatory::RefUpdate; + + // Fetch announcements only - we don't need database states let repo_data = fetch_repository_data(database, identifier).await?; if repo_data.announcements.is_empty() { @@ -380,16 +385,82 @@ pub async fn get_authorization_for_owner( owner_pubkey ); - // Find the latest authorized state from owner's maintainer set - match find_latest_authorized_state(&repo_data.states, &authorized) { - Some(state) => Ok(AuthorizationResult::authorized( - state.clone(), - authorized.into_iter().collect(), - )), - None => Ok(AuthorizationResult::denied( - "No state event found from authorized publishers", - )), + // Check purgatory for matching state events + // Convert pushed refs to RefUpdate (filter out refs/nostr/* refs) + let pushed_updates: Vec = pushed_refs + .iter() + .filter(|(_, _, name)| !name.starts_with("refs/nostr/")) + .map(|(old_oid, new_oid, ref_name)| RefUpdate { + old_oid: old_oid.clone(), + new_oid: new_oid.clone(), + ref_name: ref_name.clone(), + }) + .collect(); + + // Get local refs from repository + let local_refs_list = list_refs(repo_path).unwrap_or_default(); + let local_refs: HashMap = local_refs_list.into_iter().collect(); + + // Find matching state events in purgatory + let matching_events = purgatory.find_matching_states(identifier, &pushed_updates, &local_refs); + + if !matching_events.is_empty() { + debug!( + "Found {} matching state event(s) in purgatory", + matching_events.len() + ); + + // Filter to authorized events and collect them + let authorized_events: Vec = matching_events + .into_iter() + .filter(|event| { + let author_hex = event.pubkey.to_hex(); + authorized.contains(&author_hex) + }) + .collect(); + + if !authorized_events.is_empty() { + // Find the latest event + let latest_authorized = authorized_events + .iter() + .max_by_key(|event| event.created_at) + .unwrap(); // Safe because we checked the vec is not empty + + // Parse the event into RepositoryState + if let Ok(state) = RepositoryState::from_event(latest_authorized.clone()) { + info!( + "Authorized by state event {} from purgatory (author: {})", + latest_authorized.id, + latest_authorized + .pubkey + .to_bech32() + .unwrap_or_else(|_| latest_authorized.pubkey.to_hex()) + ); + + return Ok(AuthorizationResult { + authorized: true, + reason: "Authorized by state event in purgatory".to_string(), + state: Some(state), + maintainers: authorized.into_iter().collect(), + purgatory_events: vec![latest_authorized.clone()], + }); + } else { + warn!( + "Failed to parse purgatory event {} as RepositoryState", + latest_authorized.id + ); + } + } else { + debug!("Purgatory events found but none from authorized authors"); + } + } else { + debug!("No matching state events found in purgatory"); } + + // No matching state found in purgatory + Ok(AuthorizationResult::denied( + "No state event found in purgatory from authorized publishers", + )) } /// Result of authorization check @@ -403,6 +474,8 @@ pub struct AuthorizationResult { pub state: Option, /// The set of valid maintainers (authorized publishers) pub maintainers: Vec, + /// Events from purgatory that authorized this push (state, PR, PR-update events) + pub purgatory_events: Vec, } impl AuthorizationResult { @@ -413,6 +486,7 @@ impl AuthorizationResult { reason: "Push matches latest authorized state".to_string(), state: Some(state), maintainers, + purgatory_events: vec![], } } @@ -423,6 +497,7 @@ impl AuthorizationResult { reason: reason.into(), state: None, maintainers: vec![], + purgatory_events: vec![], } } } diff --git a/src/git/handlers.rs b/src/git/handlers.rs index 8e5f5e1..df6f0e9 100644 --- a/src/git/handlers.rs +++ b/src/git/handlers.rs @@ -4,20 +4,23 @@ use http_body_util::Full; use hyper::{body::Bytes, Response, StatusCode}; +use nostr_sdk::prelude::*; use std::path::PathBuf; +use std::sync::Arc; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tracing::{debug, error, info, warn}; use super::authorization::{ - get_authorization_for_owner, parse_pushed_refs, validate_nostr_ref_pushes, validate_push_refs, - AuthorizationResult, + get_state_authorization_for_specific_owner_repo, parse_pushed_refs, validate_nostr_ref_pushes, + validate_push_refs, AuthorizationResult, }; use super::protocol::{GitService, PktLine}; use super::subprocess::GitSubprocess; use super::try_set_head_if_available; use crate::nostr::builder::SharedDatabase; -use crate::nostr::events::RepositoryState; +use crate::nostr::events::{RepositoryState, KIND_PR, KIND_PR_UPDATE, KIND_REPOSITORY_STATE}; +use crate::purgatory::Purgatory; /// Handle GET /info/refs?service=git-{upload,receive}-pack /// @@ -168,18 +171,24 @@ pub async fn handle_upload_pack( /// Also per GRASP-01: "MUST set repository HEAD per repository state announcement /// as soon as the git data related to that branch has been received." /// +/// Also purgatory GRASP-01: "Accepted repo state announcements, PRs and PR Updates +/// SHOULD be accepted with message "purgatory: won't be served until git data arrives" +/// and kepted in purgatory (not served) until the related git data arrives and +/// otherwise discarded after 30 minutes." +/// /// # Arguments /// * `repo_path` - Path to the bare git repository /// * `request_body` - The git pack data from the client -/// * `database` - Optional database reference for authorization queries +/// * `database` - Database reference for authorization queries /// * `identifier` - The repository identifier (d tag) for authorization lookup /// * `owner_pubkey` - The owner's public key (hex) from the URL path, scoping authorization pub async fn handle_receive_pack( repo_path: PathBuf, request_body: Bytes, - database: Option, + database: SharedDatabase, identifier: &str, owner_pubkey: &str, + purgatory: Arc, ) -> Result>, GitError> { debug!("Handling receive-pack for {:?}", repo_path); @@ -187,37 +196,46 @@ pub async fn handle_receive_pack( return Err(GitError::RepositoryNotFound); } - // Keep track of state for HEAD setting after push + // Keep track of state and events for processing after push let mut authorized_state: Option = None; + let mut authorized_events: Vec = Vec::new(); - // GRASP Authorization Check (if database is provided) - if let Some(ref db) = database { - info!( - "Authorizing push for {} owned by {} via database query", - identifier, owner_pubkey - ); + // GRASP Authorization Check + info!( + "Authorizing push for {} owned by {} via database query", + identifier, owner_pubkey + ); - match authorize_push(db, identifier, owner_pubkey, &request_body).await { - Ok(auth_result) => { - if !auth_result.authorized { - warn!("Push rejected for {}: {}", identifier, auth_result.reason); - return Err(GitError::Unauthorized); - } - info!( - "Push authorized for {} - {} maintainers", - identifier, - auth_result.maintainers.len() - ); - // Save the state for HEAD setting after push - authorized_state = auth_result.state; - } - Err(e) => { - warn!("Authorization check failed for {}: {}", identifier, e); + match authorize_push( + &database, + identifier, + owner_pubkey, + &request_body, + &purgatory, + &repo_path, + ) + .await + { + Ok(auth_result) => { + if !auth_result.authorized { + warn!("Push rejected for {}: {}", identifier, auth_result.reason); return Err(GitError::Unauthorized); } + info!( + "Push authorized for {} - {} maintainers, {} purgatory events", + identifier, + auth_result.maintainers.len(), + auth_result.purgatory_events.len() + ); + // Save the state for HEAD setting after push + authorized_state = auth_result.state.clone(); + // Save the purgatory events for database saving after push + authorized_events = auth_result.purgatory_events; + } + Err(e) => { + warn!("Authorization check failed for {}: {}", identifier, e); + return Err(GitError::Unauthorized); } - } else { - debug!("No database provided - accepting push without authorization"); } // Spawn git receive-pack @@ -265,7 +283,7 @@ pub async fn handle_receive_pack( // GRASP-01: Set HEAD after git data is received // "MUST set repository HEAD per repository state announcement // as soon as the git data related to that branch has been received." - if let Some(state) = authorized_state { + if let Some(ref state) = authorized_state { if let Some(head_ref) = &state.head { if let Some(branch_name) = state.get_head_branch() { if let Some(commit) = state.get_branch_commit(branch_name) { @@ -288,6 +306,43 @@ pub async fn handle_receive_pack( } } + // Save all events from purgatory that authorized this push and remove them from purgatory + // This includes state events, PR events, and PR-update events + if !authorized_events.is_empty() { + info!( + "Saving {} purgatory event(s) to database after successful push", + authorized_events.len() + ); + + for event in &authorized_events { + match database.save_event(event).await { + Ok(_) => { + info!("Saved purgatory event {} to database", event.id); + // TODO let broadcast_success = local_relay.notify_event(event.clone()); + warn!("TODO Here we need to broadcast on open websockets for live listeners. eventid; {}", event.id); + // Remove from purgatory based on event kind + if event.kind == Kind::from(KIND_REPOSITORY_STATE) { + purgatory.remove_state_event(identifier, &event.id); + info!("Removed state event {} from purgatory", event.id); + } else if event.kind == Kind::from(KIND_PR) + || event.kind == Kind::from(KIND_PR_UPDATE) + { + // Extract event ID from the event itself (it's the event.id) + let event_id_hex = event.id.to_hex(); + purgatory.remove_pr(&event_id_hex); + info!("Removed PR event {} from purgatory", event.id); + } + } + Err(e) => { + warn!( + "Failed to save purgatory event {} to database: {}", + event.id, e + ); + } + } + } + } + Ok(Response::builder() .status(StatusCode::OK) .header( @@ -302,115 +357,175 @@ pub async fn handle_receive_pack( /// Perform GRASP authorization for a push operation /// /// This function queries the database directly (not via WebSocket): -/// 1. Fetches announcement and state events for the identifier -/// 2. Filters to the specific owner's announcement -/// 3. Collects authorized publishers from that announcement (owner + maintainers) -/// 4. Gets the latest authorized state from those publishers -/// 5. Validates that pushed refs match the state -/// 6. Validates refs/nostr/ has valid event id and if event exists, `c` tag matches ref +/// 1. Parses the pushed refs from the git pack protocol +/// 2. Separates refs/nostr/ refs from normal refs +/// 3. For normal refs: validates against state events in purgatory +/// 4. For refs/nostr/ refs: validates event ID format and collects PR/PR-update events from purgatory +/// 5. Returns all authorizing events (state + PR/PR-update) in the result async fn authorize_push( database: &SharedDatabase, identifier: &str, owner_pubkey: &str, request_body: &Bytes, + purgatory: &Arc, + repo_path: &std::path::Path, ) -> anyhow::Result { debug!( "Authorizing push for {} owned by {} via database query", identifier, owner_pubkey ); - // Parse refs from the push request FIRST to check if this is a refs/nostr/ push + // Parse refs from the push request let pushed_refs = parse_pushed_refs(request_body); debug!("Parsed {} refs from push request", pushed_refs.len()); for (old_oid, new_oid, ref_name) in &pushed_refs { debug!(" {} {} -> {}", ref_name, old_oid, new_oid); } - // Separate refs/nostr/ refs from other refs - // Per GRASP-01: "MUST accept pushes via this service to `refs/nostr/`" - let (nostr_refs, other_refs): (Vec<_>, Vec<_>) = pushed_refs + // Separate refs/nostr/ refs from state refs + let (nostr_refs, state_refs): (Vec<_>, Vec<_>) = pushed_refs .iter() .partition(|(_, _, ref_name)| ref_name.starts_with("refs/nostr/")); - // Validate refs/nostr/ refs if any exist + // Collect all purgatory events that authorize this push + let mut purgatory_events = Vec::new(); + + // Handle refs/nostr/ refs - validate and collect PR/PR-update events from purgatory if !nostr_refs.is_empty() { debug!( - "Found {} refs/nostr/ refs - validating against events", + "Found {} refs/nostr/ refs - validating and collecting from purgatory", nostr_refs.len() ); - // Validate refs/nostr/ pushes: checks event ID format and commit matching - let nostr_refs_owned: Vec<(String, String, String)> = nostr_refs - .into_iter() - .map(|(a, b, c)| (a.clone(), b.clone(), c.clone())) - .collect(); - if let Err(e) = validate_nostr_ref_pushes(database, &nostr_refs_owned).await { - warn!("refs/nostr/ validation failed: {}", e); - return Ok(AuthorizationResult::denied(format!( - "refs/nostr/ validation failed: {}", - e - ))); + for (_, new_oid, ref_name) in &nostr_refs { + // Extract event ID from ref name + if let Some(event_id_hex) = ref_name.strip_prefix("refs/nostr/") { + // Validate event ID format + if EventId::parse(event_id_hex).is_err() { + warn!("Invalid event ID format in ref: {}", ref_name); + return Ok(AuthorizationResult::denied(format!( + "Invalid event ID format in ref: {}", + ref_name + ))); + } + + // Check purgatory for PR event + if let Some(entry) = purgatory.find_pr(event_id_hex) { + if let Some(event) = entry.event { + // Verify commit matches + if entry.commit == *new_oid { + debug!( + "Found matching PR event {} in purgatory for ref {}", + event_id_hex, ref_name + ); + purgatory_events.push(event); + } else { + warn!( + "PR event {} in purgatory has commit mismatch: expected {}, got {}", + event_id_hex, entry.commit, new_oid + ); + return Ok(AuthorizationResult::denied(format!( + "PR event {} commit mismatch: expected {}, got {}", + event_id_hex, entry.commit, new_oid + ))); + } + } else { + // Placeholder exists - allow push (git-data-first scenario) + debug!( + "Found placeholder already for PR event {} in purgatory - as we dont have the event and therefore dont know the required commit_id we allow overwriting with a different commit_id", + event_id_hex + ); + } + } else { + // No entry in purgatory - check database for existing event + let nostr_refs_owned = vec![(String::new(), new_oid.clone(), ref_name.clone())]; + if let Err(e) = validate_nostr_ref_pushes(database, &nostr_refs_owned).await { + warn!("refs/nostr/ validation failed: {}", e); + return Ok(AuthorizationResult::denied(format!( + "refs/nostr/ validation failed: {}", + e + ))); + } + debug!( + "No purgatory entry for {} - validated against database", + event_id_hex + ); + } + } } - debug!("refs/nostr/ push validated successfully"); } - // If only refs/nostr/ refs, we're done - return success - if other_refs.is_empty() { - debug!("Only refs/nostr/ refs in push - authorization complete"); - return Ok(AuthorizationResult { - authorized: true, - reason: "Push to refs/nostr/ validated against events".to_string(), - state: None, - maintainers: vec![], - }); - } + // Handle normal refs - validate against state events + if !state_refs.is_empty() { + debug!( + "Found {} non-refs/nostr/ refs - checking state authorization", + state_refs.len() + ); - // For non-refs/nostr/ refs, require state validation - debug!( - "Found {} non-refs/nostr/ refs - checking state authorization", - other_refs.len() - ); - let auth_result = get_authorization_for_owner(database, identifier, owner_pubkey).await?; + let auth_result = get_state_authorization_for_specific_owner_repo( + database, + identifier, + owner_pubkey, + purgatory, + &pushed_refs, //it would be better to accept state_refs but thats in different format + repo_path, + ) + .await?; - if !auth_result.authorized { - return Ok(auth_result); - } + if !auth_result.authorized { + return Ok(auth_result); + } - // Convert other_refs for validation - let other_refs_owned: Vec<(String, String, String)> = other_refs - .into_iter() - .map(|(a, b, c)| (a.clone(), b.clone(), c.clone())) - .collect(); + // Collect state events from purgatory + purgatory_events.extend(auth_result.purgatory_events); - // Validate non-refs/nostr/ refs against state - if let Some(ref state) = auth_result.state { - debug!( - "Validating against state with {} branches", - state.branches.len() - ); + // Validate refs against state + let other_refs_owned: Vec<(String, String, String)> = state_refs + .into_iter() + .map(|(a, b, c)| (a.clone(), b.clone(), c.clone())) + .collect(); - // If we have a state event but couldn't parse any refs, reject the push. - // This protects against parsing failures allowing unauthorized pushes. - if other_refs_owned.is_empty() && !state.branches.is_empty() { - warn!("No refs parsed from push request but state event has branches - rejecting"); - return Ok(AuthorizationResult::denied( - "Failed to parse refs from push request - cannot validate against state", - )); - } + if let Some(ref state) = auth_result.state { + debug!( + "Validating against state with {} branches", + state.branches.len() + ); + + if other_refs_owned.is_empty() && !state.branches.is_empty() { + warn!("No refs parsed from push request but state event has branches - rejecting"); + return Ok(AuthorizationResult::denied( + "Failed to parse refs from push request - cannot validate against state", + )); + } - if let Err(e) = validate_push_refs(state, &other_refs_owned) { - warn!("Ref validation failed: {}", e); - return Ok(AuthorizationResult::denied(format!( - "Ref validation failed: {}", - e - ))); + if let Err(e) = validate_push_refs(state, &other_refs_owned) { + warn!("Ref validation failed: {}", e); + return Ok(AuthorizationResult::denied(format!( + "Ref validation failed: {}", + e + ))); + } + debug!("Ref validation passed"); } - debug!("Ref validation passed"); - } else { - warn!("No state in auth_result - cannot validate refs"); + + // Return result with purgatory events + return Ok(AuthorizationResult { + authorized: true, + reason: auth_result.reason, + state: auth_result.state, + maintainers: auth_result.maintainers, + purgatory_events, + }); } - Ok(auth_result) + // Only refs/nostr/ refs - return success with collected events + Ok(AuthorizationResult { + authorized: true, + reason: "Push to refs/nostr/ validated".to_string(), + state: None, + maintainers: vec![], + purgatory_events, + }) } /// Errors that can occur in Git handlers diff --git a/src/git/mod.rs b/src/git/mod.rs index 599a94b..5c99b3e 100644 --- a/src/git/mod.rs +++ b/src/git/mod.rs @@ -340,6 +340,74 @@ pub fn validate_nostr_ref( Ok(true) } +/// Clean up placeholder refs from all repositories on shutdown. +/// +/// Walks through all git repositories in the git_data_path and deletes +/// `refs/nostr/` refs for the given event IDs. This is called +/// on shutdown to clean up placeholders created when git data arrived +/// before the corresponding PR event. +/// +/// # Arguments +/// * `git_data_path` - Base directory containing git repositories +/// * `event_ids` - Event IDs whose refs/nostr/ refs should be deleted +/// +/// # Returns +/// Number of refs successfully deleted +pub fn cleanup_placeholder_refs(git_data_path: &str, event_ids: &[String]) -> usize { + if event_ids.is_empty() { + return 0; + } + + let git_path = PathBuf::from(git_data_path); + if !git_path.exists() { + debug!("Git data path does not exist: {}", git_data_path); + return 0; + } + + let mut deleted_count = 0; + + // Walk through all repositories (npub/repo.git structure) + if let Ok(npub_entries) = std::fs::read_dir(&git_path) { + for npub_entry in npub_entries.flatten() { + if !npub_entry.path().is_dir() { + continue; + } + + // For each npub directory, check repos + if let Ok(repo_entries) = std::fs::read_dir(npub_entry.path()) { + for repo_entry in repo_entries.flatten() { + let repo_path = repo_entry.path(); + if !repo_path.is_dir() || !repo_path.to_string_lossy().ends_with(".git") { + continue; + } + + // Try to delete refs/nostr/ for each placeholder event + for event_id in event_ids { + let ref_name = format!("refs/nostr/{}", event_id); + if delete_ref(&repo_path, &ref_name).is_ok() { + deleted_count += 1; + info!( + "Cleaned up placeholder ref {} from {}", + ref_name, + repo_path.display() + ); + } + } + } + } + } + } + + if deleted_count > 0 { + info!( + "Shutdown cleanup: removed {} placeholder refs from git repositories", + deleted_count + ); + } + + deleted_count +} + /// Get the current HEAD ref from a repository /// /// # Arguments -- cgit v1.2.3