From c67ebe6f33bfa191f17eb0df24d3ee18092c74e1 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Wed, 7 Jan 2026 23:31:38 +0000 Subject: refactor: unify event processing logic Eliminates code duplication by extracting core event processing into reusable functions. All state and PR event processing now uses the same unified logic from src/git/process.rs. Changes: - Add src/git/process.rs with unified processing functions - process_state_with_git_data() for state events - process_pr_with_git_data() for PR events - Pure functions with comprehensive result types - Refactor policy handlers to use unified processing - src/nostr/policy/state.rs: Remove ~70 lines of duplicated logic - src/nostr/policy/pr_event.rs: Remove ~40 lines of duplicated logic - Refactor purgatory processing to use unified functions - src/git/sync.rs: Remove ~125 lines of duplicated logic - Make extract_owner_from_repo_path() public for reuse Benefits: - DRY: Single source of truth for event processing - Testable: Pure functions with clear contracts - Maintainable: Changes happen in one place - Consistent: All code paths use same logic All 217 unit tests + 40 integration tests pass (257/257). --- src/git/mod.rs | 1 + src/git/process.rs | 255 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/git/sync.rs | 190 +++++++++------------------------------ 3 files changed, 299 insertions(+), 147 deletions(-) create mode 100644 src/git/process.rs (limited to 'src/git') diff --git a/src/git/mod.rs b/src/git/mod.rs index fb17c53..205e3bc 100644 --- a/src/git/mod.rs +++ b/src/git/mod.rs @@ -19,6 +19,7 @@ pub mod authorization; pub mod handlers; +pub mod process; pub mod protocol; pub mod subprocess; pub mod sync; diff --git a/src/git/process.rs b/src/git/process.rs new file mode 100644 index 0000000..d052c04 --- /dev/null +++ b/src/git/process.rs @@ -0,0 +1,255 @@ +//! Event Processing - Unified logic for processing state and PR events with git data +//! +//! This module provides the core processing logic used when events have git data available. +//! These functions are used in multiple scenarios: +//! - When events arrive with git data already available (policy handlers) +//! - When events are released from purgatory (purgatory sync) +//! - When git pushes trigger purgatory releases (receive-pack handler) + +use std::path::Path; +use nostr_sdk::Event; +use crate::git::authorization::{collect_authorized_maintainers, RepositoryData}; +use crate::git::sync::{align_repository_with_state, sync_pr_refs_to_tagged_owner_repos, copy_missing_oids_between_repos}; +use crate::git; +use crate::nostr::events::RepositoryState; + +/// Result of processing a state event with git data +#[derive(Debug, Default, Clone)] +pub struct ProcessStateResult { + /// Number of repositories synced (OIDs copied + refs aligned) + pub repos_synced: usize, + /// Number of refs created across all repos + pub refs_created: usize, + /// Number of refs updated across all repos + pub refs_updated: usize, + /// Number of refs deleted across all repos + pub refs_deleted: usize, + /// Errors encountered (non-fatal) + pub errors: Vec, +} + +/// Result of processing a PR event with git data +#[derive(Debug, Default, Clone)] +pub struct ProcessPrResult { + /// Number of repositories synced + pub repos_synced: usize, + /// Number of refs created across all repos + pub refs_created: usize, + /// Errors encountered (non-fatal) + pub errors: Vec, +} + +/// Process a single state event that has git data available. +/// +/// This is the core processing logic used when: +/// - A state event arrives with git data already available +/// - A state event is released from purgatory +/// +/// Does NOT save to database or notify subscribers - caller handles that. +/// +/// # Processing Steps +/// 1. Identify owner repos where state author is an authorized maintainer +/// 2. For each owner repo, check if this state is the latest authorized +/// 3. Copy missing OIDs from source repo to target repo +/// 4. Align refs (branches, tags, HEAD) with the state +/// +/// # Arguments +/// * `state` - The state event to process +/// * `source_repo_path` - Path to repo that has the git data +/// * `db_repo_data` - Repository data from database (announcements + states) +/// * `git_data_path` - Base path for git repositories +/// +/// # Returns +/// ProcessStateResult with statistics +pub fn process_state_with_git_data( + state: &RepositoryState, + source_repo_path: &Path, + db_repo_data: &RepositoryData, + git_data_path: &Path, +) -> ProcessStateResult { + let mut result = ProcessStateResult::default(); + + let state_author = state.event.pubkey.to_hex(); + + // Collect authorized maintainers per owner + let by_owner = collect_authorized_maintainers(&db_repo_data.announcements); + + // Step 1: Identify owner repos that the state event author is maintainer for + let authorized_owners: Vec<&String> = by_owner + .iter() + .filter(|(_, maintainers)| maintainers.contains(&state_author)) + .map(|(owner, _)| owner) + .collect(); + + if authorized_owners.is_empty() { + tracing::debug!( + identifier = %state.identifier, + author = %state_author, + "State event author not authorized for any owner" + ); + return result; + } + + // Process each owner repo that authorizes this state event author + for owner in &authorized_owners { + let maintainers = by_owner.get(*owner).unwrap(); + + // Step 2: Check if this state event is the latest authorized for this owner + let is_latest = crate::git::sync::is_latest_authorized_state_public( + state, + maintainers, + &db_repo_data.states, + ); + + if !is_latest { + tracing::debug!( + identifier = %state.identifier, + owner = %owner, + "Skipping owner - newer authorized state exists" + ); + continue; + } + + // Find the announcement for this owner + let Some(announcement) = db_repo_data + .announcements + .iter() + .find(|a| a.event.pubkey.to_hex() == **owner) + else { + continue; + }; + + let target_repo_path = git_data_path.join(announcement.repo_path()); + + // Step 3: Check git repo exists for that owner + if !target_repo_path.exists() { + tracing::debug!( + identifier = %state.identifier, + owner = %owner, + repo_path = %target_repo_path.display(), + "Skipping owner - repository doesn't exist" + ); + continue; + } + + // Step 4: Copy all required OIDs to that repo (unless it's source_repo_path) + if target_repo_path != source_repo_path { + if let Err(e) = copy_missing_oids_between_repos( + source_repo_path, + &target_repo_path, + state, + ) { + tracing::warn!( + identifier = %state.identifier, + source = %source_repo_path.display(), + target = %target_repo_path.display(), + error = %e, + "Failed to copy OIDs between repos" + ); + result.errors.push(e); + continue; // Skip this owner repo + } + } + + // Step 5: Reset the git state in that repo to match the state event + let align_result = align_repository_with_state(&target_repo_path, state); + result.repos_synced += 1; + result.refs_created += align_result.refs_created; + result.refs_updated += align_result.refs_updated; + result.refs_deleted += align_result.refs_deleted; + + tracing::info!( + identifier = %state.identifier, + owner = %owner, + repo_path = %target_repo_path.display(), + refs_created = align_result.refs_created, + refs_updated = align_result.refs_updated, + refs_deleted = align_result.refs_deleted, + head_set = align_result.head_set, + "Aligned repository with state" + ); + } + + result +} + +/// Process a single PR event that has git data available. +/// +/// This is the core processing logic used when: +/// - A PR event arrives with git data already available +/// - A PR event is released from purgatory +/// +/// Does NOT save to database or notify subscribers - caller handles that. +/// +/// # Processing Steps +/// 1. Sync PR commit to owner repos using tagged maintainer logic +/// 2. Create refs/nostr/ ref in source repo (if missing) +/// 3. Create refs/nostr/ refs in all synced repos +/// +/// # Arguments +/// * `event` - The PR event to process +/// * `commit` - The commit hash from the PR event +/// * `source_repo_path` - Path to repo that has the commit +/// * `db_repo_data` - Repository data from database (announcements + states) +/// * `git_data_path` - Base path for git repositories +/// * `source_owner_pubkey` - Owner pubkey of source repo (to skip) +/// +/// # Returns +/// ProcessPrResult with statistics +pub fn process_pr_with_git_data( + event: &Event, + commit: &str, + source_repo_path: &Path, + db_repo_data: &RepositoryData, + git_data_path: &Path, + source_owner_pubkey: &str, +) -> ProcessPrResult { + let mut result = ProcessPrResult::default(); + + let event_id = event.id.to_hex(); + + // Sync PR ref to owner repos using tagged maintainer logic + let pr_refs = vec![(event_id.clone(), commit.to_string())]; + let pr_events = vec![event.clone()]; + + let sync_result = sync_pr_refs_to_tagged_owner_repos( + source_repo_path, + &pr_refs, + &pr_events, + db_repo_data, + git_data_path, + source_owner_pubkey, + ); + result.repos_synced += sync_result.repos_synced; + result.refs_created += sync_result.refs_created; + result.errors.extend( + sync_result + .errors + .into_iter() + .map(|(_, e)| e), + ); + + // Create the ref in the source repo if it doesn't exist + let ref_name = format!("refs/nostr/{}", event_id); + if git::get_ref_commit(source_repo_path, &ref_name).is_none() { + if let Err(e) = git::update_ref(source_repo_path, &ref_name, commit) { + tracing::warn!( + event_id = %event_id, + repo = %source_repo_path.display(), + error = %e, + "Failed to create PR ref in source repo" + ); + result.errors.push(e); + } else { + result.refs_created += 1; + tracing::info!( + event_id = %event_id, + commit = %commit, + repo = %source_repo_path.display(), + "Created PR ref in source repo" + ); + } + } + + result +} diff --git a/src/git/sync.rs b/src/git/sync.rs index 2f43e6e..5e2d3f2 100644 --- a/src/git/sync.rs +++ b/src/git/sync.rs @@ -908,12 +908,9 @@ async fn process_purgatory_state_events( } }; - // Collect authorized maintainers per owner (computed once) - let by_owner = collect_authorized_maintainers(&db_repo_data.announcements); - // Process each state event in chronological order for entry in &purgatory_states { - // Step 0: Check if we have all the git data needed to apply this state event + // Check if we have all the git data needed to apply this state event if !can_apply_state(&entry.event, source_repo_path) { debug!( identifier = %identifier, @@ -940,122 +937,19 @@ async fn process_purgatory_state_events( } }; - let state_author = state.event.pubkey.to_hex(); - - // Step 1: Identify owner repos that the state event author is maintainer for - let authorized_owners: Vec<&String> = by_owner - .iter() - .filter(|(_, maintainers)| maintainers.contains(&state_author)) - .map(|(owner, _)| owner) - .collect(); - - if authorized_owners.is_empty() { - debug!( - identifier = %identifier, - event_id = %entry.event.id, - pubkey = %state_author, - "State event author not authorized for any owner - skipping" - ); - continue; - } - - // Track if we applied to at least one owner repo - let mut applied_to_any = false; - - // Process each owner repo that authorizes this state event author - for owner in &authorized_owners { - let maintainers = by_owner.get(*owner).unwrap(); - - // Step 2: Check if this state event is the latest authorized for this owner - // Only consider database states, not other purgatory states - let is_latest = is_latest_authorized_state( - &state, - maintainers, - &db_repo_data.states, - ); - - if !is_latest { - debug!( - identifier = %identifier, - event_id = %entry.event.id, - owner = %owner, - "Skipping owner - a newer authorized state exists" - ); - continue; - } - - // Find the announcement for this owner - let announcement = db_repo_data - .announcements - .iter() - .find(|a| a.event.pubkey.to_hex() == **owner); - - let Some(announcement) = announcement else { - continue; - }; - - let target_repo_path = git_data_path.join(announcement.repo_path()); - - // Step 3: Check git repo exists for that owner - if !target_repo_path.exists() { - debug!( - identifier = %identifier, - owner = %owner, - repo_path = %target_repo_path.display(), - "Skipping owner - repository doesn't exist" - ); - continue; - } - - // Step 4: Copy all required OIDs to that repo (unless it's source_repo_path) - if target_repo_path != source_repo_path { - if let Err(e) = - copy_missing_oids_between_repos(source_repo_path, &target_repo_path, &state) - { - warn!( - identifier = %identifier, - source = %source_repo_path.display(), - target = %target_repo_path.display(), - error = %e, - "Failed to copy OIDs between repos" - ); - result - .errors - .push((target_repo_path.display().to_string(), e).1); - // Continue anyway - we'll try to align what we can - } - } - - // Step 5: Reset the git state in that repo to match the state event - // (excluding refs/nostr/*) - let align_result = align_repository_with_state(&target_repo_path, &state); - result.repos_synced += 1; - result.refs_created += align_result.refs_created; - result.refs_updated += align_result.refs_updated; - result.refs_deleted += align_result.refs_deleted; - - info!( - identifier = %identifier, - owner = %owner, - event_id = %entry.event.id, - repo_path = %target_repo_path.display(), - refs_created = align_result.refs_created, - refs_updated = align_result.refs_updated, - refs_deleted = align_result.refs_deleted, - head_set = align_result.head_set, - "Aligned repository with state from purgatory" - ); - - applied_to_any = true; - } + // Use unified processing function + let process_result = crate::git::process::process_state_with_git_data( + &state, + source_repo_path, + &db_repo_data, + git_data_path, + ); - // We have the git data now, so we should release from purgatory regardless of - // whether we applied to any repo. The question is: should we save to DB or just remove? - // - // - If there's a newer state event from the same author already in the DB, just remove - // (no point saving an older event that will never be used) - // - Otherwise, save it to the DB (even if we didn't apply to any repo, because in the - // future the currently-authorized state event might be deleted and this one should apply) + result.repos_synced += process_result.repos_synced; + result.refs_created += process_result.refs_created; + result.refs_updated += process_result.refs_updated; + result.refs_deleted += process_result.refs_deleted; + result.errors.extend(process_result.errors); // Check if there's a newer state from the same author in the database let has_newer_from_same_author = db_repo_data.states.iter().any(|s| { @@ -1073,17 +967,16 @@ async fn process_purgatory_state_events( debug!( identifier = %identifier, event_id = %entry.event.id, - author = %state_author, "Removed older state event from purgatory - newer event from same author exists in DB" ); } else { - // Save to database (even if we didn't apply to any repo) + // Save to database match database.save_event(&entry.event).await { Ok(_) => { info!( identifier = %identifier, event_id = %entry.event.id, - applied_to_repos = applied_to_any, + repos_synced = process_result.repos_synced, "Saved purgatory state event to database" ); @@ -1169,6 +1062,25 @@ fn is_latest_authorized_state( } } +/// Check if a state event is the latest authorized state for a given maintainer set. +/// +/// Only considers states already in the database, not other purgatory states. +/// +/// # Arguments +/// * `state` - The state event to check +/// * `maintainers` - The set of authorized maintainers for the owner +/// * `db_states` - State events from the database +/// +/// # Returns +/// true if this state is the latest (or equal latest) among all authorized states in the DB +pub fn is_latest_authorized_state_public( + state: &RepositoryState, + maintainers: &[String], + db_states: &[RepositoryState], +) -> bool { + is_latest_authorized_state(state, maintainers, db_states) +} + /// Process PR events from purgatory that can now be satisfied. async fn process_purgatory_pr_events( identifier: &str, @@ -1224,39 +1136,23 @@ async fn process_purgatory_pr_events( continue; } - // Sync PR ref to owner repos - let pr_refs = vec![(event.id.to_hex(), entry.commit.clone())]; - let pr_events = vec![event.clone()]; - - // Get owner pubkey from source repo path + // Extract owner pubkey let owner_pubkey = extract_owner_from_repo_path(source_repo_path, git_data_path) .unwrap_or_default(); - let sync_result = sync_pr_refs_to_tagged_owner_repos( + // Use unified processing function + let process_result = crate::git::process::process_pr_with_git_data( + event, + &entry.commit, source_repo_path, - &pr_refs, - &pr_events, &db_repo_data, git_data_path, &owner_pubkey, ); - result.repos_synced += sync_result.repos_synced; - result.refs_created += sync_result.refs_created; - // Create the ref in the source repo if it doesn't exist - let ref_name = format!("refs/nostr/{}", event.id.to_hex()); - if git::get_ref_commit(source_repo_path, &ref_name).is_none() { - if let Err(e) = git::update_ref(source_repo_path, &ref_name, &entry.commit) { - warn!( - identifier = %identifier, - event_id = %event.id, - error = %e, - "Failed to create PR ref in source repo" - ); - } else { - result.refs_created += 1; - } - } + result.repos_synced += process_result.repos_synced; + result.refs_created += process_result.refs_created; + result.errors.extend(process_result.errors); // Save event to database match database.save_event(event).await { @@ -1307,7 +1203,7 @@ async fn process_purgatory_pr_events( /// Extract owner pubkey from a repository path. /// /// Given a path like `{git_data_path}/{npub}/{identifier}.git`, extracts the npub. -fn extract_owner_from_repo_path(repo_path: &Path, git_data_path: &Path) -> Option { +pub fn extract_owner_from_repo_path(repo_path: &Path, git_data_path: &Path) -> Option { let relative = repo_path.strip_prefix(git_data_path).ok()?; let components: Vec<_> = relative.components().collect(); if !components.is_empty() { -- cgit v1.2.3