From e72edbae86affcb9fc0429bd197639bf438ffb6c Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Wed, 7 Jan 2026 12:24:42 +0000 Subject: Add unified process_newly_available_git_data function Implement the unified function that handles all post-git-data-available processing, regardless of how data arrived (git push or purgatory sync). This function: - Discovers satisfiable events from purgatory (state and PR events) - Syncs OIDs to authorized owner repos - Aligns refs and sets HEAD - Saves events to database - Notifies WebSocket subscribers - Removes from purgatory New additions: - ProcessResult struct for tracking processing outcomes - process_newly_available_git_data async function in src/git/sync.rs - Helper functions: extract_identifier_from_repo_path, extract_identifier_from_pr_event - Purgatory::find_prs_for_identifier method for PR event discovery - Unit tests for all helper functions Also fixes: - Simplified extract_domain to avoid url crate dependency - Removed unused imports in sync/loop.rs --- src/purgatory/sync/functions.rs | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'src/purgatory/sync/functions.rs') diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs index 751dd5e..13b2e47 100644 --- a/src/purgatory/sync/functions.rs +++ b/src/purgatory/sync/functions.rs @@ -20,16 +20,32 @@ use super::throttle::ThrottleManager; /// Extract domain from a URL. /// +/// Supports HTTP(S) URLs. SSH URLs (git@...) are not supported. 
/// Extract the host (domain) from an HTTP(S) URL.
///
/// Only `http://` and `https://` URLs are supported. Anything else, including
/// scp-style SSH URLs such as `git@github.com:foo/bar.git`, yields `None`.
///
/// The input is read as `scheme://[userinfo@]host[:port][/path][?query][#fragment]`:
///
/// * the authority ends at the first `/`, `?` or `#`;
/// * user info is everything up to the last `@` *inside the authority*, so an
///   `@` that appears in the path or query is never mistaken for it;
/// * a bracketed IPv6 literal is returned with its brackets (`[::1]`);
/// * the port, if any, is dropped.
///
/// Returns `None` when the scheme is not HTTP(S), the host is empty, or a
/// bracketed IPv6 literal is malformed.
///
/// # Examples
///
/// ```ignore
/// assert_eq!(extract_domain("https://github.com/foo/bar.git"), Some("github.com".to_string()));
/// assert_eq!(extract_domain("http://example.com:8080/repo.git"), Some("example.com".to_string()));
/// assert_eq!(extract_domain("https://github.com/foo@bar/repo.git"), Some("github.com".to_string()));
/// assert_eq!(extract_domain("http://[::1]:8080/repo.git"), Some("[::1]".to_string()));
/// assert_eq!(extract_domain("git@github.com:foo/bar.git"), None); // SSH URLs not supported
/// ```
fn extract_domain(url: &str) -> Option<String> {
    let rest = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))?;

    // Isolate the authority first. Doing this before looking for '@' is what
    // keeps an '@' in the path/query from being treated as the userinfo delimiter.
    let authority = rest
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()?;

    // Drop "user[:password]@". `rsplit` always yields at least one item, and
    // taking the piece after the LAST '@' tolerates an '@' inside the password.
    let host_port = authority.rsplit('@').next()?;

    let host = if host_port.starts_with('[') {
        // Bracketed IPv6 literal: the host runs through the closing ']', and
        // whatever follows must be nothing or ":port".
        let close = host_port.find(']')?;
        let tail = &host_port[close + 1..];
        if !(tail.is_empty() || tail.starts_with(':')) {
            return None;
        }
        &host_port[..=close]
    } else {
        // Registered name or IPv4 address: everything before the optional ":port".
        host_port.split(':').next()?
    };

    if host.is_empty() {
        None
    } else {
        Some(host.to_string())
    }
}