diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-07 12:24:42 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-07 12:24:42 +0000 |
| commit | e72edbae86affcb9fc0429bd197639bf438ffb6c (patch) | |
| tree | b59f31287d10fe9b22d7e5af4cb3aa94ef06dc8a /src/purgatory | |
| parent | 18bfb246029a848a0b307e7c8a8e4df57addabb2 (diff) | |
Add unified process_newly_available_git_data function
Implement the unified function that handles all post-git-data-available
processing, regardless of how data arrived (git push or purgatory sync).
This function:
- Discovers satisfiable events from purgatory (state and PR events)
- Syncs OIDs to authorized owner repos
- Aligns refs and sets HEAD
- Saves events to database
- Notifies WebSocket subscribers
- Removes from purgatory
New additions:
- ProcessResult struct for tracking processing outcomes
- process_newly_available_git_data async function in src/git/sync.rs
- Helper functions: extract_identifier_from_repo_path, extract_identifier_from_pr_event
- Purgatory::find_prs_for_identifier method for PR event discovery
- Unit tests for all helper functions
Also fixes:
- Simplified extract_domain to avoid url crate dependency
- Removed unused imports in sync/loop.rs
Diffstat (limited to 'src/purgatory')
| -rw-r--r-- | src/purgatory/mod.rs | 27 | ||||
| -rw-r--r-- | src/purgatory/sync/functions.rs | 22 | ||||
| -rw-r--r-- | src/purgatory/sync/loop.rs | 2 |
3 files changed, 47 insertions, 4 deletions
diff --git a/src/purgatory/mod.rs b/src/purgatory/mod.rs index fcb812b..11fe41f 100644 --- a/src/purgatory/mod.rs +++ b/src/purgatory/mod.rs | |||
| @@ -365,6 +365,33 @@ impl Purgatory { | |||
| 365 | }) | 365 | }) |
| 366 | } | 366 | } |
| 367 | 367 | ||
| 368 | /// Find all PR events for a specific repository identifier. | ||
| 369 | /// | ||
| 370 | /// PR events reference repositories via `a` tags with format `30617:<owner_pubkey>:<identifier>`. | ||
| 371 | /// This function scans all PR entries and returns those that reference the given identifier. | ||
| 372 | /// | ||
| 373 | /// Note: This is a linear scan since PR events are indexed by event_id, not by identifier. | ||
| 374 | /// For repositories with many PR events, this could be optimized with a secondary index. | ||
| 375 | /// | ||
| 376 | /// # Arguments | ||
| 377 | /// * `identifier` - The repository identifier to search for | ||
| 378 | /// | ||
| 379 | /// # Returns | ||
| 380 | /// Vector of PR purgatory entries that reference this identifier | ||
| 381 | pub fn find_prs_for_identifier(&self, identifier: &str) -> Vec<PrPurgatoryEntry> { | ||
| 382 | self.pr_events | ||
| 383 | .iter() | ||
| 384 | .filter(|entry| { | ||
| 385 | if let Some(ref event) = entry.value().event { | ||
| 386 | Self::event_references_identifier(event, identifier) | ||
| 387 | } else { | ||
| 388 | false | ||
| 389 | } | ||
| 390 | }) | ||
| 391 | .map(|entry| entry.value().clone()) | ||
| 392 | .collect() | ||
| 393 | } | ||
| 394 | |||
| 368 | /// Remove a state event from purgatory. | 395 | /// Remove a state event from purgatory. |
| 369 | /// | 396 | /// |
| 370 | /// Removes all entries for the given identifier. | 397 | /// Removes all entries for the given identifier. |
diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs index 751dd5e..13b2e47 100644 --- a/src/purgatory/sync/functions.rs +++ b/src/purgatory/sync/functions.rs | |||
| @@ -20,16 +20,32 @@ use super::throttle::ThrottleManager; | |||
| 20 | 20 | ||
/// Extract the domain (host) from a URL.
///
/// Supports HTTP(S) URLs. SSH URLs (git@...) are not supported.
///
/// The authority component is isolated first (everything before the first `/`,
/// `?` or `#`), so `@` or `:` characters appearing later in the path, query or
/// fragment cannot be mistaken for userinfo or port separators. Bracketed IPv6
/// literals are returned with their brackets.
///
/// # Returns
/// `Some(host)` when the URL starts with `http://` or `https://` and has a
/// non-empty host; `None` otherwise.
///
/// # Examples
///
/// ```ignore
/// assert_eq!(extract_domain("https://github.com/foo/bar.git"), Some("github.com".to_string()));
/// assert_eq!(extract_domain("http://example.com:8080/repo.git"), Some("example.com".to_string()));
/// assert_eq!(extract_domain("https://example.com/foo@bar/repo.git"), Some("example.com".to_string()));
/// assert_eq!(extract_domain("git@github.com:foo/bar.git"), None); // SSH URLs not supported
/// ```
fn extract_domain(url: &str) -> Option<String> {
    // Format: scheme://[userinfo@]host[:port][/path][?query][#fragment]
    let rest = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))?;

    // Cut the authority out before looking for '@' or ':'; both may legally
    // appear in the path, query or fragment.
    let authority = rest
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()?;

    // Userinfo, if present, ends at the last '@' inside the authority.
    let host_port = authority.rsplit('@').next()?;

    let host = if host_port.starts_with('[') {
        // Bracketed IPv6 literal: the colons inside the brackets are part of
        // the address, so keep everything through the closing ']'.
        let end = host_port.find(']')?;
        &host_port[..=end]
    } else {
        // Drop an optional ":port" suffix.
        host_port.split(':').next()?
    };

    if host.is_empty() {
        None
    } else {
        Some(host.to_string())
    }
}
| 34 | 50 | ||
| 35 | /// Find the next URL to try for an identifier. | 51 | /// Find the next URL to try for an identifier. |
diff --git a/src/purgatory/sync/loop.rs b/src/purgatory/sync/loop.rs index aaf1300..ebca766 100644 --- a/src/purgatory/sync/loop.rs +++ b/src/purgatory/sync/loop.rs | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | use std::sync::Arc; | 12 | use std::sync::Arc; |
| 13 | use std::time::Duration; | 13 | use std::time::Duration; |
| 14 | use tokio::task::JoinHandle; | 14 | use tokio::task::JoinHandle; |
| 15 | use tracing::{debug, info, warn}; | 15 | use tracing::{debug, info}; |
| 16 | 16 | ||
| 17 | use crate::purgatory::Purgatory; | 17 | use crate::purgatory::Purgatory; |
| 18 | 18 | ||