diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-02-03 14:50:22 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-02-03 15:18:23 +0000 |
| commit | 874a8abe1d076cfafd9baf919ec23d7d58200698 (patch) | |
| tree | dce0d0d36bddc496ff32f8555a8790d8dc7be7e4 /src/purgatory/sync | |
| parent | 9fd4350c57bbe986ebf65bf3ea4c996572e81884 (diff) | |
| parent | 92a9a3bfe0bc522e8ae411991a366a3a6310d525 (diff) | |
Merge relay.ngit.dev migration: bug fixes and migration tooling
This merge includes critical bug fixes and comprehensive migration tooling
developed during the relay.ngit.dev migration effort.
Bug Fixes:
- Fix git protocol error handling to return HTTP 200 with ERR pkt-line
- Fix naughty list false positives and DNS failure identification
- Fix database query filters in load_existing_events (remove .since())
- Fix OID fetch tracking to distinguish 0 OIDs from successful fetches
- Fix purgatory event source tracking for filtered expiry logging
- Implement OID retry logic for 'not our ref' errors
Migration Tools & Documentation:
- Complete 5-phase migration analysis pipeline with orchestration script
- Phase 1: Event fetching from source relay
- Phase 2: Git sync verification
- Phase 3: Categorization and relay comparison
- Phase 4: Log extraction (parse failures, purgatory expiry)
- Phase 5: Action classification for migration decisions
- Comprehensive migration guide with lessons learned
- Troubleshooting guide for permission and corruption issues
Configuration:
- Add NGIT_LOG_LEVEL configuration option
- Update git throttle limits to 60/minute
- Improve logging throughout for better observability
Diffstat (limited to 'src/purgatory/sync')
| -rw-r--r-- | src/purgatory/sync/context.rs | 163 | ||||
| -rw-r--r-- | src/purgatory/sync/functions.rs | 22 |
2 files changed, 110 insertions, 75 deletions
diff --git a/src/purgatory/sync/context.rs b/src/purgatory/sync/context.rs index 33c2d12..904f8af 100644 --- a/src/purgatory/sync/context.rs +++ b/src/purgatory/sync/context.rs | |||
| @@ -361,94 +361,121 @@ impl SyncContext for RealSyncContext { | |||
| 361 | let naughty_list = self.git_naughty_list.clone(); | 361 | let naughty_list = self.git_naughty_list.clone(); |
| 362 | 362 | ||
| 363 | tokio::task::spawn_blocking(move || -> Result<Vec<String>> { | 363 | tokio::task::spawn_blocking(move || -> Result<Vec<String>> { |
| 364 | // git fetch <remote> <sha1> <sha2> ... - fetch all OIDs with full history | 364 | let mut remaining_oids = missing_oids.clone(); |
| 365 | let mut args = vec!["fetch", &url]; | 365 | let mut missing_from_remote: Vec<String> = Vec::new(); |
| 366 | args.extend(missing_oids.iter().map(|s| s.as_str())); | 366 | |
| 367 | 367 | // Retry loop: keep fetching until success or no OIDs left | |
| 368 | let output = Command::new("git") | 368 | loop { |
| 369 | .args(&args) | 369 | if remaining_oids.is_empty() { |
| 370 | .current_dir(&repo_path) | 370 | // All OIDs were missing from remote |
| 371 | .output(); | 371 | debug!( |
| 372 | 372 | url = %url, | |
| 373 | match output { | 373 | missing_count = missing_from_remote.len(), |
| 374 | Ok(result) if result.status.success() => { | 374 | "All requested OIDs missing from remote" |
| 375 | // Count how many OIDs we now have | 375 | ); |
| 376 | let fetched: Vec<String> = missing_oids | 376 | return Ok(vec![]); |
| 377 | .iter() | ||
| 378 | .filter(|oid| crate::git::oid_exists(&repo_path, oid)) | ||
| 379 | .cloned() | ||
| 380 | .collect(); | ||
| 381 | |||
| 382 | debug!(fetched_count = fetched.len(), "Successfully fetched OIDs"); | ||
| 383 | |||
| 384 | Ok(fetched) | ||
| 385 | } | 377 | } |
| 386 | Ok(result) => { | 378 | |
| 387 | let stderr = String::from_utf8_lossy(&result.stderr); | 379 | // git fetch <remote> <sha1> <sha2> ... - fetch all OIDs with full history |
| 388 | 380 | let mut args = vec!["fetch".to_string(), url.clone()]; | |
| 389 | // Extract domain and classify error for naughty list | 381 | args.extend(remaining_oids.iter().cloned()); |
| 390 | if let Some(domain) = extract_domain(&url) { | 382 | |
| 391 | if let Some(category) = NaughtyListTracker::classify_error(&stderr) { | 383 | let output = Command::new("git") |
| 392 | let is_new = naughty_list.record(&domain, category, stderr.to_string()); | 384 | .args(&args) |
| 393 | 385 | .current_dir(&repo_path) | |
| 394 | if is_new { | 386 | .output(); |
| 395 | tracing::warn!( | 387 | |
| 396 | domain = %domain, | 388 | match output { |
| 397 | category = %category, | 389 | Ok(result) if result.status.success() => { |
| 398 | error = %stderr, | 390 | // Fetch succeeded - count how many OIDs we now have |
| 399 | "Git remote domain added to naughty list" | 391 | let fetched: Vec<String> = missing_oids |
| 400 | ); | 392 | .iter() |
| 401 | } else { | 393 | .filter(|oid| crate::git::oid_exists(&repo_path, oid)) |
| 402 | debug!( | 394 | .cloned() |
| 403 | domain = %domain, | 395 | .collect(); |
| 404 | category = %category, | 396 | |
| 405 | "Git remote domain still on naughty list" | 397 | if !missing_from_remote.is_empty() { |
| 406 | ); | 398 | debug!( |
| 407 | } | 399 | url = %url, |
| 400 | fetched_count = fetched.len(), | ||
| 401 | missing_count = missing_from_remote.len(), | ||
| 402 | missing_oids = ?missing_from_remote, | ||
| 403 | "Fetch completed after retries - some OIDs were missing from remote" | ||
| 404 | ); | ||
| 405 | } else { | ||
| 406 | debug!(url = %url, fetched_count = fetched.len(), "Successfully fetched OIDs"); | ||
| 408 | } | 407 | } |
| 408 | |||
| 409 | return Ok(fetched); | ||
| 409 | } | 410 | } |
| 411 | Ok(result) => { | ||
| 412 | let stderr = String::from_utf8_lossy(&result.stderr); | ||
| 410 | 413 | ||
| 411 | // Check for "not our ref" errors and provide a clearer error message | 414 | // Check for "not our ref" error - this is retryable |
| 412 | let error_msg = if stderr.contains("upload-pack: not our ref") { | 415 | if stderr.contains("upload-pack: not our ref") { |
| 413 | // Parse out the missing OID from stderr (git only reports one at a time) | 416 | // Parse out the missing OID from stderr |
| 414 | let missing_oid = stderr | 417 | let missing_oid = stderr.lines().find_map(|line| { |
| 415 | .lines() | ||
| 416 | .find_map(|line| { | ||
| 417 | if line.contains("not our ref") { | 418 | if line.contains("not our ref") { |
| 418 | // Extract the OID from lines like: | 419 | // Extract the OID from lines like: |
| 419 | // "fatal: remote error: upload-pack: not our ref <oid>" | 420 | // "fatal: remote error: upload-pack: not our ref <oid>" |
| 420 | line.split("not our ref").nth(1).map(|s| s.trim().to_string()) | 421 | line.split("not our ref") |
| 422 | .nth(1) | ||
| 423 | .map(|s| s.trim().to_string()) | ||
| 421 | } else { | 424 | } else { |
| 422 | None | 425 | None |
| 423 | } | 426 | } |
| 424 | }); | 427 | }); |
| 425 | 428 | ||
| 426 | let total_requested = missing_oids.len(); | 429 | if let Some(ref oid) = missing_oid { |
| 430 | // Remove the missing OID and retry with remaining | ||
| 431 | remaining_oids.retain(|o| o != oid); | ||
| 432 | missing_from_remote.push(oid.clone()); | ||
| 427 | 433 | ||
| 428 | if let Some(oid) = missing_oid { | 434 | debug!( |
| 429 | if total_requested > 1 { | ||
| 430 | // BUG: Git stops at first missing OID, so we don't know if the others exist | ||
| 431 | // We need retry logic to fetch remaining OIDs individually | ||
| 432 | tracing::warn!( | ||
| 433 | url = %url, | 435 | url = %url, |
| 434 | missing_oid = %oid, | 436 | missing_oid = %oid, |
| 435 | total_requested = total_requested, | 437 | remaining_count = remaining_oids.len(), |
| 436 | "Git fetch failed on first missing OID - other requested OIDs may exist but were not fetched. Retry logic needed." | 438 | "OID not found on remote, retrying with remaining OIDs" |
| 437 | ); | 439 | ); |
| 438 | format!("remote missing oid {} (BUG: {} other oids not attempted)", oid, total_requested - 1) | 440 | |
| 439 | } else { | 441 | continue; // Retry with remaining OIDs |
| 440 | format!("remote missing only oid requested: {}", oid) | 442 | } |
| 443 | } | ||
| 444 | |||
| 445 | // Non-retryable error - record to naughty list and return error | ||
| 446 | if let Some(domain) = extract_domain(&url) { | ||
| 447 | if let Some(category) = NaughtyListTracker::classify_error(&stderr) { | ||
| 448 | let is_new = | ||
| 449 | naughty_list.record(&domain, category, stderr.to_string()); | ||
| 450 | |||
| 451 | if is_new { | ||
| 452 | tracing::warn!( | ||
| 453 | domain = %domain, | ||
| 454 | category = %category, | ||
| 455 | error = %stderr, | ||
| 456 | "Git remote domain added to naughty list" | ||
| 457 | ); | ||
| 458 | } else { | ||
| 459 | debug!( | ||
| 460 | domain = %domain, | ||
| 461 | category = %category, | ||
| 462 | error = %stderr, | ||
| 463 | "Git fetch failed (domain on naughty list)" | ||
| 464 | ); | ||
| 465 | } | ||
| 441 | } | 466 | } |
| 442 | } else { | ||
| 443 | format!("git fetch failed: {}", stderr) | ||
| 444 | } | 467 | } |
| 445 | } else { | ||
| 446 | format!("git fetch failed: {}", stderr) | ||
| 447 | }; | ||
| 448 | 468 | ||
| 449 | Err(anyhow::anyhow!("{}", error_msg)) | 469 | return Err(anyhow::anyhow!("git fetch failed for {}: {}", url, stderr)); |
| 470 | } | ||
| 471 | Err(e) => { | ||
| 472 | return Err(anyhow::anyhow!( | ||
| 473 | "git fetch command error for {}: {}", | ||
| 474 | url, | ||
| 475 | e | ||
| 476 | )) | ||
| 477 | } | ||
| 450 | } | 478 | } |
| 451 | Err(e) => Err(anyhow::anyhow!("git fetch command error: {}", e)), | ||
| 452 | } | 479 | } |
| 453 | }) | 480 | }) |
| 454 | .await | 481 | .await |
diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs index 65d29af..9207d58 100644 --- a/src/purgatory/sync/functions.rs +++ b/src/purgatory/sync/functions.rs | |||
| @@ -368,15 +368,23 @@ pub async fn sync_identifier_from_url<C: SyncContext + ?Sized>( | |||
| 368 | let fetch_result = ctx.fetch_oids(&target_repo, url, &needed_oids).await; | 368 | let fetch_result = ctx.fetch_oids(&target_repo, url, &needed_oids).await; |
| 369 | throttle_manager.complete_request(&domain); | 369 | throttle_manager.complete_request(&domain); |
| 370 | 370 | ||
| 371 | let oids_fetched = match fetch_result { | 371 | let fetched_oids = match fetch_result { |
| 372 | Ok(fetched) => { | 372 | Ok(fetched) if !fetched.is_empty() => { |
| 373 | debug!( | 373 | debug!( |
| 374 | identifier = %identifier, | 374 | identifier = %identifier, |
| 375 | url = %url, | 375 | url = %url, |
| 376 | oids_fetched = fetched.len(), | 376 | oids_fetched = fetched.len(), |
| 377 | "Fetch succeeded" | 377 | "Fetch succeeded" |
| 378 | ); | 378 | ); |
| 379 | fetched.len() | 379 | fetched |
| 380 | } | ||
| 381 | Ok(_) => { | ||
| 382 | debug!( | ||
| 383 | identifier = %identifier, | ||
| 384 | url = %url, | ||
| 385 | "Fetch returned no OIDs (not available on remote)" | ||
| 386 | ); | ||
| 387 | vec![] | ||
| 380 | } | 388 | } |
| 381 | Err(e) => { | 389 | Err(e) => { |
| 382 | debug!( | 390 | debug!( |
| @@ -385,13 +393,13 @@ pub async fn sync_identifier_from_url<C: SyncContext + ?Sized>( | |||
| 385 | error = %e, | 393 | error = %e, |
| 386 | "Fetch failed" | 394 | "Fetch failed" |
| 387 | ); | 395 | ); |
| 388 | 0 | 396 | vec![] |
| 389 | } | 397 | } |
| 390 | }; | 398 | }; |
| 391 | 399 | ||
| 392 | // Try to process any events that can now be satisfied | 400 | // Try to process any events that can now be satisfied |
| 393 | if oids_fetched > 0 { | 401 | if !fetched_oids.is_empty() { |
| 394 | let new_oids: HashSet<String> = needed_oids.into_iter().collect(); | 402 | let new_oids: HashSet<String> = fetched_oids.iter().cloned().collect(); |
| 395 | if let Err(e) = ctx | 403 | if let Err(e) = ctx |
| 396 | .process_newly_available_git_data(&target_repo, &new_oids) | 404 | .process_newly_available_git_data(&target_repo, &new_oids) |
| 397 | .await | 405 | .await |
| @@ -404,7 +412,7 @@ pub async fn sync_identifier_from_url<C: SyncContext + ?Sized>( | |||
| 404 | } | 412 | } |
| 405 | } | 413 | } |
| 406 | 414 | ||
| 407 | oids_fetched | 415 | fetched_oids.len() |
| 408 | } | 416 | } |
| 409 | 417 | ||
| 410 | /// Sync git data for an identifier. | 418 | /// Sync git data for an identifier. |