diff options
Diffstat (limited to 'src/purgatory/sync')
| -rw-r--r-- | src/purgatory/sync/context.rs | 163 | ||||
| -rw-r--r-- | src/purgatory/sync/functions.rs | 22 |
2 files changed, 110 insertions, 75 deletions
diff --git a/src/purgatory/sync/context.rs b/src/purgatory/sync/context.rs index ece8cd6..8297515 100644 --- a/src/purgatory/sync/context.rs +++ b/src/purgatory/sync/context.rs | |||
| @@ -375,94 +375,121 @@ impl SyncContext for RealSyncContext { | |||
| 375 | let naughty_list = self.git_naughty_list.clone(); | 375 | let naughty_list = self.git_naughty_list.clone(); |
| 376 | 376 | ||
| 377 | tokio::task::spawn_blocking(move || -> Result<Vec<String>> { | 377 | tokio::task::spawn_blocking(move || -> Result<Vec<String>> { |
| 378 | // git fetch <remote> <sha1> <sha2> ... - fetch all OIDs with full history | 378 | let mut remaining_oids = missing_oids.clone(); |
| 379 | let mut args = vec!["fetch", &url]; | 379 | let mut missing_from_remote: Vec<String> = Vec::new(); |
| 380 | args.extend(missing_oids.iter().map(|s| s.as_str())); | 380 | |
| 381 | 381 | // Retry loop: keep fetching until success or no OIDs left | |
| 382 | let output = Command::new("git") | 382 | loop { |
| 383 | .args(&args) | 383 | if remaining_oids.is_empty() { |
| 384 | .current_dir(&repo_path) | 384 | // All OIDs were missing from remote |
| 385 | .output(); | 385 | debug!( |
| 386 | 386 | url = %url, | |
| 387 | match output { | 387 | missing_count = missing_from_remote.len(), |
| 388 | Ok(result) if result.status.success() => { | 388 | "All requested OIDs missing from remote" |
| 389 | // Count how many OIDs we now have | 389 | ); |
| 390 | let fetched: Vec<String> = missing_oids | 390 | return Ok(vec![]); |
| 391 | .iter() | ||
| 392 | .filter(|oid| crate::git::oid_exists(&repo_path, oid)) | ||
| 393 | .cloned() | ||
| 394 | .collect(); | ||
| 395 | |||
| 396 | debug!(fetched_count = fetched.len(), "Successfully fetched OIDs"); | ||
| 397 | |||
| 398 | Ok(fetched) | ||
| 399 | } | 391 | } |
| 400 | Ok(result) => { | 392 | |
| 401 | let stderr = String::from_utf8_lossy(&result.stderr); | 393 | // git fetch <remote> <sha1> <sha2> ... - fetch all OIDs with full history |
| 402 | 394 | let mut args = vec!["fetch".to_string(), url.clone()]; | |
| 403 | // Extract domain and classify error for naughty list | 395 | args.extend(remaining_oids.iter().cloned()); |
| 404 | if let Some(domain) = extract_domain(&url) { | 396 | |
| 405 | if let Some(category) = NaughtyListTracker::classify_error(&stderr) { | 397 | let output = Command::new("git") |
| 406 | let is_new = naughty_list.record(&domain, category, stderr.to_string()); | 398 | .args(&args) |
| 407 | 399 | .current_dir(&repo_path) | |
| 408 | if is_new { | 400 | .output(); |
| 409 | tracing::warn!( | 401 | |
| 410 | domain = %domain, | 402 | match output { |
| 411 | category = %category, | 403 | Ok(result) if result.status.success() => { |
| 412 | error = %stderr, | 404 | // Fetch succeeded - count how many OIDs we now have |
| 413 | "Git remote domain added to naughty list" | 405 | let fetched: Vec<String> = missing_oids |
| 414 | ); | 406 | .iter() |
| 415 | } else { | 407 | .filter(|oid| crate::git::oid_exists(&repo_path, oid)) |
| 416 | debug!( | 408 | .cloned() |
| 417 | domain = %domain, | 409 | .collect(); |
| 418 | category = %category, | 410 | |
| 419 | "Git remote domain still on naughty list" | 411 | if !missing_from_remote.is_empty() { |
| 420 | ); | 412 | debug!( |
| 421 | } | 413 | url = %url, |
| 414 | fetched_count = fetched.len(), | ||
| 415 | missing_count = missing_from_remote.len(), | ||
| 416 | missing_oids = ?missing_from_remote, | ||
| 417 | "Fetch completed after retries - some OIDs were missing from remote" | ||
| 418 | ); | ||
| 419 | } else { | ||
| 420 | debug!(url = %url, fetched_count = fetched.len(), "Successfully fetched OIDs"); | ||
| 422 | } | 421 | } |
| 422 | |||
| 423 | return Ok(fetched); | ||
| 423 | } | 424 | } |
| 425 | Ok(result) => { | ||
| 426 | let stderr = String::from_utf8_lossy(&result.stderr); | ||
| 424 | 427 | ||
| 425 | // Check for "not our ref" errors and provide a clearer error message | 428 | // Check for "not our ref" error - this is retryable |
| 426 | let error_msg = if stderr.contains("upload-pack: not our ref") { | 429 | if stderr.contains("upload-pack: not our ref") { |
| 427 | // Parse out the missing OID from stderr (git only reports one at a time) | 430 | // Parse out the missing OID from stderr |
| 428 | let missing_oid = stderr | 431 | let missing_oid = stderr.lines().find_map(|line| { |
| 429 | .lines() | ||
| 430 | .find_map(|line| { | ||
| 431 | if line.contains("not our ref") { | 432 | if line.contains("not our ref") { |
| 432 | // Extract the OID from lines like: | 433 | // Extract the OID from lines like: |
| 433 | // "fatal: remote error: upload-pack: not our ref <oid>" | 434 | // "fatal: remote error: upload-pack: not our ref <oid>" |
| 434 | line.split("not our ref").nth(1).map(|s| s.trim().to_string()) | 435 | line.split("not our ref") |
| 436 | .nth(1) | ||
| 437 | .map(|s| s.trim().to_string()) | ||
| 435 | } else { | 438 | } else { |
| 436 | None | 439 | None |
| 437 | } | 440 | } |
| 438 | }); | 441 | }); |
| 439 | 442 | ||
| 440 | let total_requested = missing_oids.len(); | 443 | if let Some(ref oid) = missing_oid { |
| 444 | // Remove the missing OID and retry with remaining | ||
| 445 | remaining_oids.retain(|o| o != oid); | ||
| 446 | missing_from_remote.push(oid.clone()); | ||
| 441 | 447 | ||
| 442 | if let Some(oid) = missing_oid { | 448 | debug!( |
| 443 | if total_requested > 1 { | ||
| 444 | // BUG: Git stops at first missing OID, so we don't know if the others exist | ||
| 445 | // We need retry logic to fetch remaining OIDs individually | ||
| 446 | tracing::warn!( | ||
| 447 | url = %url, | 449 | url = %url, |
| 448 | missing_oid = %oid, | 450 | missing_oid = %oid, |
| 449 | total_requested = total_requested, | 451 | remaining_count = remaining_oids.len(), |
| 450 | "Git fetch failed on first missing OID - other requested OIDs may exist but were not fetched. Retry logic needed." | 452 | "OID not found on remote, retrying with remaining OIDs" |
| 451 | ); | 453 | ); |
| 452 | format!("remote missing oid {} (BUG: {} other oids not attempted)", oid, total_requested - 1) | 454 | |
| 453 | } else { | 455 | continue; // Retry with remaining OIDs |
| 454 | format!("remote missing only oid requested: {}", oid) | 456 | } |
| 457 | } | ||
| 458 | |||
| 459 | // Non-retryable error - record to naughty list and return error | ||
| 460 | if let Some(domain) = extract_domain(&url) { | ||
| 461 | if let Some(category) = NaughtyListTracker::classify_error(&stderr) { | ||
| 462 | let is_new = | ||
| 463 | naughty_list.record(&domain, category, stderr.to_string()); | ||
| 464 | |||
| 465 | if is_new { | ||
| 466 | tracing::warn!( | ||
| 467 | domain = %domain, | ||
| 468 | category = %category, | ||
| 469 | error = %stderr, | ||
| 470 | "Git remote domain added to naughty list" | ||
| 471 | ); | ||
| 472 | } else { | ||
| 473 | debug!( | ||
| 474 | domain = %domain, | ||
| 475 | category = %category, | ||
| 476 | error = %stderr, | ||
| 477 | "Git fetch failed (domain on naughty list)" | ||
| 478 | ); | ||
| 479 | } | ||
| 455 | } | 480 | } |
| 456 | } else { | ||
| 457 | format!("git fetch failed: {}", stderr) | ||
| 458 | } | 481 | } |
| 459 | } else { | ||
| 460 | format!("git fetch failed: {}", stderr) | ||
| 461 | }; | ||
| 462 | 482 | ||
| 463 | Err(anyhow::anyhow!("{}", error_msg)) | 483 | return Err(anyhow::anyhow!("git fetch failed for {}: {}", url, stderr)); |
| 484 | } | ||
| 485 | Err(e) => { | ||
| 486 | return Err(anyhow::anyhow!( | ||
| 487 | "git fetch command error for {}: {}", | ||
| 488 | url, | ||
| 489 | e | ||
| 490 | )) | ||
| 491 | } | ||
| 464 | } | 492 | } |
| 465 | Err(e) => Err(anyhow::anyhow!("git fetch command error: {}", e)), | ||
| 466 | } | 493 | } |
| 467 | }) | 494 | }) |
| 468 | .await | 495 | .await |
diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs index 65d29af..9207d58 100644 --- a/src/purgatory/sync/functions.rs +++ b/src/purgatory/sync/functions.rs | |||
| @@ -368,15 +368,23 @@ pub async fn sync_identifier_from_url<C: SyncContext + ?Sized>( | |||
| 368 | let fetch_result = ctx.fetch_oids(&target_repo, url, &needed_oids).await; | 368 | let fetch_result = ctx.fetch_oids(&target_repo, url, &needed_oids).await; |
| 369 | throttle_manager.complete_request(&domain); | 369 | throttle_manager.complete_request(&domain); |
| 370 | 370 | ||
| 371 | let oids_fetched = match fetch_result { | 371 | let fetched_oids = match fetch_result { |
| 372 | Ok(fetched) => { | 372 | Ok(fetched) if !fetched.is_empty() => { |
| 373 | debug!( | 373 | debug!( |
| 374 | identifier = %identifier, | 374 | identifier = %identifier, |
| 375 | url = %url, | 375 | url = %url, |
| 376 | oids_fetched = fetched.len(), | 376 | oids_fetched = fetched.len(), |
| 377 | "Fetch succeeded" | 377 | "Fetch succeeded" |
| 378 | ); | 378 | ); |
| 379 | fetched.len() | 379 | fetched |
| 380 | } | ||
| 381 | Ok(_) => { | ||
| 382 | debug!( | ||
| 383 | identifier = %identifier, | ||
| 384 | url = %url, | ||
| 385 | "Fetch returned no OIDs (not available on remote)" | ||
| 386 | ); | ||
| 387 | vec![] | ||
| 380 | } | 388 | } |
| 381 | Err(e) => { | 389 | Err(e) => { |
| 382 | debug!( | 390 | debug!( |
| @@ -385,13 +393,13 @@ pub async fn sync_identifier_from_url<C: SyncContext + ?Sized>( | |||
| 385 | error = %e, | 393 | error = %e, |
| 386 | "Fetch failed" | 394 | "Fetch failed" |
| 387 | ); | 395 | ); |
| 388 | 0 | 396 | vec![] |
| 389 | } | 397 | } |
| 390 | }; | 398 | }; |
| 391 | 399 | ||
| 392 | // Try to process any events that can now be satisfied | 400 | // Try to process any events that can now be satisfied |
| 393 | if oids_fetched > 0 { | 401 | if !fetched_oids.is_empty() { |
| 394 | let new_oids: HashSet<String> = needed_oids.into_iter().collect(); | 402 | let new_oids: HashSet<String> = fetched_oids.iter().cloned().collect(); |
| 395 | if let Err(e) = ctx | 403 | if let Err(e) = ctx |
| 396 | .process_newly_available_git_data(&target_repo, &new_oids) | 404 | .process_newly_available_git_data(&target_repo, &new_oids) |
| 397 | .await | 405 | .await |
| @@ -404,7 +412,7 @@ pub async fn sync_identifier_from_url<C: SyncContext + ?Sized>( | |||
| 404 | } | 412 | } |
| 405 | } | 413 | } |
| 406 | 414 | ||
| 407 | oids_fetched | 415 | fetched_oids.len() |
| 408 | } | 416 | } |
| 409 | 417 | ||
| 410 | /// Sync git data for an identifier. | 418 | /// Sync git data for an identifier. |