diff options
Diffstat (limited to 'src/purgatory')
| -rw-r--r-- | src/purgatory/sync/context.rs | 163 |
1 files changed, 96 insertions, 67 deletions
diff --git a/src/purgatory/sync/context.rs b/src/purgatory/sync/context.rs index 33c2d12..0df8be0 100644 --- a/src/purgatory/sync/context.rs +++ b/src/purgatory/sync/context.rs | |||
| @@ -361,59 +361,60 @@ impl SyncContext for RealSyncContext { | |||
| 361 | let naughty_list = self.git_naughty_list.clone(); | 361 | let naughty_list = self.git_naughty_list.clone(); |
| 362 | 362 | ||
| 363 | tokio::task::spawn_blocking(move || -> Result<Vec<String>> { | 363 | tokio::task::spawn_blocking(move || -> Result<Vec<String>> { |
| 364 | // git fetch <remote> <sha1> <sha2> ... - fetch all OIDs with full history | 364 | let mut remaining_oids = missing_oids.clone(); |
| 365 | let mut args = vec!["fetch", &url]; | 365 | let mut missing_from_remote: Vec<String> = Vec::new(); |
| 366 | args.extend(missing_oids.iter().map(|s| s.as_str())); | 366 | |
| 367 | 367 | // Retry loop: keep fetching until success or no OIDs left | |
| 368 | let output = Command::new("git") | 368 | loop { |
| 369 | .args(&args) | 369 | if remaining_oids.is_empty() { |
| 370 | .current_dir(&repo_path) | 370 | // All OIDs were missing from remote |
| 371 | .output(); | 371 | debug!( |
| 372 | 372 | url = %url, | |
| 373 | match output { | 373 | missing_count = missing_from_remote.len(), |
| 374 | Ok(result) if result.status.success() => { | 374 | "All requested OIDs missing from remote" |
| 375 | // Count how many OIDs we now have | 375 | ); |
| 376 | let fetched: Vec<String> = missing_oids | 376 | return Ok(vec![]); |
| 377 | .iter() | ||
| 378 | .filter(|oid| crate::git::oid_exists(&repo_path, oid)) | ||
| 379 | .cloned() | ||
| 380 | .collect(); | ||
| 381 | |||
| 382 | debug!(fetched_count = fetched.len(), "Successfully fetched OIDs"); | ||
| 383 | |||
| 384 | Ok(fetched) | ||
| 385 | } | 377 | } |
| 386 | Ok(result) => { | 378 | |
| 387 | let stderr = String::from_utf8_lossy(&result.stderr); | 379 | // git fetch <remote> <sha1> <sha2> ... - fetch all OIDs with full history |
| 388 | 380 | let mut args = vec!["fetch".to_string(), url.clone()]; | |
| 389 | // Extract domain and classify error for naughty list | 381 | args.extend(remaining_oids.iter().cloned()); |
| 390 | if let Some(domain) = extract_domain(&url) { | 382 | |
| 391 | if let Some(category) = NaughtyListTracker::classify_error(&stderr) { | 383 | let output = Command::new("git") |
| 392 | let is_new = naughty_list.record(&domain, category, stderr.to_string()); | 384 | .args(&args) |
| 393 | 385 | .current_dir(&repo_path) | |
| 394 | if is_new { | 386 | .output(); |
| 395 | tracing::warn!( | 387 | |
| 396 | domain = %domain, | 388 | match output { |
| 397 | category = %category, | 389 | Ok(result) if result.status.success() => { |
| 398 | error = %stderr, | 390 | // Fetch succeeded - count how many OIDs we now have |
| 399 | "Git remote domain added to naughty list" | 391 | let fetched: Vec<String> = missing_oids |
| 400 | ); | 392 | .iter() |
| 401 | } else { | 393 | .filter(|oid| crate::git::oid_exists(&repo_path, oid)) |
| 402 | debug!( | 394 | .cloned() |
| 403 | domain = %domain, | 395 | .collect(); |
| 404 | category = %category, | 396 | |
| 405 | "Git remote domain still on naughty list" | 397 | if !missing_from_remote.is_empty() { |
| 406 | ); | 398 | debug!( |
| 407 | } | 399 | url = %url, |
| 400 | fetched_count = fetched.len(), | ||
| 401 | missing_count = missing_from_remote.len(), | ||
| 402 | missing_oids = ?missing_from_remote, | ||
| 403 | "Fetch completed after retries - some OIDs were missing from remote" | ||
| 404 | ); | ||
| 405 | } else { | ||
| 406 | debug!(fetched_count = fetched.len(), "Successfully fetched OIDs"); | ||
| 408 | } | 407 | } |
| 408 | |||
| 409 | return Ok(fetched); | ||
| 409 | } | 410 | } |
| 411 | Ok(result) => { | ||
| 412 | let stderr = String::from_utf8_lossy(&result.stderr); | ||
| 410 | 413 | ||
| 411 | // Check for "not our ref" errors and provide a clearer error message | 414 | // Check for "not our ref" error - this is retryable |
| 412 | let error_msg = if stderr.contains("upload-pack: not our ref") { | 415 | if stderr.contains("upload-pack: not our ref") { |
| 413 | // Parse out the missing OID from stderr (git only reports one at a time) | 416 | // Parse out the missing OID from stderr |
| 414 | let missing_oid = stderr | 417 | let missing_oid = stderr.lines().find_map(|line| { |
| 415 | .lines() | ||
| 416 | .find_map(|line| { | ||
| 417 | if line.contains("not our ref") { | 418 | if line.contains("not our ref") { |
| 418 | // Extract the OID from lines like: | 419 | // Extract the OID from lines like: |
| 419 | // "fatal: remote error: upload-pack: not our ref <oid>" | 420 | // "fatal: remote error: upload-pack: not our ref <oid>" |
| @@ -423,32 +424,60 @@ impl SyncContext for RealSyncContext { | |||
| 423 | } | 424 | } |
| 424 | }); | 425 | }); |
| 425 | 426 | ||
| 426 | let total_requested = missing_oids.len(); | 427 | if let Some(ref oid) = missing_oid { |
| 428 | // Remove the missing OID and retry with remaining | ||
| 429 | remaining_oids.retain(|o| o != oid); | ||
| 430 | missing_from_remote.push(oid.clone()); | ||
| 427 | 431 | ||
| 428 | if let Some(oid) = missing_oid { | 432 | debug!( |
| 429 | if total_requested > 1 { | ||
| 430 | // BUG: Git stops at first missing OID, so we don't know if the others exist | ||
| 431 | // We need retry logic to fetch remaining OIDs individually | ||
| 432 | tracing::warn!( | ||
| 433 | url = %url, | 433 | url = %url, |
| 434 | missing_oid = %oid, | 434 | missing_oid = %oid, |
| 435 | total_requested = total_requested, | 435 | remaining_count = remaining_oids.len(), |
| 436 | "Git fetch failed on first missing OID - other requested OIDs may exist but were not fetched. Retry logic needed." | 436 | "OID not found on remote, retrying with remaining OIDs" |
| 437 | ); | 437 | ); |
| 438 | format!("remote missing oid {} (BUG: {} other oids not attempted)", oid, total_requested - 1) | 438 | |
| 439 | } else { | 439 | continue; // Retry with remaining OIDs |
| 440 | format!("remote missing only oid requested: {}", oid) | 440 | } |
| 441 | } | ||
| 442 | |||
| 443 | // Non-retryable error - record to naughty list and return error | ||
| 444 | if let Some(domain) = extract_domain(&url) { | ||
| 445 | if let Some(category) = NaughtyListTracker::classify_error(&stderr) { | ||
| 446 | let is_new = | ||
| 447 | naughty_list.record(&domain, category, stderr.to_string()); | ||
| 448 | |||
| 449 | if is_new { | ||
| 450 | tracing::warn!( | ||
| 451 | domain = %domain, | ||
| 452 | category = %category, | ||
| 453 | error = %stderr, | ||
| 454 | "Git remote domain added to naughty list" | ||
| 455 | ); | ||
| 456 | } else { | ||
| 457 | debug!( | ||
| 458 | domain = %domain, | ||
| 459 | category = %category, | ||
| 460 | error = %stderr, | ||
| 461 | "Git fetch failed (domain on naughty list)" | ||
| 462 | ); | ||
| 463 | } | ||
| 441 | } | 464 | } |
| 442 | } else { | ||
| 443 | format!("git fetch failed: {}", stderr) | ||
| 444 | } | 465 | } |
| 445 | } else { | ||
| 446 | format!("git fetch failed: {}", stderr) | ||
| 447 | }; | ||
| 448 | 466 | ||
| 449 | Err(anyhow::anyhow!("{}", error_msg)) | 467 | return Err(anyhow::anyhow!( |
| 468 | "git fetch failed for {}: {}", | ||
| 469 | url, | ||
| 470 | stderr | ||
| 471 | )); | ||
| 472 | } | ||
| 473 | Err(e) => { | ||
| 474 | return Err(anyhow::anyhow!( | ||
| 475 | "git fetch command error for {}: {}", | ||
| 476 | url, | ||
| 477 | e | ||
| 478 | )) | ||
| 479 | } | ||
| 450 | } | 480 | } |
| 451 | Err(e) => Err(anyhow::anyhow!("git fetch command error: {}", e)), | ||
| 452 | } | 481 | } |
| 453 | }) | 482 | }) |
| 454 | .await | 483 | .await |