upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/src/purgatory/sync/context.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/purgatory/sync/context.rs')
-rw-r--r--src/purgatory/sync/context.rs163
1 files changed, 95 insertions, 68 deletions
diff --git a/src/purgatory/sync/context.rs b/src/purgatory/sync/context.rs
index ece8cd6..8297515 100644
--- a/src/purgatory/sync/context.rs
+++ b/src/purgatory/sync/context.rs
@@ -375,94 +375,121 @@ impl SyncContext for RealSyncContext {
375 let naughty_list = self.git_naughty_list.clone(); 375 let naughty_list = self.git_naughty_list.clone();
376 376
377 tokio::task::spawn_blocking(move || -> Result<Vec<String>> { 377 tokio::task::spawn_blocking(move || -> Result<Vec<String>> {
378 // git fetch <remote> <sha1> <sha2> ... - fetch all OIDs with full history 378 let mut remaining_oids = missing_oids.clone();
379 let mut args = vec!["fetch", &url]; 379 let mut missing_from_remote: Vec<String> = Vec::new();
380 args.extend(missing_oids.iter().map(|s| s.as_str())); 380
381 381 // Retry loop: keep fetching until success or no OIDs left
382 let output = Command::new("git") 382 loop {
383 .args(&args) 383 if remaining_oids.is_empty() {
384 .current_dir(&repo_path) 384 // All OIDs were missing from remote
385 .output(); 385 debug!(
386 386 url = %url,
387 match output { 387 missing_count = missing_from_remote.len(),
388 Ok(result) if result.status.success() => { 388 "All requested OIDs missing from remote"
389 // Count how many OIDs we now have 389 );
390 let fetched: Vec<String> = missing_oids 390 return Ok(vec![]);
391 .iter()
392 .filter(|oid| crate::git::oid_exists(&repo_path, oid))
393 .cloned()
394 .collect();
395
396 debug!(fetched_count = fetched.len(), "Successfully fetched OIDs");
397
398 Ok(fetched)
399 } 391 }
400 Ok(result) => { 392
401 let stderr = String::from_utf8_lossy(&result.stderr); 393 // git fetch <remote> <sha1> <sha2> ... - fetch all OIDs with full history
402 394 let mut args = vec!["fetch".to_string(), url.clone()];
403 // Extract domain and classify error for naughty list 395 args.extend(remaining_oids.iter().cloned());
404 if let Some(domain) = extract_domain(&url) { 396
405 if let Some(category) = NaughtyListTracker::classify_error(&stderr) { 397 let output = Command::new("git")
406 let is_new = naughty_list.record(&domain, category, stderr.to_string()); 398 .args(&args)
407 399 .current_dir(&repo_path)
408 if is_new { 400 .output();
409 tracing::warn!( 401
410 domain = %domain, 402 match output {
411 category = %category, 403 Ok(result) if result.status.success() => {
412 error = %stderr, 404 // Fetch succeeded - count how many OIDs we now have
413 "Git remote domain added to naughty list" 405 let fetched: Vec<String> = missing_oids
414 ); 406 .iter()
415 } else { 407 .filter(|oid| crate::git::oid_exists(&repo_path, oid))
416 debug!( 408 .cloned()
417 domain = %domain, 409 .collect();
418 category = %category, 410
419 "Git remote domain still on naughty list" 411 if !missing_from_remote.is_empty() {
420 ); 412 debug!(
421 } 413 url = %url,
414 fetched_count = fetched.len(),
415 missing_count = missing_from_remote.len(),
416 missing_oids = ?missing_from_remote,
417 "Fetch completed after retries - some OIDs were missing from remote"
418 );
419 } else {
420 debug!(url = %url, fetched_count = fetched.len(), "Successfully fetched OIDs");
422 } 421 }
422
423 return Ok(fetched);
423 } 424 }
425 Ok(result) => {
426 let stderr = String::from_utf8_lossy(&result.stderr);
424 427
425 // Check for "not our ref" errors and provide a clearer error message 428 // Check for "not our ref" error - this is retryable
426 let error_msg = if stderr.contains("upload-pack: not our ref") { 429 if stderr.contains("upload-pack: not our ref") {
427 // Parse out the missing OID from stderr (git only reports one at a time) 430 // Parse out the missing OID from stderr
428 let missing_oid = stderr 431 let missing_oid = stderr.lines().find_map(|line| {
429 .lines()
430 .find_map(|line| {
431 if line.contains("not our ref") { 432 if line.contains("not our ref") {
432 // Extract the OID from lines like: 433 // Extract the OID from lines like:
433 // "fatal: remote error: upload-pack: not our ref <oid>" 434 // "fatal: remote error: upload-pack: not our ref <oid>"
434 line.split("not our ref").nth(1).map(|s| s.trim().to_string()) 435 line.split("not our ref")
436 .nth(1)
437 .map(|s| s.trim().to_string())
435 } else { 438 } else {
436 None 439 None
437 } 440 }
438 }); 441 });
439 442
440 let total_requested = missing_oids.len(); 443 if let Some(ref oid) = missing_oid {
444 // Remove the missing OID and retry with remaining
445 remaining_oids.retain(|o| o != oid);
446 missing_from_remote.push(oid.clone());
441 447
442 if let Some(oid) = missing_oid { 448 debug!(
443 if total_requested > 1 {
444 // BUG: Git stops at first missing OID, so we don't know if the others exist
445 // We need retry logic to fetch remaining OIDs individually
446 tracing::warn!(
447 url = %url, 449 url = %url,
448 missing_oid = %oid, 450 missing_oid = %oid,
449 total_requested = total_requested, 451 remaining_count = remaining_oids.len(),
450 "Git fetch failed on first missing OID - other requested OIDs may exist but were not fetched. Retry logic needed." 452 "OID not found on remote, retrying with remaining OIDs"
451 ); 453 );
452 format!("remote missing oid {} (BUG: {} other oids not attempted)", oid, total_requested - 1) 454
453 } else { 455 continue; // Retry with remaining OIDs
454 format!("remote missing only oid requested: {}", oid) 456 }
457 }
458
459 // Non-retryable error - record to naughty list and return error
460 if let Some(domain) = extract_domain(&url) {
461 if let Some(category) = NaughtyListTracker::classify_error(&stderr) {
462 let is_new =
463 naughty_list.record(&domain, category, stderr.to_string());
464
465 if is_new {
466 tracing::warn!(
467 domain = %domain,
468 category = %category,
469 error = %stderr,
470 "Git remote domain added to naughty list"
471 );
472 } else {
473 debug!(
474 domain = %domain,
475 category = %category,
476 error = %stderr,
477 "Git fetch failed (domain on naughty list)"
478 );
479 }
455 } 480 }
456 } else {
457 format!("git fetch failed: {}", stderr)
458 } 481 }
459 } else {
460 format!("git fetch failed: {}", stderr)
461 };
462 482
463 Err(anyhow::anyhow!("{}", error_msg)) 483 return Err(anyhow::anyhow!("git fetch failed for {}: {}", url, stderr));
484 }
485 Err(e) => {
486 return Err(anyhow::anyhow!(
487 "git fetch command error for {}: {}",
488 url,
489 e
490 ))
491 }
464 } 492 }
465 Err(e) => Err(anyhow::anyhow!("git fetch command error: {}", e)),
466 } 493 }
467 }) 494 })
468 .await 495 .await