upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/src/purgatory/sync
diff options
context:
space:
mode:
Diffstat (limited to 'src/purgatory/sync')
-rw-r--r--src/purgatory/sync/context.rs163
-rw-r--r--src/purgatory/sync/functions.rs22
2 files changed, 110 insertions, 75 deletions
diff --git a/src/purgatory/sync/context.rs b/src/purgatory/sync/context.rs
index 33c2d12..904f8af 100644
--- a/src/purgatory/sync/context.rs
+++ b/src/purgatory/sync/context.rs
@@ -361,94 +361,121 @@ impl SyncContext for RealSyncContext {
361 let naughty_list = self.git_naughty_list.clone(); 361 let naughty_list = self.git_naughty_list.clone();
362 362
363 tokio::task::spawn_blocking(move || -> Result<Vec<String>> { 363 tokio::task::spawn_blocking(move || -> Result<Vec<String>> {
364 // git fetch <remote> <sha1> <sha2> ... - fetch all OIDs with full history 364 let mut remaining_oids = missing_oids.clone();
365 let mut args = vec!["fetch", &url]; 365 let mut missing_from_remote: Vec<String> = Vec::new();
366 args.extend(missing_oids.iter().map(|s| s.as_str())); 366
367 367 // Retry loop: keep fetching until success or no OIDs left
368 let output = Command::new("git") 368 loop {
369 .args(&args) 369 if remaining_oids.is_empty() {
370 .current_dir(&repo_path) 370 // All OIDs were missing from remote
371 .output(); 371 debug!(
372 372 url = %url,
373 match output { 373 missing_count = missing_from_remote.len(),
374 Ok(result) if result.status.success() => { 374 "All requested OIDs missing from remote"
375 // Count how many OIDs we now have 375 );
376 let fetched: Vec<String> = missing_oids 376 return Ok(vec![]);
377 .iter()
378 .filter(|oid| crate::git::oid_exists(&repo_path, oid))
379 .cloned()
380 .collect();
381
382 debug!(fetched_count = fetched.len(), "Successfully fetched OIDs");
383
384 Ok(fetched)
385 } 377 }
386 Ok(result) => { 378
387 let stderr = String::from_utf8_lossy(&result.stderr); 379 // git fetch <remote> <sha1> <sha2> ... - fetch all OIDs with full history
388 380 let mut args = vec!["fetch".to_string(), url.clone()];
389 // Extract domain and classify error for naughty list 381 args.extend(remaining_oids.iter().cloned());
390 if let Some(domain) = extract_domain(&url) { 382
391 if let Some(category) = NaughtyListTracker::classify_error(&stderr) { 383 let output = Command::new("git")
392 let is_new = naughty_list.record(&domain, category, stderr.to_string()); 384 .args(&args)
393 385 .current_dir(&repo_path)
394 if is_new { 386 .output();
395 tracing::warn!( 387
396 domain = %domain, 388 match output {
397 category = %category, 389 Ok(result) if result.status.success() => {
398 error = %stderr, 390 // Fetch succeeded - count how many OIDs we now have
399 "Git remote domain added to naughty list" 391 let fetched: Vec<String> = missing_oids
400 ); 392 .iter()
401 } else { 393 .filter(|oid| crate::git::oid_exists(&repo_path, oid))
402 debug!( 394 .cloned()
403 domain = %domain, 395 .collect();
404 category = %category, 396
405 "Git remote domain still on naughty list" 397 if !missing_from_remote.is_empty() {
406 ); 398 debug!(
407 } 399 url = %url,
400 fetched_count = fetched.len(),
401 missing_count = missing_from_remote.len(),
402 missing_oids = ?missing_from_remote,
403 "Fetch completed after retries - some OIDs were missing from remote"
404 );
405 } else {
406 debug!(url = %url, fetched_count = fetched.len(), "Successfully fetched OIDs");
408 } 407 }
408
409 return Ok(fetched);
409 } 410 }
411 Ok(result) => {
412 let stderr = String::from_utf8_lossy(&result.stderr);
410 413
411 // Check for "not our ref" errors and provide a clearer error message 414 // Check for "not our ref" error - this is retryable
412 let error_msg = if stderr.contains("upload-pack: not our ref") { 415 if stderr.contains("upload-pack: not our ref") {
413 // Parse out the missing OID from stderr (git only reports one at a time) 416 // Parse out the missing OID from stderr
414 let missing_oid = stderr 417 let missing_oid = stderr.lines().find_map(|line| {
415 .lines()
416 .find_map(|line| {
417 if line.contains("not our ref") { 418 if line.contains("not our ref") {
418 // Extract the OID from lines like: 419 // Extract the OID from lines like:
419 // "fatal: remote error: upload-pack: not our ref <oid>" 420 // "fatal: remote error: upload-pack: not our ref <oid>"
420 line.split("not our ref").nth(1).map(|s| s.trim().to_string()) 421 line.split("not our ref")
422 .nth(1)
423 .map(|s| s.trim().to_string())
421 } else { 424 } else {
422 None 425 None
423 } 426 }
424 }); 427 });
425 428
426 let total_requested = missing_oids.len(); 429 if let Some(ref oid) = missing_oid {
430 // Remove the missing OID and retry with remaining
431 remaining_oids.retain(|o| o != oid);
432 missing_from_remote.push(oid.clone());
427 433
428 if let Some(oid) = missing_oid { 434 debug!(
429 if total_requested > 1 {
430 // BUG: Git stops at first missing OID, so we don't know if the others exist
431 // We need retry logic to fetch remaining OIDs individually
432 tracing::warn!(
433 url = %url, 435 url = %url,
434 missing_oid = %oid, 436 missing_oid = %oid,
435 total_requested = total_requested, 437 remaining_count = remaining_oids.len(),
436 "Git fetch failed on first missing OID - other requested OIDs may exist but were not fetched. Retry logic needed." 438 "OID not found on remote, retrying with remaining OIDs"
437 ); 439 );
438 format!("remote missing oid {} (BUG: {} other oids not attempted)", oid, total_requested - 1) 440
439 } else { 441 continue; // Retry with remaining OIDs
440 format!("remote missing only oid requested: {}", oid) 442 }
443 }
444
445 // Non-retryable error - record to naughty list and return error
446 if let Some(domain) = extract_domain(&url) {
447 if let Some(category) = NaughtyListTracker::classify_error(&stderr) {
448 let is_new =
449 naughty_list.record(&domain, category, stderr.to_string());
450
451 if is_new {
452 tracing::warn!(
453 domain = %domain,
454 category = %category,
455 error = %stderr,
456 "Git remote domain added to naughty list"
457 );
458 } else {
459 debug!(
460 domain = %domain,
461 category = %category,
462 error = %stderr,
463 "Git fetch failed (domain on naughty list)"
464 );
465 }
441 } 466 }
442 } else {
443 format!("git fetch failed: {}", stderr)
444 } 467 }
445 } else {
446 format!("git fetch failed: {}", stderr)
447 };
448 468
449 Err(anyhow::anyhow!("{}", error_msg)) 469 return Err(anyhow::anyhow!("git fetch failed for {}: {}", url, stderr));
470 }
471 Err(e) => {
472 return Err(anyhow::anyhow!(
473 "git fetch command error for {}: {}",
474 url,
475 e
476 ))
477 }
450 } 478 }
451 Err(e) => Err(anyhow::anyhow!("git fetch command error: {}", e)),
452 } 479 }
453 }) 480 })
454 .await 481 .await
diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs
index 65d29af..9207d58 100644
--- a/src/purgatory/sync/functions.rs
+++ b/src/purgatory/sync/functions.rs
@@ -368,15 +368,23 @@ pub async fn sync_identifier_from_url<C: SyncContext + ?Sized>(
368 let fetch_result = ctx.fetch_oids(&target_repo, url, &needed_oids).await; 368 let fetch_result = ctx.fetch_oids(&target_repo, url, &needed_oids).await;
369 throttle_manager.complete_request(&domain); 369 throttle_manager.complete_request(&domain);
370 370
371 let oids_fetched = match fetch_result { 371 let fetched_oids = match fetch_result {
372 Ok(fetched) => { 372 Ok(fetched) if !fetched.is_empty() => {
373 debug!( 373 debug!(
374 identifier = %identifier, 374 identifier = %identifier,
375 url = %url, 375 url = %url,
376 oids_fetched = fetched.len(), 376 oids_fetched = fetched.len(),
377 "Fetch succeeded" 377 "Fetch succeeded"
378 ); 378 );
379 fetched.len() 379 fetched
380 }
381 Ok(_) => {
382 debug!(
383 identifier = %identifier,
384 url = %url,
385 "Fetch returned no OIDs (not available on remote)"
386 );
387 vec![]
380 } 388 }
381 Err(e) => { 389 Err(e) => {
382 debug!( 390 debug!(
@@ -385,13 +393,13 @@ pub async fn sync_identifier_from_url<C: SyncContext + ?Sized>(
385 error = %e, 393 error = %e,
386 "Fetch failed" 394 "Fetch failed"
387 ); 395 );
388 0 396 vec![]
389 } 397 }
390 }; 398 };
391 399
392 // Try to process any events that can now be satisfied 400 // Try to process any events that can now be satisfied
393 if oids_fetched > 0 { 401 if !fetched_oids.is_empty() {
394 let new_oids: HashSet<String> = needed_oids.into_iter().collect(); 402 let new_oids: HashSet<String> = fetched_oids.iter().cloned().collect();
395 if let Err(e) = ctx 403 if let Err(e) = ctx
396 .process_newly_available_git_data(&target_repo, &new_oids) 404 .process_newly_available_git_data(&target_repo, &new_oids)
397 .await 405 .await
@@ -404,7 +412,7 @@ pub async fn sync_identifier_from_url<C: SyncContext + ?Sized>(
404 } 412 }
405 } 413 }
406 414
407 oids_fetched 415 fetched_oids.len()
408} 416}
409 417
410/// Sync git data for an identifier. 418/// Sync git data for an identifier.