diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-04-10 20:29:34 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-04-10 20:29:34 +0000 |
| commit | 54636ee239e4f8e25142e99807f246f956d2f003 (patch) | |
| tree | 282269373761b7ab54f605528967708b9907039b | |
| parent | 368f0556267b32c5478b7fb68b8a30d42942ee6f (diff) | |
feat: scan filesystem for orphan git repos with no matching 30617 event
Extends cleanup-empty-repos with a second scan direction (filesystem → DB).
Bare git repos under the git data path that have no corresponding 30617
announcement event are identified as orphans and become candidates for cleanup.
When run with --execute, empty orphans are always removed. Non-empty orphans
are flagged in the report but only deleted when --purge-orphans is also passed,
preventing accidental data loss. Without --execute, nothing is deleted (dry run).
| -rw-r--r-- | src/cleanup_empty_repos.rs | 285 |
1 file changed, 276 insertions, 9 deletions
diff --git a/src/cleanup_empty_repos.rs b/src/cleanup_empty_repos.rs index 8f5492d..e7eea61 100644 --- a/src/cleanup_empty_repos.rs +++ b/src/cleanup_empty_repos.rs | |||
| @@ -11,6 +11,15 @@ | |||
| 11 | //! git data. If the bare repo is empty or absent, the events are stale and should be | 11 | //! git data. If the bare repo is empty or absent, the events are stale and should be |
| 12 | //! removed so the relay does not serve them. | 12 | //! removed so the relay does not serve them. |
| 13 | //! | 13 | //! |
| 14 | //! Two scans are performed: | ||
| 15 | //! | ||
| 16 | //! 1. **DB → filesystem**: finds 30617 events whose bare git repo is empty or missing. | ||
| 17 | //! Both the 30617 and any matching 30618 events are removed. | ||
| 18 | //! | ||
| 19 | //! 2. **Filesystem → DB**: finds bare git repos on disk with no matching 30617 event. | ||
| 20 | //! Empty orphan repos are always removed. Non-empty orphan repos are flagged and | ||
| 21 | //! only removed when `--purge-orphans` is also passed. | ||
| 22 | //! | ||
| 14 | //! ## Usage | 23 | //! ## Usage |
| 15 | //! | 24 | //! |
| 16 | //! ```text | 25 | //! ```text |
| @@ -22,6 +31,11 @@ | |||
| 22 | //! ngit-grasp cleanup-empty-repos --relay-data-path /var/lib/ngit-grasp/relay \ | 31 | //! ngit-grasp cleanup-empty-repos --relay-data-path /var/lib/ngit-grasp/relay \ |
| 23 | //! --git-data-path /var/lib/ngit-grasp/git \ | 32 | //! --git-data-path /var/lib/ngit-grasp/git \ |
| 24 | //! --execute | 33 | //! --execute |
| 34 | //! | ||
| 35 | //! # Also purge non-empty orphan repos (no matching 30617 in DB) | ||
| 36 | //! ngit-grasp cleanup-empty-repos --relay-data-path /var/lib/ngit-grasp/relay \ | ||
| 37 | //! --git-data-path /var/lib/ngit-grasp/git \ | ||
| 38 | //! --execute --purge-orphans | ||
| 25 | //! ``` | 39 | //! ``` |
| 26 | //! | 40 | //! |
| 27 | //! The relay service should be stopped before running with `--execute` to avoid | 41 | //! The relay service should be stopped before running with `--execute` to avoid |
| @@ -59,6 +73,26 @@ pub struct CleanupArgs { | |||
| 59 | /// would be deleted. Stop the relay service before using this flag. | 73 | /// would be deleted. Stop the relay service before using this flag. |
| 60 | #[arg(long, default_value_t = false)] | 74 | #[arg(long, default_value_t = false)] |
| 61 | pub execute: bool, | 75 | pub execute: bool, |
| 76 | |||
| 77 | /// Also purge non-empty orphan git repos (repos on disk with no matching 30617 event). | ||
| 78 | /// | ||
| 79 | /// By default, non-empty orphan repos are flagged but not deleted. Pass this flag | ||
| 80 | /// together with `--execute` to permanently delete them. Use with caution. | ||
| 81 | #[arg(long, default_value_t = false)] | ||
| 82 | pub purge_orphans: bool, | ||
| 83 | } | ||
| 84 | |||
| 85 | /// A bare git repo on disk that has no matching kind 30617 event in the DB. | ||
| 86 | #[derive(Debug)] | ||
| 87 | struct OrphanRepo { | ||
| 88 | /// Absolute path to the bare repo directory | ||
| 89 | repo_path: PathBuf, | ||
| 90 | /// npub directory name (may not be a valid npub) | ||
| 91 | npub: String, | ||
| 92 | /// Repository directory name (e.g. "my-repo.git") | ||
| 93 | dir_name: String, | ||
| 94 | /// Whether the repo has any refs (non-empty) | ||
| 95 | has_data: bool, | ||
| 62 | } | 96 | } |
| 63 | 97 | ||
| 64 | /// A repository that has an empty (or missing) bare git repo on disk. | 98 | /// A repository that has an empty (or missing) bare git repo on disk. |
| @@ -170,8 +204,17 @@ pub async fn run(args: &CleanupArgs) -> Result<()> { | |||
| 170 | }); | 204 | }); |
| 171 | } | 205 | } |
| 172 | 206 | ||
| 173 | if empty_repos.is_empty() { | 207 | // --- Filesystem → DB scan: orphan repos --- |
| 174 | println!("No empty repositories found. Nothing to do."); | 208 | println!("Scanning git data directory for orphan repos (no matching 30617 event)..."); |
| 209 | let orphan_repos = find_orphan_repos(git_data_path, &database).await?; | ||
| 210 | println!( | ||
| 211 | "Found {} orphan repo(s) on disk with no matching 30617 event.", | ||
| 212 | orphan_repos.len() | ||
| 213 | ); | ||
| 214 | println!(); | ||
| 215 | |||
| 216 | if empty_repos.is_empty() && orphan_repos.is_empty() { | ||
| 217 | println!("Nothing to do."); | ||
| 175 | return Ok(()); | 218 | return Ok(()); |
| 176 | } | 219 | } |
| 177 | 220 | ||
| @@ -211,13 +254,64 @@ pub async fn run(args: &CleanupArgs) -> Result<()> { | |||
| 211 | ); | 254 | ); |
| 212 | } | 255 | } |
| 213 | 256 | ||
| 214 | println!(); | 257 | // Print orphan report |
| 258 | if !orphan_repos.is_empty() { | ||
| 259 | println!( | ||
| 260 | "Found {} orphan repo(s) on disk with no matching 30617 event:\n", | ||
| 261 | orphan_repos.len() | ||
| 262 | ); | ||
| 263 | let mut empty_orphan_count = 0usize; | ||
| 264 | let mut nonempty_orphan_count = 0usize; | ||
| 265 | for (i, repo) in orphan_repos.iter().enumerate() { | ||
| 266 | let status = if repo.has_data { | ||
| 267 | nonempty_orphan_count += 1; | ||
| 268 | "NON-EMPTY (has git data)" | ||
| 269 | } else { | ||
| 270 | empty_orphan_count += 1; | ||
| 271 | "empty (no refs)" | ||
| 272 | }; | ||
| 273 | println!( | ||
| 274 | " [{:>3}] {}/{} — {}", | ||
| 275 | i + 1, | ||
| 276 | repo.npub, | ||
| 277 | repo.dir_name, | ||
| 278 | status, | ||
| 279 | ); | ||
| 280 | println!(" repo path: {}", repo.repo_path.display()); | ||
| 281 | } | ||
| 282 | println!(); | ||
| 283 | if nonempty_orphan_count > 0 { | ||
| 284 | println!( | ||
| 285 | " NOTE: {} non-empty orphan repo(s) will NOT be deleted unless --purge-orphans is passed.", | ||
| 286 | nonempty_orphan_count | ||
| 287 | ); | ||
| 288 | } | ||
| 289 | if empty_orphan_count > 0 { | ||
| 290 | println!( | ||
| 291 | " NOTE: {} empty orphan repo(s) will be deleted (no git data to lose).", | ||
| 292 | empty_orphan_count | ||
| 293 | ); | ||
| 294 | } | ||
| 295 | println!(); | ||
| 296 | } | ||
| 215 | 297 | ||
| 216 | if !args.execute { | 298 | if !args.execute { |
| 299 | let would_delete = empty_repos.len() | ||
| 300 | + orphan_repos.iter().filter(|r| !r.has_data).count() | ||
| 301 | + if args.purge_orphans { | ||
| 302 | orphan_repos.iter().filter(|r| r.has_data).count() | ||
| 303 | } else { | ||
| 304 | 0 | ||
| 305 | }; | ||
| 217 | println!( | 306 | println!( |
| 218 | "DRY-RUN: {} repository/repositories would be cleaned up.", | 307 | "DRY-RUN: {} item(s) would be cleaned up.", |
| 219 | empty_repos.len() | 308 | would_delete |
| 220 | ); | 309 | ); |
| 310 | if orphan_repos.iter().any(|r| r.has_data) && !args.purge_orphans { | ||
| 311 | println!( | ||
| 312 | " (non-empty orphan repos flagged above would be skipped; add --purge-orphans to include them)" | ||
| 313 | ); | ||
| 314 | } | ||
| 221 | println!("Run with --execute to perform the cleanup (stop the relay first)."); | 315 | println!("Run with --execute to perform the cleanup (stop the relay first)."); |
| 222 | return Ok(()); | 316 | return Ok(()); |
| 223 | } | 317 | } |
| @@ -328,18 +422,191 @@ pub async fn run(args: &CleanupArgs) -> Result<()> { | |||
| 328 | } | 422 | } |
| 329 | } | 423 | } |
| 330 | 424 | ||
| 425 | // --- Execute orphan repo cleanup --- | ||
| 426 | let mut deleted_orphan_repos = 0usize; | ||
| 427 | let mut skipped_nonempty_orphans = 0usize; | ||
| 428 | |||
| 429 | for repo in &orphan_repos { | ||
| 430 | if repo.has_data && !args.purge_orphans { | ||
| 431 | println!( | ||
| 432 | "SKIP (non-empty, --purge-orphans not set): {}/{} — {}", | ||
| 433 | repo.npub, | ||
| 434 | repo.dir_name, | ||
| 435 | repo.repo_path.display() | ||
| 436 | ); | ||
| 437 | skipped_nonempty_orphans += 1; | ||
| 438 | continue; | ||
| 439 | } | ||
| 440 | |||
| 441 | println!( | ||
| 442 | "Deleting orphan repo {}/{} ({})...", | ||
| 443 | repo.npub, | ||
| 444 | repo.dir_name, | ||
| 445 | if repo.has_data { "non-empty" } else { "empty" } | ||
| 446 | ); | ||
| 447 | |||
| 448 | match std::fs::remove_dir_all(&repo.repo_path) { | ||
| 449 | Ok(()) => { | ||
| 450 | println!(" Deleted git repo: {}", repo.repo_path.display()); | ||
| 451 | deleted_orphan_repos += 1; | ||
| 452 | |||
| 453 | // Remove the parent npub directory if now empty | ||
| 454 | if let Some(npub_dir) = repo.repo_path.parent() { | ||
| 455 | if npub_dir.exists() { | ||
| 456 | match std::fs::read_dir(npub_dir) { | ||
| 457 | Ok(mut entries) => { | ||
| 458 | if entries.next().is_none() { | ||
| 459 | if let Err(e) = std::fs::remove_dir(npub_dir) { | ||
| 460 | eprintln!( | ||
| 461 | " WARN: Could not remove empty npub dir {}: {}", | ||
| 462 | npub_dir.display(), | ||
| 463 | e | ||
| 464 | ); | ||
| 465 | } else { | ||
| 466 | println!( | ||
| 467 | " Removed empty npub dir: {}", | ||
| 468 | npub_dir.display() | ||
| 469 | ); | ||
| 470 | } | ||
| 471 | } | ||
| 472 | } | ||
| 473 | Err(e) => { | ||
| 474 | eprintln!( | ||
| 475 | " WARN: Could not read npub dir {}: {}", | ||
| 476 | npub_dir.display(), | ||
| 477 | e | ||
| 478 | ); | ||
| 479 | } | ||
| 480 | } | ||
| 481 | } | ||
| 482 | } | ||
| 483 | } | ||
| 484 | Err(e) => { | ||
| 485 | eprintln!( | ||
| 486 | " ERROR: Failed to delete orphan repo {}: {}", | ||
| 487 | repo.repo_path.display(), | ||
| 488 | e | ||
| 489 | ); | ||
| 490 | failed_repos += 1; | ||
| 491 | } | ||
| 492 | } | ||
| 493 | } | ||
| 494 | |||
| 331 | println!(); | 495 | println!(); |
| 332 | println!("=== Cleanup complete ==="); | 496 | println!("=== Cleanup complete ==="); |
| 333 | println!(" Git repos deleted : {}", deleted_repos); | 497 | println!(" Git repos deleted (stale events) : {}", deleted_repos); |
| 498 | println!(" Git repos deleted (orphans) : {}", deleted_orphan_repos); | ||
| 499 | if skipped_nonempty_orphans > 0 { | ||
| 500 | println!( | ||
| 501 | " Non-empty orphans skipped : {} (re-run with --purge-orphans to delete)", | ||
| 502 | skipped_nonempty_orphans | ||
| 503 | ); | ||
| 504 | } | ||
| 334 | if failed_repos > 0 { | 505 | if failed_repos > 0 { |
| 335 | println!(" Git repos failed : {} (see errors above)", failed_repos); | 506 | println!(" Git repos failed : {} (see errors above)", failed_repos); |
| 336 | } | 507 | } |
| 337 | println!(" 30617 events removed : {}", deleted_announcements); | 508 | println!(" 30617 events removed : {}", deleted_announcements); |
| 338 | println!(" 30618 events removed : {}", deleted_state_events); | 509 | println!(" 30618 events removed : {}", deleted_state_events); |
| 339 | 510 | ||
| 340 | Ok(()) | 511 | Ok(()) |
| 341 | } | 512 | } |
| 342 | 513 | ||
| 514 | /// Scan the git data directory for bare repos that have no matching 30617 event in the DB. | ||
| 515 | /// | ||
| 516 | /// The expected layout is `<git_data_path>/<npub>/<identifier>.git`. | ||
| 517 | /// Any directory under `<git_data_path>` that ends in `.git` and has no corresponding | ||
| 518 | /// 30617 event (matched by pubkey + identifier d-tag) is returned as an orphan. | ||
| 519 | async fn find_orphan_repos( | ||
| 520 | git_data_path: &Path, | ||
| 521 | database: &Arc<dyn NostrDatabase>, | ||
| 522 | ) -> Result<Vec<OrphanRepo>> { | ||
| 523 | let mut orphans = Vec::new(); | ||
| 524 | |||
| 525 | // Iterate npub-level directories | ||
| 526 | let npub_entries = match std::fs::read_dir(git_data_path) { | ||
| 527 | Ok(e) => e, | ||
| 528 | Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(orphans), | ||
| 529 | Err(e) => { | ||
| 530 | return Err(anyhow::anyhow!( | ||
| 531 | "Failed to read git data directory {}: {}", | ||
| 532 | git_data_path.display(), | ||
| 533 | e | ||
| 534 | )) | ||
| 535 | } | ||
| 536 | }; | ||
| 537 | |||
| 538 | for npub_entry in npub_entries { | ||
| 539 | let npub_entry = npub_entry.context("Failed to read git data directory entry")?; | ||
| 540 | let npub_path = npub_entry.path(); | ||
| 541 | if !npub_path.is_dir() { | ||
| 542 | continue; | ||
| 543 | } | ||
| 544 | let npub = npub_entry.file_name().to_string_lossy().into_owned(); | ||
| 545 | |||
| 546 | // Iterate repo-level directories inside this npub dir | ||
| 547 | let repo_entries = match std::fs::read_dir(&npub_path) { | ||
| 548 | Ok(e) => e, | ||
| 549 | Err(e) => { | ||
| 550 | eprintln!( | ||
| 551 | " WARN: Could not read npub directory {}: {}", | ||
| 552 | npub_path.display(), | ||
| 553 | e | ||
| 554 | ); | ||
| 555 | continue; | ||
| 556 | } | ||
| 557 | }; | ||
| 558 | |||
| 559 | for repo_entry in repo_entries { | ||
| 560 | let repo_entry = repo_entry.context("Failed to read repo directory entry")?; | ||
| 561 | let repo_path = repo_entry.path(); | ||
| 562 | if !repo_path.is_dir() { | ||
| 563 | continue; | ||
| 564 | } | ||
| 565 | let dir_name = repo_entry.file_name().to_string_lossy().into_owned(); | ||
| 566 | if !dir_name.ends_with(".git") { | ||
| 567 | continue; | ||
| 568 | } | ||
| 569 | |||
| 570 | // Derive the identifier (strip .git suffix) | ||
| 571 | let identifier = dir_name.strip_suffix(".git").unwrap_or(&dir_name); | ||
| 572 | |||
| 573 | // Check whether a 30617 event exists for this (npub, identifier) | ||
| 574 | // We query by identifier d-tag; if the npub is not a valid bech32 pubkey | ||
| 575 | // we won't be able to filter by author, so we check the results manually. | ||
| 576 | let filter = Filter::new() | ||
| 577 | .kind(Kind::GitRepoAnnouncement) | ||
| 578 | .identifier(identifier.to_string()); | ||
| 579 | |||
| 580 | let matching = database | ||
| 581 | .query(filter) | ||
| 582 | .await | ||
| 583 | .with_context(|| format!("Failed to query 30617 for identifier {}", identifier))?; | ||
| 584 | |||
| 585 | // Verify at least one event's owner npub matches the directory name | ||
| 586 | let has_event = matching.iter().any(|ev| { | ||
| 587 | ev.pubkey | ||
| 588 | .to_bech32() | ||
| 589 | .map(|n| n == npub) | ||
| 590 | .unwrap_or(false) | ||
| 591 | }); | ||
| 592 | |||
| 593 | if has_event { | ||
| 594 | continue; | ||
| 595 | } | ||
| 596 | |||
| 597 | let (_, is_empty) = check_repo_empty(&repo_path); | ||
| 598 | orphans.push(OrphanRepo { | ||
| 599 | repo_path, | ||
| 600 | npub: npub.clone(), | ||
| 601 | dir_name, | ||
| 602 | has_data: !is_empty, | ||
| 603 | }); | ||
| 604 | } | ||
| 605 | } | ||
| 606 | |||
| 607 | Ok(orphans) | ||
| 608 | } | ||
| 609 | |||
| 343 | /// Check whether a bare git repository is empty (has no refs). | 610 | /// Check whether a bare git repository is empty (has no refs). |
| 344 | /// | 611 | /// |
| 345 | /// Returns `(exists, is_empty)`: | 612 | /// Returns `(exists, is_empty)`: |