upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: DanConwayDev <DanConwayDev@protonmail.com> 2026-04-10 20:29:34 +0000
committer: DanConwayDev <DanConwayDev@protonmail.com> 2026-04-10 20:29:34 +0000
commit: 54636ee239e4f8e25142e99807f246f956d2f003 (patch)
tree: 282269373761b7ab54f605528967708b9907039b
parent: 368f0556267b32c5478b7fb68b8a30d42942ee6f (diff)
feat: scan filesystem for orphan git repos with no matching 30617 event
Extends cleanup-empty-repos with a second scan direction (filesystem → DB). Bare git repos under the git data path that have no corresponding 30617 announcement event are identified as orphans and cleaned up. Empty orphans are always removed. Non-empty orphans are flagged in the report but only deleted when --purge-orphans is also passed, preventing accidental data loss.
-rw-r--r-- src/cleanup_empty_repos.rs 285
1 file changed, 276 insertions, 9 deletions
diff --git a/src/cleanup_empty_repos.rs b/src/cleanup_empty_repos.rs
index 8f5492d..e7eea61 100644
--- a/src/cleanup_empty_repos.rs
+++ b/src/cleanup_empty_repos.rs
@@ -11,6 +11,15 @@
11//! git data. If the bare repo is empty or absent, the events are stale and should be 11//! git data. If the bare repo is empty or absent, the events are stale and should be
12//! removed so the relay does not serve them. 12//! removed so the relay does not serve them.
13//! 13//!
14//! Two scans are performed:
15//!
16//! 1. **DB → filesystem**: finds 30617 events whose bare git repo is empty or missing.
17//! Both the 30617 and any matching 30618 events are removed.
18//!
19//! 2. **Filesystem → DB**: finds bare git repos on disk with no matching 30617 event.
20//! Empty orphan repos are always removed. Non-empty orphan repos are flagged and
21//! only removed when `--purge-orphans` is also passed.
22//!
14//! ## Usage 23//! ## Usage
15//! 24//!
16//! ```text 25//! ```text
@@ -22,6 +31,11 @@
22//! ngit-grasp cleanup-empty-repos --relay-data-path /var/lib/ngit-grasp/relay \ 31//! ngit-grasp cleanup-empty-repos --relay-data-path /var/lib/ngit-grasp/relay \
23//! --git-data-path /var/lib/ngit-grasp/git \ 32//! --git-data-path /var/lib/ngit-grasp/git \
24//! --execute 33//! --execute
34//!
35//! # Also purge non-empty orphan repos (no matching 30617 in DB)
36//! ngit-grasp cleanup-empty-repos --relay-data-path /var/lib/ngit-grasp/relay \
37//! --git-data-path /var/lib/ngit-grasp/git \
38//! --execute --purge-orphans
25//! ``` 39//! ```
26//! 40//!
27//! The relay service should be stopped before running with `--execute` to avoid 41//! The relay service should be stopped before running with `--execute` to avoid
@@ -59,6 +73,26 @@ pub struct CleanupArgs {
59 /// would be deleted. Stop the relay service before using this flag. 73 /// would be deleted. Stop the relay service before using this flag.
60 #[arg(long, default_value_t = false)] 74 #[arg(long, default_value_t = false)]
61 pub execute: bool, 75 pub execute: bool,
76
77 /// Also purge non-empty orphan git repos (repos on disk with no matching 30617 event).
78 ///
79 /// By default, non-empty orphan repos are flagged but not deleted. Pass this flag
80 /// together with `--execute` to permanently delete them. Use with caution.
81 #[arg(long, default_value_t = false)]
82 pub purge_orphans: bool,
83}
84
85/// A bare git repo on disk that has no matching kind 30617 event in the DB.
86#[derive(Debug)]
87struct OrphanRepo {
88 /// Absolute path to the bare repo directory
89 repo_path: PathBuf,
90 /// npub directory name (may not be a valid npub)
91 npub: String,
92 /// Repository directory name (e.g. "my-repo.git")
93 dir_name: String,
94 /// Whether the repo has any refs (non-empty)
95 has_data: bool,
62} 96}
63 97
64/// A repository that has an empty (or missing) bare git repo on disk. 98/// A repository that has an empty (or missing) bare git repo on disk.
@@ -170,8 +204,17 @@ pub async fn run(args: &CleanupArgs) -> Result<()> {
170 }); 204 });
171 } 205 }
172 206
173 if empty_repos.is_empty() { 207 // --- Filesystem → DB scan: orphan repos ---
174 println!("No empty repositories found. Nothing to do."); 208 println!("Scanning git data directory for orphan repos (no matching 30617 event)...");
209 let orphan_repos = find_orphan_repos(git_data_path, &database).await?;
210 println!(
211 "Found {} orphan repo(s) on disk with no matching 30617 event.",
212 orphan_repos.len()
213 );
214 println!();
215
216 if empty_repos.is_empty() && orphan_repos.is_empty() {
217 println!("Nothing to do.");
175 return Ok(()); 218 return Ok(());
176 } 219 }
177 220
@@ -211,13 +254,64 @@ pub async fn run(args: &CleanupArgs) -> Result<()> {
211 ); 254 );
212 } 255 }
213 256
214 println!(); 257 // Print orphan report
258 if !orphan_repos.is_empty() {
259 println!(
260 "Found {} orphan repo(s) on disk with no matching 30617 event:\n",
261 orphan_repos.len()
262 );
263 let mut empty_orphan_count = 0usize;
264 let mut nonempty_orphan_count = 0usize;
265 for (i, repo) in orphan_repos.iter().enumerate() {
266 let status = if repo.has_data {
267 nonempty_orphan_count += 1;
268 "NON-EMPTY (has git data)"
269 } else {
270 empty_orphan_count += 1;
271 "empty (no refs)"
272 };
273 println!(
274 " [{:>3}] {}/{} — {}",
275 i + 1,
276 repo.npub,
277 repo.dir_name,
278 status,
279 );
280 println!(" repo path: {}", repo.repo_path.display());
281 }
282 println!();
283 if nonempty_orphan_count > 0 {
284 println!(
285 " NOTE: {} non-empty orphan repo(s) will NOT be deleted unless --purge-orphans is passed.",
286 nonempty_orphan_count
287 );
288 }
289 if empty_orphan_count > 0 {
290 println!(
291 " NOTE: {} empty orphan repo(s) will be deleted (no git data to lose).",
292 empty_orphan_count
293 );
294 }
295 println!();
296 }
215 297
216 if !args.execute { 298 if !args.execute {
299 let would_delete = empty_repos.len()
300 + orphan_repos.iter().filter(|r| !r.has_data).count()
301 + if args.purge_orphans {
302 orphan_repos.iter().filter(|r| r.has_data).count()
303 } else {
304 0
305 };
217 println!( 306 println!(
218 "DRY-RUN: {} repository/repositories would be cleaned up.", 307 "DRY-RUN: {} item(s) would be cleaned up.",
219 empty_repos.len() 308 would_delete
220 ); 309 );
310 if orphan_repos.iter().any(|r| r.has_data) && !args.purge_orphans {
311 println!(
312 " (non-empty orphan repos flagged above would be skipped; add --purge-orphans to include them)"
313 );
314 }
221 println!("Run with --execute to perform the cleanup (stop the relay first)."); 315 println!("Run with --execute to perform the cleanup (stop the relay first).");
222 return Ok(()); 316 return Ok(());
223 } 317 }
@@ -328,18 +422,191 @@ pub async fn run(args: &CleanupArgs) -> Result<()> {
328 } 422 }
329 } 423 }
330 424
425 // --- Execute orphan repo cleanup ---
426 let mut deleted_orphan_repos = 0usize;
427 let mut skipped_nonempty_orphans = 0usize;
428
429 for repo in &orphan_repos {
430 if repo.has_data && !args.purge_orphans {
431 println!(
432 "SKIP (non-empty, --purge-orphans not set): {}/{} — {}",
433 repo.npub,
434 repo.dir_name,
435 repo.repo_path.display()
436 );
437 skipped_nonempty_orphans += 1;
438 continue;
439 }
440
441 println!(
442 "Deleting orphan repo {}/{} ({})...",
443 repo.npub,
444 repo.dir_name,
445 if repo.has_data { "non-empty" } else { "empty" }
446 );
447
448 match std::fs::remove_dir_all(&repo.repo_path) {
449 Ok(()) => {
450 println!(" Deleted git repo: {}", repo.repo_path.display());
451 deleted_orphan_repos += 1;
452
453 // Remove the parent npub directory if now empty
454 if let Some(npub_dir) = repo.repo_path.parent() {
455 if npub_dir.exists() {
456 match std::fs::read_dir(npub_dir) {
457 Ok(mut entries) => {
458 if entries.next().is_none() {
459 if let Err(e) = std::fs::remove_dir(npub_dir) {
460 eprintln!(
461 " WARN: Could not remove empty npub dir {}: {}",
462 npub_dir.display(),
463 e
464 );
465 } else {
466 println!(
467 " Removed empty npub dir: {}",
468 npub_dir.display()
469 );
470 }
471 }
472 }
473 Err(e) => {
474 eprintln!(
475 " WARN: Could not read npub dir {}: {}",
476 npub_dir.display(),
477 e
478 );
479 }
480 }
481 }
482 }
483 }
484 Err(e) => {
485 eprintln!(
486 " ERROR: Failed to delete orphan repo {}: {}",
487 repo.repo_path.display(),
488 e
489 );
490 failed_repos += 1;
491 }
492 }
493 }
494
331 println!(); 495 println!();
332 println!("=== Cleanup complete ==="); 496 println!("=== Cleanup complete ===");
333 println!(" Git repos deleted : {}", deleted_repos); 497 println!(" Git repos deleted (stale events) : {}", deleted_repos);
498 println!(" Git repos deleted (orphans) : {}", deleted_orphan_repos);
499 if skipped_nonempty_orphans > 0 {
500 println!(
501 " Non-empty orphans skipped : {} (re-run with --purge-orphans to delete)",
502 skipped_nonempty_orphans
503 );
504 }
334 if failed_repos > 0 { 505 if failed_repos > 0 {
335 println!(" Git repos failed : {} (see errors above)", failed_repos); 506 println!(" Git repos failed : {} (see errors above)", failed_repos);
336 } 507 }
337 println!(" 30617 events removed : {}", deleted_announcements); 508 println!(" 30617 events removed : {}", deleted_announcements);
338 println!(" 30618 events removed : {}", deleted_state_events); 509 println!(" 30618 events removed : {}", deleted_state_events);
339 510
340 Ok(()) 511 Ok(())
341} 512}
342 513
514/// Scan the git data directory for bare repos that have no matching 30617 event in the DB.
515///
516/// The expected layout is `<git_data_path>/<npub>/<identifier>.git`.
517/// Any directory under `<git_data_path>` that ends in `.git` and has no corresponding
518/// 30617 event (matched by pubkey + identifier d-tag) is returned as an orphan.
519async fn find_orphan_repos(
520 git_data_path: &Path,
521 database: &Arc<dyn NostrDatabase>,
522) -> Result<Vec<OrphanRepo>> {
523 let mut orphans = Vec::new();
524
525 // Iterate npub-level directories
526 let npub_entries = match std::fs::read_dir(git_data_path) {
527 Ok(e) => e,
528 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(orphans),
529 Err(e) => {
530 return Err(anyhow::anyhow!(
531 "Failed to read git data directory {}: {}",
532 git_data_path.display(),
533 e
534 ))
535 }
536 };
537
538 for npub_entry in npub_entries {
539 let npub_entry = npub_entry.context("Failed to read git data directory entry")?;
540 let npub_path = npub_entry.path();
541 if !npub_path.is_dir() {
542 continue;
543 }
544 let npub = npub_entry.file_name().to_string_lossy().into_owned();
545
546 // Iterate repo-level directories inside this npub dir
547 let repo_entries = match std::fs::read_dir(&npub_path) {
548 Ok(e) => e,
549 Err(e) => {
550 eprintln!(
551 " WARN: Could not read npub directory {}: {}",
552 npub_path.display(),
553 e
554 );
555 continue;
556 }
557 };
558
559 for repo_entry in repo_entries {
560 let repo_entry = repo_entry.context("Failed to read repo directory entry")?;
561 let repo_path = repo_entry.path();
562 if !repo_path.is_dir() {
563 continue;
564 }
565 let dir_name = repo_entry.file_name().to_string_lossy().into_owned();
566 if !dir_name.ends_with(".git") {
567 continue;
568 }
569
570 // Derive the identifier (strip .git suffix)
571 let identifier = dir_name.strip_suffix(".git").unwrap_or(&dir_name);
572
573 // Check whether a 30617 event exists for this (npub, identifier)
574 // We query by identifier d-tag; if the npub is not a valid bech32 pubkey
575 // we won't be able to filter by author, so we check the results manually.
576 let filter = Filter::new()
577 .kind(Kind::GitRepoAnnouncement)
578 .identifier(identifier.to_string());
579
580 let matching = database
581 .query(filter)
582 .await
583 .with_context(|| format!("Failed to query 30617 for identifier {}", identifier))?;
584
585 // Verify at least one event's owner npub matches the directory name
586 let has_event = matching.iter().any(|ev| {
587 ev.pubkey
588 .to_bech32()
589 .map(|n| n == npub)
590 .unwrap_or(false)
591 });
592
593 if has_event {
594 continue;
595 }
596
597 let (_, is_empty) = check_repo_empty(&repo_path);
598 orphans.push(OrphanRepo {
599 repo_path,
600 npub: npub.clone(),
601 dir_name,
602 has_data: !is_empty,
603 });
604 }
605 }
606
607 Ok(orphans)
608}
609
343/// Check whether a bare git repository is empty (has no refs). 610/// Check whether a bare git repository is empty (has no refs).
344/// 611///
345/// Returns `(exists, is_empty)`: 612/// Returns `(exists, is_empty)`: