diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-04-10 19:26:23 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-04-10 19:26:23 +0000 |
| commit | 8aef478c6b1e9e3f6ebbad6d57f59f1a84a261ea (patch) | |
| tree | 2aa56188b13fc4b8e6a2fdf302dea19afb65a5a7 /src/cleanup_empty_repos.rs | |
| parent | 2161e3c0a8169a85111cd6dc01ffe2b0fed1493f (diff) | |
feat: add cleanup-empty-repos subcommand to remove stale events for empty git repos
Adds a maintenance subcommand that scans the LMDB database for kind 30617
(repository announcement) events whose bare git repo on disk is empty or
missing, then removes both the 30617 and any matching 30618 (state) events.
A relay should not serve announcement or state events for a repository with
no git data. This was needed to clean up repos leaked by the bug fixed in
2161e3c, and is useful as an ongoing maintenance tool.
Usage (dry-run by default, stop relay before --execute):
ngit-grasp cleanup-empty-repos [--relay-data-path <path>] [--git-data-path <path>] [--execute]
The relay itself is now invoked as an implicit 'serve' subcommand, preserving
full backward compatibility with existing deployments and env-var configuration.
Diffstat (limited to 'src/cleanup_empty_repos.rs')
| -rw-r--r-- | src/cleanup_empty_repos.rs | 373 |
1 files changed, 373 insertions, 0 deletions
diff --git a/src/cleanup_empty_repos.rs b/src/cleanup_empty_repos.rs new file mode 100644 index 0000000..f1d1c3e --- /dev/null +++ b/src/cleanup_empty_repos.rs | |||
| @@ -0,0 +1,373 @@ | |||
| 1 | //! Cleanup Empty Repositories | ||
| 2 | //! | ||
| 3 | //! Scans the LMDB database for kind 30617 (repository announcement) events whose | ||
| 4 | //! corresponding bare git repository on disk is empty (no refs) or missing entirely. | ||
| 5 | //! For each such repository, also removes any kind 30618 (state) events for the same | ||
| 6 | //! (pubkey, identifier) coordinate. | ||
| 7 | //! | ||
| 8 | //! ## Rationale | ||
| 9 | //! | ||
| 10 | //! A relay should not store announcement or state events for a repository that has no | ||
| 11 | //! git data. If the bare repo is empty or absent, the events are stale and should be | ||
| 12 | //! removed so the relay does not serve them. | ||
| 13 | //! | ||
| 14 | //! ## Usage | ||
| 15 | //! | ||
| 16 | //! ```text | ||
| 17 | //! # Dry-run (default): print what would be deleted | ||
| 18 | //! ngit-grasp cleanup-empty-repos --relay-data-path /var/lib/ngit-grasp/relay \ | ||
| 19 | //! --git-data-path /var/lib/ngit-grasp/git | ||
| 20 | //! | ||
| 21 | //! # Execute: delete the bare repos and remove events from the DB | ||
| 22 | //! ngit-grasp cleanup-empty-repos --relay-data-path /var/lib/ngit-grasp/relay \ | ||
| 23 | //! --git-data-path /var/lib/ngit-grasp/git \ | ||
| 24 | //! --execute | ||
| 25 | //! ``` | ||
| 26 | //! | ||
| 27 | //! The relay service should be stopped before running with `--execute` to avoid | ||
| 28 | //! races with the live relay process. | ||
| 29 | |||
| 30 | use std::path::{Path, PathBuf}; | ||
| 31 | use std::process::Command; | ||
| 32 | use std::sync::Arc; | ||
| 33 | |||
| 34 | use anyhow::{Context, Result}; | ||
| 35 | use clap::Args; | ||
| 36 | use nostr_lmdb::NostrLmdb; | ||
| 37 | use nostr_sdk::prelude::*; | ||
| 38 | |||
| 39 | use crate::nostr::events::RepositoryAnnouncement; | ||
| 40 | |||
/// Arguments for the `cleanup-empty-repos` subcommand.
// NOTE(review): clap's derive normally surfaces the `///` comments in this struct
// as `--help` text, so they are presumably user-facing — reword with that in mind.
#[derive(Debug, Args)]
pub struct CleanupArgs {
    /// Path to the LMDB relay data directory (contains the nostr event database).
    ///
    /// Defaults to `./data/relay` (same default as the relay itself).
    // Can also be supplied through the `NGIT_RELAY_DATA_PATH` environment variable.
    // `run` passes this path to `NostrLmdb::open`.
    #[arg(long, env = "NGIT_RELAY_DATA_PATH", default_value = "./data/relay")]
    pub relay_data_path: String,

    /// Path to the git data directory (contains bare repositories).
    ///
    /// Defaults to `./data/git` (same default as the relay itself).
    // Can also be supplied through the `NGIT_GIT_DATA_PATH` environment variable.
    // `run` joins each announcement's repo path onto this directory.
    #[arg(long, env = "NGIT_GIT_DATA_PATH", default_value = "./data/git")]
    pub git_data_path: String,

    /// Actually delete empty repositories and remove their events from the database.
    ///
    /// Without this flag the command runs in dry-run mode and only prints what
    /// would be deleted. Stop the relay service before using this flag.
    // `false` (the default) makes `run` return right after printing its report.
    #[arg(long, default_value_t = false)]
    pub execute: bool,
}
| 63 | |||
/// A repository announcement whose bare git repo on disk is empty (no refs) or
/// missing, together with everything `run` needs to report and clean it up.
#[derive(Debug)]
struct EmptyRepo {
    /// The kind 30617 (repository announcement) event as read from the database
    announcement: Event,
    /// Owner's npub (bech32), as returned by `RepositoryAnnouncement::owner_npub`
    npub: String,
    /// Repository identifier (d-tag value)
    identifier: String,
    /// Path to the bare repo directory: the git data path joined with the
    /// announcement's repo path (so it is relative when the git data path is)
    repo_path: PathBuf,
    /// `true` when the directory exists but has no refs, `false` when it is
    /// missing from disk entirely
    repo_exists: bool,
    /// Any kind 30618 (state) events found in the local DB for the same
    /// (pubkey, identifier) coordinate
    state_events: Vec<Event>,
}
| 80 | |||
| 81 | /// Run the cleanup-empty-repos subcommand. | ||
| 82 | pub async fn run(args: &CleanupArgs) -> Result<()> { | ||
| 83 | let relay_data_path = Path::new(&args.relay_data_path); | ||
| 84 | let git_data_path = Path::new(&args.git_data_path); | ||
| 85 | |||
| 86 | if args.execute { | ||
| 87 | println!("=== cleanup-empty-repos (EXECUTE MODE) ==="); | ||
| 88 | println!("WARNING: This will permanently delete data. The relay should be stopped."); | ||
| 89 | println!(); | ||
| 90 | } else { | ||
| 91 | println!("=== cleanup-empty-repos (DRY-RUN MODE) ==="); | ||
| 92 | println!("Pass --execute to actually delete. Stop the relay first."); | ||
| 93 | println!(); | ||
| 94 | } | ||
| 95 | |||
| 96 | println!("Relay data path : {}", relay_data_path.display()); | ||
| 97 | println!("Git data path : {}", git_data_path.display()); | ||
| 98 | println!(); | ||
| 99 | |||
| 100 | // Open the LMDB database | ||
| 101 | println!("Opening LMDB database..."); | ||
| 102 | let database: Arc<dyn NostrDatabase> = Arc::new( | ||
| 103 | NostrLmdb::open(relay_data_path) | ||
| 104 | .await | ||
| 105 | .with_context(|| format!("Failed to open LMDB at {}", relay_data_path.display()))?, | ||
| 106 | ); | ||
| 107 | println!("Database opened."); | ||
| 108 | println!(); | ||
| 109 | |||
| 110 | // Query all kind 30617 events | ||
| 111 | let filter = Filter::new().kind(Kind::GitRepoAnnouncement); | ||
| 112 | let announcements = database | ||
| 113 | .query(filter) | ||
| 114 | .await | ||
| 115 | .context("Failed to query kind 30617 events")?; | ||
| 116 | |||
| 117 | println!("Found {} kind 30617 announcement(s) in database.", announcements.len()); | ||
| 118 | println!(); | ||
| 119 | |||
| 120 | // Identify empty repos | ||
| 121 | let mut empty_repos: Vec<EmptyRepo> = Vec::new(); | ||
| 122 | |||
| 123 | for event in announcements.iter() { | ||
| 124 | let announcement = match RepositoryAnnouncement::from_event(event.clone()) { | ||
| 125 | Ok(a) => a, | ||
| 126 | Err(e) => { | ||
| 127 | eprintln!( | ||
| 128 | " WARN: Could not parse announcement {} (skipping): {}", | ||
| 129 | event.id.to_hex(), | ||
| 130 | e | ||
| 131 | ); | ||
| 132 | continue; | ||
| 133 | } | ||
| 134 | }; | ||
| 135 | |||
| 136 | let npub = announcement.owner_npub(); | ||
| 137 | let identifier = announcement.identifier.clone(); | ||
| 138 | let repo_path = git_data_path.join(&announcement.repo_path()); | ||
| 139 | |||
| 140 | let (repo_exists, is_empty) = check_repo_empty(&repo_path); | ||
| 141 | |||
| 142 | if !is_empty { | ||
| 143 | // Repo has git data — leave it alone | ||
| 144 | continue; | ||
| 145 | } | ||
| 146 | |||
| 147 | // Look up any kind 30618 state events for this (pubkey, identifier) in the local DB | ||
| 148 | let state_filter = Filter::new() | ||
| 149 | .kind(Kind::RepoState) | ||
| 150 | .author(event.pubkey) | ||
| 151 | .identifier(identifier.clone()); | ||
| 152 | |||
| 153 | let state_events = database | ||
| 154 | .query(state_filter) | ||
| 155 | .await | ||
| 156 | .with_context(|| { | ||
| 157 | format!( | ||
| 158 | "Failed to query kind 30618 for {}/{}", | ||
| 159 | npub, identifier | ||
| 160 | ) | ||
| 161 | })?; | ||
| 162 | |||
| 163 | empty_repos.push(EmptyRepo { | ||
| 164 | announcement: event.clone(), | ||
| 165 | npub, | ||
| 166 | identifier, | ||
| 167 | repo_path, | ||
| 168 | repo_exists, | ||
| 169 | state_events: state_events.into_iter().collect(), | ||
| 170 | }); | ||
| 171 | } | ||
| 172 | |||
| 173 | if empty_repos.is_empty() { | ||
| 174 | println!("No empty repositories found. Nothing to do."); | ||
| 175 | return Ok(()); | ||
| 176 | } | ||
| 177 | |||
| 178 | // Print report | ||
| 179 | println!( | ||
| 180 | "Found {} repository/repositories with empty or missing git data:\n", | ||
| 181 | empty_repos.len() | ||
| 182 | ); | ||
| 183 | |||
| 184 | for (i, repo) in empty_repos.iter().enumerate() { | ||
| 185 | let repo_status = if repo.repo_exists { | ||
| 186 | "exists but empty (no refs)" | ||
| 187 | } else { | ||
| 188 | "missing from disk" | ||
| 189 | }; | ||
| 190 | println!( | ||
| 191 | " [{:>3}] {}/{} — git repo {}", | ||
| 192 | i + 1, | ||
| 193 | repo.npub, | ||
| 194 | repo.identifier, | ||
| 195 | repo_status, | ||
| 196 | ); | ||
| 197 | println!( | ||
| 198 | " 30617 event : {}", | ||
| 199 | repo.announcement.id.to_hex() | ||
| 200 | ); | ||
| 201 | if repo.state_events.is_empty() { | ||
| 202 | println!(" 30618 events: none in local DB"); | ||
| 203 | } else { | ||
| 204 | for se in &repo.state_events { | ||
| 205 | println!(" 30618 event : {}", se.id.to_hex()); | ||
| 206 | } | ||
| 207 | } | ||
| 208 | println!( | ||
| 209 | " repo path : {}", | ||
| 210 | repo.repo_path.display() | ||
| 211 | ); | ||
| 212 | } | ||
| 213 | |||
| 214 | println!(); | ||
| 215 | |||
| 216 | if !args.execute { | ||
| 217 | println!( | ||
| 218 | "DRY-RUN: {} repository/repositories would be cleaned up.", | ||
| 219 | empty_repos.len() | ||
| 220 | ); | ||
| 221 | println!("Run with --execute to perform the cleanup (stop the relay first)."); | ||
| 222 | return Ok(()); | ||
| 223 | } | ||
| 224 | |||
| 225 | // Execute: delete repos and remove events | ||
| 226 | println!("Executing cleanup..."); | ||
| 227 | println!(); | ||
| 228 | |||
| 229 | let mut deleted_repos = 0usize; | ||
| 230 | let mut failed_repos = 0usize; | ||
| 231 | let mut deleted_announcements = 0usize; | ||
| 232 | let mut deleted_state_events = 0usize; | ||
| 233 | |||
| 234 | for repo in &empty_repos { | ||
| 235 | println!("Cleaning up {}/{}...", repo.npub, repo.identifier); | ||
| 236 | |||
| 237 | // 1. Delete the bare repo directory (if it exists) | ||
| 238 | if repo.repo_exists { | ||
| 239 | match std::fs::remove_dir_all(&repo.repo_path) { | ||
| 240 | Ok(()) => { | ||
| 241 | println!(" Deleted git repo: {}", repo.repo_path.display()); | ||
| 242 | deleted_repos += 1; | ||
| 243 | |||
| 244 | // Remove the parent npub directory if now empty | ||
| 245 | if let Some(npub_dir) = repo.repo_path.parent() { | ||
| 246 | if npub_dir.exists() { | ||
| 247 | match std::fs::read_dir(npub_dir) { | ||
| 248 | Ok(mut entries) => { | ||
| 249 | if entries.next().is_none() { | ||
| 250 | if let Err(e) = std::fs::remove_dir(npub_dir) { | ||
| 251 | eprintln!( | ||
| 252 | " WARN: Could not remove empty npub dir {}: {}", | ||
| 253 | npub_dir.display(), | ||
| 254 | e | ||
| 255 | ); | ||
| 256 | } else { | ||
| 257 | println!( | ||
| 258 | " Removed empty npub dir: {}", | ||
| 259 | npub_dir.display() | ||
| 260 | ); | ||
| 261 | } | ||
| 262 | } | ||
| 263 | } | ||
| 264 | Err(e) => { | ||
| 265 | eprintln!( | ||
| 266 | " WARN: Could not read npub dir {}: {}", | ||
| 267 | npub_dir.display(), | ||
| 268 | e | ||
| 269 | ); | ||
| 270 | } | ||
| 271 | } | ||
| 272 | } | ||
| 273 | } | ||
| 274 | } | ||
| 275 | Err(e) => { | ||
| 276 | eprintln!( | ||
| 277 | " ERROR: Failed to delete git repo {}: {}", | ||
| 278 | repo.repo_path.display(), | ||
| 279 | e | ||
| 280 | ); | ||
| 281 | failed_repos += 1; | ||
| 282 | // Continue — still try to remove the DB events | ||
| 283 | } | ||
| 284 | } | ||
| 285 | } | ||
| 286 | |||
| 287 | // 2. Remove the kind 30617 announcement from the DB | ||
| 288 | // Use a filter matching the specific event ID so we only delete this exact event. | ||
| 289 | let announcement_filter = Filter::new() | ||
| 290 | .kind(Kind::GitRepoAnnouncement) | ||
| 291 | .id(repo.announcement.id); | ||
| 292 | |||
| 293 | match database.delete(announcement_filter).await { | ||
| 294 | Ok(()) => { | ||
| 295 | println!(" Deleted 30617 event: {}", repo.announcement.id.to_hex()); | ||
| 296 | deleted_announcements += 1; | ||
| 297 | } | ||
| 298 | Err(e) => { | ||
| 299 | eprintln!( | ||
| 300 | " ERROR: Failed to delete 30617 event {}: {}", | ||
| 301 | repo.announcement.id.to_hex(), | ||
| 302 | e | ||
| 303 | ); | ||
| 304 | } | ||
| 305 | } | ||
| 306 | |||
| 307 | // 3. Remove any kind 30618 state events for this coordinate | ||
| 308 | if !repo.state_events.is_empty() { | ||
| 309 | let state_filter = Filter::new() | ||
| 310 | .kind(Kind::RepoState) | ||
| 311 | .author(repo.announcement.pubkey) | ||
| 312 | .identifier(repo.identifier.clone()); | ||
| 313 | |||
| 314 | match database.delete(state_filter).await { | ||
| 315 | Ok(()) => { | ||
| 316 | for se in &repo.state_events { | ||
| 317 | println!(" Deleted 30618 event: {}", se.id.to_hex()); | ||
| 318 | deleted_state_events += 1; | ||
| 319 | } | ||
| 320 | } | ||
| 321 | Err(e) => { | ||
| 322 | eprintln!( | ||
| 323 | " ERROR: Failed to delete 30618 events for {}/{}: {}", | ||
| 324 | repo.npub, repo.identifier, e | ||
| 325 | ); | ||
| 326 | } | ||
| 327 | } | ||
| 328 | } | ||
| 329 | } | ||
| 330 | |||
| 331 | println!(); | ||
| 332 | println!("=== Cleanup complete ==="); | ||
| 333 | println!(" Git repos deleted : {}", deleted_repos); | ||
| 334 | if failed_repos > 0 { | ||
| 335 | println!(" Git repos failed : {} (see errors above)", failed_repos); | ||
| 336 | } | ||
| 337 | println!(" 30617 events removed : {}", deleted_announcements); | ||
| 338 | println!(" 30618 events removed : {}", deleted_state_events); | ||
| 339 | |||
| 340 | Ok(()) | ||
| 341 | } | ||
| 342 | |||
/// Check whether a bare git repository is empty (has no refs).
///
/// Returns `(exists, is_empty)`:
/// - `(false, true)` — path does not exist (treated as empty)
/// - `(true, true)` — `git for-each-ref` succeeded and listed no refs
/// - `(true, false)` — the repository has at least one ref, **or** emptiness
///   could not be established (git could not be started, or it exited with an
///   error such as "not a git repository")
///
/// The result drives a recursive delete, so every failure is reported on
/// stderr and mapped to "not empty": an unreadable repository must never be
/// mistaken for one that is safe to remove.
fn check_repo_empty(repo_path: &Path) -> (bool, bool) {
    if !repo_path.exists() {
        return (false, true);
    }

    // `--git-dir` is an option of `git` itself and must come BEFORE the
    // subcommand; `for-each-ref` rejects it when it is placed afterwards.
    // One ref is enough to prove the repository is not empty, hence `--count=1`.
    let output = Command::new("git")
        .arg("--git-dir")
        .arg(repo_path)
        .args(["for-each-ref", "--count=1", "--format=%(refname)"])
        .output();

    match output {
        Ok(out) if out.status.success() => {
            // Trim whitespace; if nothing remains, the repo has no refs
            let has_refs = !String::from_utf8_lossy(&out.stdout).trim().is_empty();
            (true, !has_refs)
        }
        Ok(out) => {
            // Empty stdout from a failed command says nothing about the refs.
            eprintln!(
                " WARN: `git for-each-ref` failed for {} ({}), leaving it untouched: {}",
                repo_path.display(),
                out.status,
                String::from_utf8_lossy(&out.stderr).trim()
            );
            (true, false)
        }
        Err(e) => {
            eprintln!(
                " WARN: Could not run git for {} ({}), leaving it untouched",
                repo_path.display(),
                e
            );
            (true, false)
        }
    }
}