diff options
| -rw-r--r-- | src/cleanup_empty_repos.rs | 373 | ||||
| -rw-r--r-- | src/config.rs | 2 | ||||
| -rw-r--r-- | src/lib.rs | 1 | ||||
| -rw-r--r-- | src/main.rs | 57 |
4 files changed, 430 insertions, 3 deletions
diff --git a/src/cleanup_empty_repos.rs b/src/cleanup_empty_repos.rs new file mode 100644 index 0000000..f1d1c3e --- /dev/null +++ b/src/cleanup_empty_repos.rs | |||
| @@ -0,0 +1,373 @@ | |||
| 1 | //! Cleanup Empty Repositories | ||
| 2 | //! | ||
| 3 | //! Scans the LMDB database for kind 30617 (repository announcement) events whose | ||
| 4 | //! corresponding bare git repository on disk is empty (no refs) or missing entirely. | ||
| 5 | //! For each such repository, also removes any kind 30618 (state) events for the same | ||
| 6 | //! (pubkey, identifier) coordinate. | ||
| 7 | //! | ||
| 8 | //! ## Rationale | ||
| 9 | //! | ||
| 10 | //! A relay should not store announcement or state events for a repository that has no | ||
| 11 | //! git data. If the bare repo is empty or absent, the events are stale and should be | ||
| 12 | //! removed so the relay does not serve them. | ||
| 13 | //! | ||
| 14 | //! ## Usage | ||
| 15 | //! | ||
| 16 | //! ```text | ||
| 17 | //! # Dry-run (default): print what would be deleted | ||
| 18 | //! ngit-grasp cleanup-empty-repos --relay-data-path /var/lib/ngit-grasp/relay \ | ||
| 19 | //! --git-data-path /var/lib/ngit-grasp/git | ||
| 20 | //! | ||
| 21 | //! # Execute: delete the bare repos and remove events from the DB | ||
| 22 | //! ngit-grasp cleanup-empty-repos --relay-data-path /var/lib/ngit-grasp/relay \ | ||
| 23 | //! --git-data-path /var/lib/ngit-grasp/git \ | ||
| 24 | //! --execute | ||
| 25 | //! ``` | ||
| 26 | //! | ||
| 27 | //! The relay service should be stopped before running with `--execute` to avoid | ||
| 28 | //! races with the live relay process. | ||
| 29 | |||
| 30 | use std::path::{Path, PathBuf}; | ||
| 31 | use std::process::Command; | ||
| 32 | use std::sync::Arc; | ||
| 33 | |||
| 34 | use anyhow::{Context, Result}; | ||
| 35 | use clap::Args; | ||
| 36 | use nostr_lmdb::NostrLmdb; | ||
| 37 | use nostr_sdk::prelude::*; | ||
| 38 | |||
| 39 | use crate::nostr::events::RepositoryAnnouncement; | ||
| 40 | |||
| 41 | /// Arguments for the `cleanup-empty-repos` subcommand. | ||
| 42 | #[derive(Debug, Args)] | ||
| 43 | pub struct CleanupArgs { | ||
| 44 | /// Path to the LMDB relay data directory (contains the nostr event database). | ||
| 45 | /// | ||
| 46 | /// Defaults to `./data/relay` (same default as the relay itself). | ||
| 47 | #[arg(long, env = "NGIT_RELAY_DATA_PATH", default_value = "./data/relay")] | ||
| 48 | pub relay_data_path: String, | ||
| 49 | |||
| 50 | /// Path to the git data directory (contains bare repositories). | ||
| 51 | /// | ||
| 52 | /// Defaults to `./data/git` (same default as the relay itself). | ||
| 53 | #[arg(long, env = "NGIT_GIT_DATA_PATH", default_value = "./data/git")] | ||
| 54 | pub git_data_path: String, | ||
| 55 | |||
| 56 | /// Actually delete empty repositories and remove their events from the database. | ||
| 57 | /// | ||
| 58 | /// Without this flag the command runs in dry-run mode and only prints what | ||
| 59 | /// would be deleted. Stop the relay service before using this flag. | ||
| 60 | #[arg(long, default_value_t = false)] | ||
| 61 | pub execute: bool, | ||
| 62 | } | ||
| 63 | |||
| 64 | /// A repository that has an empty (or missing) bare git repo on disk. | ||
| 65 | #[derive(Debug)] | ||
| 66 | struct EmptyRepo { | ||
| 67 | /// The kind 30617 event | ||
| 68 | announcement: Event, | ||
| 69 | /// Derived npub (bech32) of the owner | ||
| 70 | npub: String, | ||
| 71 | /// Repository identifier (d-tag value) | ||
| 72 | identifier: String, | ||
| 73 | /// Absolute path to the bare repo directory | ||
| 74 | repo_path: PathBuf, | ||
| 75 | /// Whether the directory exists at all (vs exists but is empty) | ||
| 76 | repo_exists: bool, | ||
| 77 | /// Any kind 30618 state events found in the local DB for this coordinate | ||
| 78 | state_events: Vec<Event>, | ||
| 79 | } | ||
| 80 | |||
| 81 | /// Run the cleanup-empty-repos subcommand. | ||
| 82 | pub async fn run(args: &CleanupArgs) -> Result<()> { | ||
| 83 | let relay_data_path = Path::new(&args.relay_data_path); | ||
| 84 | let git_data_path = Path::new(&args.git_data_path); | ||
| 85 | |||
| 86 | if args.execute { | ||
| 87 | println!("=== cleanup-empty-repos (EXECUTE MODE) ==="); | ||
| 88 | println!("WARNING: This will permanently delete data. The relay should be stopped."); | ||
| 89 | println!(); | ||
| 90 | } else { | ||
| 91 | println!("=== cleanup-empty-repos (DRY-RUN MODE) ==="); | ||
| 92 | println!("Pass --execute to actually delete. Stop the relay first."); | ||
| 93 | println!(); | ||
| 94 | } | ||
| 95 | |||
| 96 | println!("Relay data path : {}", relay_data_path.display()); | ||
| 97 | println!("Git data path : {}", git_data_path.display()); | ||
| 98 | println!(); | ||
| 99 | |||
| 100 | // Open the LMDB database | ||
| 101 | println!("Opening LMDB database..."); | ||
| 102 | let database: Arc<dyn NostrDatabase> = Arc::new( | ||
| 103 | NostrLmdb::open(relay_data_path) | ||
| 104 | .await | ||
| 105 | .with_context(|| format!("Failed to open LMDB at {}", relay_data_path.display()))?, | ||
| 106 | ); | ||
| 107 | println!("Database opened."); | ||
| 108 | println!(); | ||
| 109 | |||
| 110 | // Query all kind 30617 events | ||
| 111 | let filter = Filter::new().kind(Kind::GitRepoAnnouncement); | ||
| 112 | let announcements = database | ||
| 113 | .query(filter) | ||
| 114 | .await | ||
| 115 | .context("Failed to query kind 30617 events")?; | ||
| 116 | |||
| 117 | println!("Found {} kind 30617 announcement(s) in database.", announcements.len()); | ||
| 118 | println!(); | ||
| 119 | |||
| 120 | // Identify empty repos | ||
| 121 | let mut empty_repos: Vec<EmptyRepo> = Vec::new(); | ||
| 122 | |||
| 123 | for event in announcements.iter() { | ||
| 124 | let announcement = match RepositoryAnnouncement::from_event(event.clone()) { | ||
| 125 | Ok(a) => a, | ||
| 126 | Err(e) => { | ||
| 127 | eprintln!( | ||
| 128 | " WARN: Could not parse announcement {} (skipping): {}", | ||
| 129 | event.id.to_hex(), | ||
| 130 | e | ||
| 131 | ); | ||
| 132 | continue; | ||
| 133 | } | ||
| 134 | }; | ||
| 135 | |||
| 136 | let npub = announcement.owner_npub(); | ||
| 137 | let identifier = announcement.identifier.clone(); | ||
| 138 | let repo_path = git_data_path.join(&announcement.repo_path()); | ||
| 139 | |||
| 140 | let (repo_exists, is_empty) = check_repo_empty(&repo_path); | ||
| 141 | |||
| 142 | if !is_empty { | ||
| 143 | // Repo has git data — leave it alone | ||
| 144 | continue; | ||
| 145 | } | ||
| 146 | |||
| 147 | // Look up any kind 30618 state events for this (pubkey, identifier) in the local DB | ||
| 148 | let state_filter = Filter::new() | ||
| 149 | .kind(Kind::RepoState) | ||
| 150 | .author(event.pubkey) | ||
| 151 | .identifier(identifier.clone()); | ||
| 152 | |||
| 153 | let state_events = database | ||
| 154 | .query(state_filter) | ||
| 155 | .await | ||
| 156 | .with_context(|| { | ||
| 157 | format!( | ||
| 158 | "Failed to query kind 30618 for {}/{}", | ||
| 159 | npub, identifier | ||
| 160 | ) | ||
| 161 | })?; | ||
| 162 | |||
| 163 | empty_repos.push(EmptyRepo { | ||
| 164 | announcement: event.clone(), | ||
| 165 | npub, | ||
| 166 | identifier, | ||
| 167 | repo_path, | ||
| 168 | repo_exists, | ||
| 169 | state_events: state_events.into_iter().collect(), | ||
| 170 | }); | ||
| 171 | } | ||
| 172 | |||
| 173 | if empty_repos.is_empty() { | ||
| 174 | println!("No empty repositories found. Nothing to do."); | ||
| 175 | return Ok(()); | ||
| 176 | } | ||
| 177 | |||
| 178 | // Print report | ||
| 179 | println!( | ||
| 180 | "Found {} repository/repositories with empty or missing git data:\n", | ||
| 181 | empty_repos.len() | ||
| 182 | ); | ||
| 183 | |||
| 184 | for (i, repo) in empty_repos.iter().enumerate() { | ||
| 185 | let repo_status = if repo.repo_exists { | ||
| 186 | "exists but empty (no refs)" | ||
| 187 | } else { | ||
| 188 | "missing from disk" | ||
| 189 | }; | ||
| 190 | println!( | ||
| 191 | " [{:>3}] {}/{} — git repo {}", | ||
| 192 | i + 1, | ||
| 193 | repo.npub, | ||
| 194 | repo.identifier, | ||
| 195 | repo_status, | ||
| 196 | ); | ||
| 197 | println!( | ||
| 198 | " 30617 event : {}", | ||
| 199 | repo.announcement.id.to_hex() | ||
| 200 | ); | ||
| 201 | if repo.state_events.is_empty() { | ||
| 202 | println!(" 30618 events: none in local DB"); | ||
| 203 | } else { | ||
| 204 | for se in &repo.state_events { | ||
| 205 | println!(" 30618 event : {}", se.id.to_hex()); | ||
| 206 | } | ||
| 207 | } | ||
| 208 | println!( | ||
| 209 | " repo path : {}", | ||
| 210 | repo.repo_path.display() | ||
| 211 | ); | ||
| 212 | } | ||
| 213 | |||
| 214 | println!(); | ||
| 215 | |||
| 216 | if !args.execute { | ||
| 217 | println!( | ||
| 218 | "DRY-RUN: {} repository/repositories would be cleaned up.", | ||
| 219 | empty_repos.len() | ||
| 220 | ); | ||
| 221 | println!("Run with --execute to perform the cleanup (stop the relay first)."); | ||
| 222 | return Ok(()); | ||
| 223 | } | ||
| 224 | |||
| 225 | // Execute: delete repos and remove events | ||
| 226 | println!("Executing cleanup..."); | ||
| 227 | println!(); | ||
| 228 | |||
| 229 | let mut deleted_repos = 0usize; | ||
| 230 | let mut failed_repos = 0usize; | ||
| 231 | let mut deleted_announcements = 0usize; | ||
| 232 | let mut deleted_state_events = 0usize; | ||
| 233 | |||
| 234 | for repo in &empty_repos { | ||
| 235 | println!("Cleaning up {}/{}...", repo.npub, repo.identifier); | ||
| 236 | |||
| 237 | // 1. Delete the bare repo directory (if it exists) | ||
| 238 | if repo.repo_exists { | ||
| 239 | match std::fs::remove_dir_all(&repo.repo_path) { | ||
| 240 | Ok(()) => { | ||
| 241 | println!(" Deleted git repo: {}", repo.repo_path.display()); | ||
| 242 | deleted_repos += 1; | ||
| 243 | |||
| 244 | // Remove the parent npub directory if now empty | ||
| 245 | if let Some(npub_dir) = repo.repo_path.parent() { | ||
| 246 | if npub_dir.exists() { | ||
| 247 | match std::fs::read_dir(npub_dir) { | ||
| 248 | Ok(mut entries) => { | ||
| 249 | if entries.next().is_none() { | ||
| 250 | if let Err(e) = std::fs::remove_dir(npub_dir) { | ||
| 251 | eprintln!( | ||
| 252 | " WARN: Could not remove empty npub dir {}: {}", | ||
| 253 | npub_dir.display(), | ||
| 254 | e | ||
| 255 | ); | ||
| 256 | } else { | ||
| 257 | println!( | ||
| 258 | " Removed empty npub dir: {}", | ||
| 259 | npub_dir.display() | ||
| 260 | ); | ||
| 261 | } | ||
| 262 | } | ||
| 263 | } | ||
| 264 | Err(e) => { | ||
| 265 | eprintln!( | ||
| 266 | " WARN: Could not read npub dir {}: {}", | ||
| 267 | npub_dir.display(), | ||
| 268 | e | ||
| 269 | ); | ||
| 270 | } | ||
| 271 | } | ||
| 272 | } | ||
| 273 | } | ||
| 274 | } | ||
| 275 | Err(e) => { | ||
| 276 | eprintln!( | ||
| 277 | " ERROR: Failed to delete git repo {}: {}", | ||
| 278 | repo.repo_path.display(), | ||
| 279 | e | ||
| 280 | ); | ||
| 281 | failed_repos += 1; | ||
| 282 | // Continue — still try to remove the DB events | ||
| 283 | } | ||
| 284 | } | ||
| 285 | } | ||
| 286 | |||
| 287 | // 2. Remove the kind 30617 announcement from the DB | ||
| 288 | // Use a filter matching the specific event ID so we only delete this exact event. | ||
| 289 | let announcement_filter = Filter::new() | ||
| 290 | .kind(Kind::GitRepoAnnouncement) | ||
| 291 | .id(repo.announcement.id); | ||
| 292 | |||
| 293 | match database.delete(announcement_filter).await { | ||
| 294 | Ok(()) => { | ||
| 295 | println!(" Deleted 30617 event: {}", repo.announcement.id.to_hex()); | ||
| 296 | deleted_announcements += 1; | ||
| 297 | } | ||
| 298 | Err(e) => { | ||
| 299 | eprintln!( | ||
| 300 | " ERROR: Failed to delete 30617 event {}: {}", | ||
| 301 | repo.announcement.id.to_hex(), | ||
| 302 | e | ||
| 303 | ); | ||
| 304 | } | ||
| 305 | } | ||
| 306 | |||
| 307 | // 3. Remove any kind 30618 state events for this coordinate | ||
| 308 | if !repo.state_events.is_empty() { | ||
| 309 | let state_filter = Filter::new() | ||
| 310 | .kind(Kind::RepoState) | ||
| 311 | .author(repo.announcement.pubkey) | ||
| 312 | .identifier(repo.identifier.clone()); | ||
| 313 | |||
| 314 | match database.delete(state_filter).await { | ||
| 315 | Ok(()) => { | ||
| 316 | for se in &repo.state_events { | ||
| 317 | println!(" Deleted 30618 event: {}", se.id.to_hex()); | ||
| 318 | deleted_state_events += 1; | ||
| 319 | } | ||
| 320 | } | ||
| 321 | Err(e) => { | ||
| 322 | eprintln!( | ||
| 323 | " ERROR: Failed to delete 30618 events for {}/{}: {}", | ||
| 324 | repo.npub, repo.identifier, e | ||
| 325 | ); | ||
| 326 | } | ||
| 327 | } | ||
| 328 | } | ||
| 329 | } | ||
| 330 | |||
| 331 | println!(); | ||
| 332 | println!("=== Cleanup complete ==="); | ||
| 333 | println!(" Git repos deleted : {}", deleted_repos); | ||
| 334 | if failed_repos > 0 { | ||
| 335 | println!(" Git repos failed : {} (see errors above)", failed_repos); | ||
| 336 | } | ||
| 337 | println!(" 30617 events removed : {}", deleted_announcements); | ||
| 338 | println!(" 30618 events removed : {}", deleted_state_events); | ||
| 339 | |||
| 340 | Ok(()) | ||
| 341 | } | ||
| 342 | |||
/// Check whether a bare git repository is empty (has no refs).
///
/// Returns `(exists, is_empty)`:
/// - `(false, true)` — path does not exist (treated as empty)
/// - `(true, true)` — `git for-each-ref` succeeded and listed no refs
/// - `(true, false)` — the repository has at least one ref, **or** git could
///   not be run / exited with an error, so emptiness could not be established
///
/// The result drives deletion in `--execute` mode, so every uncertain outcome
/// is reported as "not empty": a broken git installation or an unreadable
/// directory must never cause a populated repository to be removed.
fn check_repo_empty(repo_path: &Path) -> (bool, bool) {
    if !repo_path.exists() {
        return (false, true);
    }

    // `--git-dir` is an option of `git` itself and therefore has to precede
    // the subcommand; placed after `for-each-ref` it is rejected as an unknown
    // option, which yields empty stdout and looks like "no refs".
    let output = Command::new("git")
        .arg("--git-dir")
        .arg(repo_path)
        // One ref is enough to prove the repository is not empty.
        .args(["for-each-ref", "--count=1", "--format=%(refname)"])
        .output();

    match output {
        Ok(out) if out.status.success() => {
            // Trim whitespace; if nothing remains, the repo has no refs
            let stdout = String::from_utf8_lossy(&out.stdout);
            (true, stdout.trim().is_empty())
        }
        Ok(out) => {
            // git ran but failed (e.g. the directory is not a repository).
            eprintln!(
                " WARN: git for-each-ref failed for {} ({}): {}",
                repo_path.display(),
                out.status,
                String::from_utf8_lossy(&out.stderr).trim()
            );
            (true, false)
        }
        Err(e) => {
            // git could not be started at all (not installed, not on PATH, ...).
            eprintln!(
                " WARN: Could not run git for {}: {}",
                repo_path.display(),
                e
            );
            (true, false)
        }
    }
}
diff --git a/src/config.rs b/src/config.rs index 30e77ab..4dd396a 100644 --- a/src/config.rs +++ b/src/config.rs | |||
| @@ -506,7 +506,7 @@ impl Config { | |||
| 506 | } | 506 | } |
| 507 | 507 | ||
| 508 | /// Load relay owner key from file, or generate and save a new one | 508 | /// Load relay owner key from file, or generate and save a new one |
| 509 | fn load_or_generate_relay_owner_key() -> Result<String> { | 509 | pub fn load_or_generate_relay_owner_key() -> Result<String> { |
| 510 | let key_path = PathBuf::from(Self::RELAY_OWNER_KEY_FILE); | 510 | let key_path = PathBuf::from(Self::RELAY_OWNER_KEY_FILE); |
| 511 | 511 | ||
| 512 | // Try to load existing key | 512 | // Try to load existing key |
| @@ -1,4 +1,5 @@ | |||
| 1 | pub mod audit_cleanup; | 1 | pub mod audit_cleanup; |
| 2 | pub mod cleanup_empty_repos; | ||
| 2 | pub mod config; | 3 | pub mod config; |
| 3 | pub mod git; | 4 | pub mod git; |
| 4 | pub mod http; | 5 | pub mod http; |
diff --git a/src/main.rs b/src/main.rs index 12a875c..bc77fcb 100644 --- a/src/main.rs +++ b/src/main.rs | |||
| @@ -2,12 +2,14 @@ use std::time::Duration; | |||
| 2 | use std::{path::PathBuf, sync::Arc}; | 2 | use std::{path::PathBuf, sync::Arc}; |
| 3 | 3 | ||
| 4 | use anyhow::Result; | 4 | use anyhow::Result; |
| 5 | use clap::Parser; | ||
| 5 | use tokio::signal; | 6 | use tokio::signal; |
| 6 | use tracing::{error, info, warn}; | 7 | use tracing::{error, info, warn}; |
| 7 | use tracing_subscriber::{EnvFilter, FmtSubscriber}; | 8 | use tracing_subscriber::{EnvFilter, FmtSubscriber}; |
| 8 | 9 | ||
| 9 | use ngit_grasp::{ | 10 | use ngit_grasp::{ |
| 10 | audit_cleanup, | 11 | audit_cleanup, |
| 12 | cleanup_empty_repos, | ||
| 11 | config::{Config, DatabaseBackend}, | 13 | config::{Config, DatabaseBackend}, |
| 12 | git, http, | 14 | git, http, |
| 13 | metrics::Metrics, | 15 | metrics::Metrics, |
| @@ -16,10 +18,61 @@ use ngit_grasp::{ | |||
| 16 | sync::{naughty_list::NaughtyListTracker, SyncManager}, | 18 | sync::{naughty_list::NaughtyListTracker, SyncManager}, |
| 17 | }; | 19 | }; |
| 18 | 20 | ||
| 21 | /// Top-level CLI dispatcher. | ||
| 22 | /// | ||
| 23 | /// With no subcommand the binary runs the relay (all relay flags apply). | ||
| 24 | /// With a subcommand it runs the requested maintenance tool instead. | ||
| 25 | #[derive(Debug, Parser)] | ||
| 26 | #[command(author, version, about = "ngit-grasp GRASP relay", long_about = None)] | ||
| 27 | #[command(propagate_version = true)] | ||
| 28 | enum Cli { | ||
| 29 | /// Run the GRASP relay server (default when no subcommand is given). | ||
| 30 | #[command(name = "serve")] | ||
| 31 | Serve(Config), | ||
| 32 | |||
| 33 | /// Remove kind 30617/30618 events whose bare git repository is empty or missing. | ||
| 34 | /// | ||
| 35 | /// Runs in dry-run mode by default. Pass --execute to make changes. | ||
| 36 | /// Stop the relay service before running with --execute. | ||
| 37 | CleanupEmptyRepos(cleanup_empty_repos::CleanupArgs), | ||
| 38 | } | ||
| 39 | |||
| 19 | #[tokio::main] | 40 | #[tokio::main] |
| 20 | async fn main() -> Result<()> { | 41 | async fn main() -> Result<()> { |
| 21 | // Load configuration first (priority: CLI flags > env vars > .env file > defaults) | 42 | // Load .env file before clap parses, so env vars are available. |
| 22 | let config = Config::load()?; | 43 | dotenvy::dotenv().ok(); |
| 44 | |||
| 45 | // Peek at argv[1] to decide whether a subcommand was explicitly provided. | ||
| 46 | // If not, prepend the implicit "serve" subcommand so that clap routes to Cli::Serve | ||
| 47 | // and all relay flags are parsed normally (preserving backward compatibility). | ||
| 48 | let mut args: Vec<String> = std::env::args().collect(); | ||
| 49 | let known_subcommands = ["serve", "cleanup-empty-repos", "help"]; | ||
| 50 | let has_subcommand = args.get(1).map_or(false, |a| { | ||
| 51 | known_subcommands.contains(&a.as_str()) | ||
| 52 | || matches!(a.as_str(), "-h" | "--help" | "-V" | "--version") | ||
| 53 | }); | ||
| 54 | if !has_subcommand { | ||
| 55 | args.insert(1, "serve".to_string()); | ||
| 56 | } | ||
| 57 | |||
| 58 | match Cli::parse_from(args) { | ||
| 59 | Cli::CleanupEmptyRepos(cleanup_args) => { | ||
| 60 | cleanup_empty_repos::run(&cleanup_args).await | ||
| 61 | } | ||
| 62 | Cli::Serve(mut config) => { | ||
| 63 | // Finish initialising the Config (load relay owner key if not provided). | ||
| 64 | if config.relay_owner_nsec.is_none() { | ||
| 65 | config.relay_owner_nsec = Some(Config::load_or_generate_relay_owner_key()?); | ||
| 66 | } else { | ||
| 67 | config.relay_owner_nsec = | ||
| 68 | config.relay_owner_nsec.take().map(|s| s.trim().to_string()); | ||
| 69 | } | ||
| 70 | run_relay(config).await | ||
| 71 | } | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | async fn run_relay(config: Config) -> Result<()> { | ||
| 23 | 76 | ||
| 24 | // Initialize tracing with configured log level | 77 | // Initialize tracing with configured log level |
| 25 | let subscriber = FmtSubscriber::builder() | 78 | let subscriber = FmtSubscriber::builder() |