upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  src/cleanup_empty_repos.rs  373
-rw-r--r--  src/config.rs  2
-rw-r--r--  src/lib.rs  1
-rw-r--r--  src/main.rs  57
4 files changed, 430 insertions, 3 deletions
diff --git a/src/cleanup_empty_repos.rs b/src/cleanup_empty_repos.rs
new file mode 100644
index 0000000..f1d1c3e
--- /dev/null
+++ b/src/cleanup_empty_repos.rs
@@ -0,0 +1,373 @@
1//! Cleanup Empty Repositories
2//!
3//! Scans the LMDB database for kind 30617 (repository announcement) events whose
4//! corresponding bare git repository on disk is empty (no refs) or missing entirely.
5//! For each such repository, also removes any kind 30618 (state) events for the same
6//! (pubkey, identifier) coordinate.
7//!
8//! ## Rationale
9//!
10//! A relay should not store announcement or state events for a repository that has no
11//! git data. If the bare repo is empty or absent, the events are stale and should be
12//! removed so the relay does not serve them.
13//!
14//! ## Usage
15//!
16//! ```text
17//! # Dry-run (default): print what would be deleted
18//! ngit-grasp cleanup-empty-repos --relay-data-path /var/lib/ngit-grasp/relay \
19//! --git-data-path /var/lib/ngit-grasp/git
20//!
21//! # Execute: delete the bare repos and remove events from the DB
22//! ngit-grasp cleanup-empty-repos --relay-data-path /var/lib/ngit-grasp/relay \
23//! --git-data-path /var/lib/ngit-grasp/git \
24//! --execute
25//! ```
26//!
27//! The relay service should be stopped before running with `--execute` to avoid
28//! races with the live relay process.
29
30use std::path::{Path, PathBuf};
31use std::process::Command;
32use std::sync::Arc;
33
34use anyhow::{Context, Result};
35use clap::Args;
36use nostr_lmdb::NostrLmdb;
37use nostr_sdk::prelude::*;
38
39use crate::nostr::events::RepositoryAnnouncement;
40
41/// Arguments for the `cleanup-empty-repos` subcommand.
42#[derive(Debug, Args)]
43pub struct CleanupArgs {
44 /// Path to the LMDB relay data directory (contains the nostr event database).
45 ///
46 /// Defaults to `./data/relay` (same default as the relay itself).
47 #[arg(long, env = "NGIT_RELAY_DATA_PATH", default_value = "./data/relay")]
48 pub relay_data_path: String,
49
50 /// Path to the git data directory (contains bare repositories).
51 ///
52 /// Defaults to `./data/git` (same default as the relay itself).
53 #[arg(long, env = "NGIT_GIT_DATA_PATH", default_value = "./data/git")]
54 pub git_data_path: String,
55
56 /// Actually delete empty repositories and remove their events from the database.
57 ///
58 /// Without this flag the command runs in dry-run mode and only prints what
59 /// would be deleted. Stop the relay service before using this flag.
60 #[arg(long, default_value_t = false)]
61 pub execute: bool,
62}
63
64/// A repository that has an empty (or missing) bare git repo on disk.
65#[derive(Debug)]
66struct EmptyRepo {
67 /// The kind 30617 event
68 announcement: Event,
69 /// Derived npub (bech32) of the owner
70 npub: String,
71 /// Repository identifier (d-tag value)
72 identifier: String,
73 /// Absolute path to the bare repo directory
74 repo_path: PathBuf,
75 /// Whether the directory exists at all (vs exists but is empty)
76 repo_exists: bool,
77 /// Any kind 30618 state events found in the local DB for this coordinate
78 state_events: Vec<Event>,
79}
80
81/// Run the cleanup-empty-repos subcommand.
82pub async fn run(args: &CleanupArgs) -> Result<()> {
83 let relay_data_path = Path::new(&args.relay_data_path);
84 let git_data_path = Path::new(&args.git_data_path);
85
86 if args.execute {
87 println!("=== cleanup-empty-repos (EXECUTE MODE) ===");
88 println!("WARNING: This will permanently delete data. The relay should be stopped.");
89 println!();
90 } else {
91 println!("=== cleanup-empty-repos (DRY-RUN MODE) ===");
92 println!("Pass --execute to actually delete. Stop the relay first.");
93 println!();
94 }
95
96 println!("Relay data path : {}", relay_data_path.display());
97 println!("Git data path : {}", git_data_path.display());
98 println!();
99
100 // Open the LMDB database
101 println!("Opening LMDB database...");
102 let database: Arc<dyn NostrDatabase> = Arc::new(
103 NostrLmdb::open(relay_data_path)
104 .await
105 .with_context(|| format!("Failed to open LMDB at {}", relay_data_path.display()))?,
106 );
107 println!("Database opened.");
108 println!();
109
110 // Query all kind 30617 events
111 let filter = Filter::new().kind(Kind::GitRepoAnnouncement);
112 let announcements = database
113 .query(filter)
114 .await
115 .context("Failed to query kind 30617 events")?;
116
117 println!("Found {} kind 30617 announcement(s) in database.", announcements.len());
118 println!();
119
120 // Identify empty repos
121 let mut empty_repos: Vec<EmptyRepo> = Vec::new();
122
123 for event in announcements.iter() {
124 let announcement = match RepositoryAnnouncement::from_event(event.clone()) {
125 Ok(a) => a,
126 Err(e) => {
127 eprintln!(
128 " WARN: Could not parse announcement {} (skipping): {}",
129 event.id.to_hex(),
130 e
131 );
132 continue;
133 }
134 };
135
136 let npub = announcement.owner_npub();
137 let identifier = announcement.identifier.clone();
138 let repo_path = git_data_path.join(&announcement.repo_path());
139
140 let (repo_exists, is_empty) = check_repo_empty(&repo_path);
141
142 if !is_empty {
143 // Repo has git data — leave it alone
144 continue;
145 }
146
147 // Look up any kind 30618 state events for this (pubkey, identifier) in the local DB
148 let state_filter = Filter::new()
149 .kind(Kind::RepoState)
150 .author(event.pubkey)
151 .identifier(identifier.clone());
152
153 let state_events = database
154 .query(state_filter)
155 .await
156 .with_context(|| {
157 format!(
158 "Failed to query kind 30618 for {}/{}",
159 npub, identifier
160 )
161 })?;
162
163 empty_repos.push(EmptyRepo {
164 announcement: event.clone(),
165 npub,
166 identifier,
167 repo_path,
168 repo_exists,
169 state_events: state_events.into_iter().collect(),
170 });
171 }
172
173 if empty_repos.is_empty() {
174 println!("No empty repositories found. Nothing to do.");
175 return Ok(());
176 }
177
178 // Print report
179 println!(
180 "Found {} repository/repositories with empty or missing git data:\n",
181 empty_repos.len()
182 );
183
184 for (i, repo) in empty_repos.iter().enumerate() {
185 let repo_status = if repo.repo_exists {
186 "exists but empty (no refs)"
187 } else {
188 "missing from disk"
189 };
190 println!(
191 " [{:>3}] {}/{} — git repo {}",
192 i + 1,
193 repo.npub,
194 repo.identifier,
195 repo_status,
196 );
197 println!(
198 " 30617 event : {}",
199 repo.announcement.id.to_hex()
200 );
201 if repo.state_events.is_empty() {
202 println!(" 30618 events: none in local DB");
203 } else {
204 for se in &repo.state_events {
205 println!(" 30618 event : {}", se.id.to_hex());
206 }
207 }
208 println!(
209 " repo path : {}",
210 repo.repo_path.display()
211 );
212 }
213
214 println!();
215
216 if !args.execute {
217 println!(
218 "DRY-RUN: {} repository/repositories would be cleaned up.",
219 empty_repos.len()
220 );
221 println!("Run with --execute to perform the cleanup (stop the relay first).");
222 return Ok(());
223 }
224
225 // Execute: delete repos and remove events
226 println!("Executing cleanup...");
227 println!();
228
229 let mut deleted_repos = 0usize;
230 let mut failed_repos = 0usize;
231 let mut deleted_announcements = 0usize;
232 let mut deleted_state_events = 0usize;
233
234 for repo in &empty_repos {
235 println!("Cleaning up {}/{}...", repo.npub, repo.identifier);
236
237 // 1. Delete the bare repo directory (if it exists)
238 if repo.repo_exists {
239 match std::fs::remove_dir_all(&repo.repo_path) {
240 Ok(()) => {
241 println!(" Deleted git repo: {}", repo.repo_path.display());
242 deleted_repos += 1;
243
244 // Remove the parent npub directory if now empty
245 if let Some(npub_dir) = repo.repo_path.parent() {
246 if npub_dir.exists() {
247 match std::fs::read_dir(npub_dir) {
248 Ok(mut entries) => {
249 if entries.next().is_none() {
250 if let Err(e) = std::fs::remove_dir(npub_dir) {
251 eprintln!(
252 " WARN: Could not remove empty npub dir {}: {}",
253 npub_dir.display(),
254 e
255 );
256 } else {
257 println!(
258 " Removed empty npub dir: {}",
259 npub_dir.display()
260 );
261 }
262 }
263 }
264 Err(e) => {
265 eprintln!(
266 " WARN: Could not read npub dir {}: {}",
267 npub_dir.display(),
268 e
269 );
270 }
271 }
272 }
273 }
274 }
275 Err(e) => {
276 eprintln!(
277 " ERROR: Failed to delete git repo {}: {}",
278 repo.repo_path.display(),
279 e
280 );
281 failed_repos += 1;
282 // Continue — still try to remove the DB events
283 }
284 }
285 }
286
287 // 2. Remove the kind 30617 announcement from the DB
288 // Use a filter matching the specific event ID so we only delete this exact event.
289 let announcement_filter = Filter::new()
290 .kind(Kind::GitRepoAnnouncement)
291 .id(repo.announcement.id);
292
293 match database.delete(announcement_filter).await {
294 Ok(()) => {
295 println!(" Deleted 30617 event: {}", repo.announcement.id.to_hex());
296 deleted_announcements += 1;
297 }
298 Err(e) => {
299 eprintln!(
300 " ERROR: Failed to delete 30617 event {}: {}",
301 repo.announcement.id.to_hex(),
302 e
303 );
304 }
305 }
306
307 // 3. Remove any kind 30618 state events for this coordinate
308 if !repo.state_events.is_empty() {
309 let state_filter = Filter::new()
310 .kind(Kind::RepoState)
311 .author(repo.announcement.pubkey)
312 .identifier(repo.identifier.clone());
313
314 match database.delete(state_filter).await {
315 Ok(()) => {
316 for se in &repo.state_events {
317 println!(" Deleted 30618 event: {}", se.id.to_hex());
318 deleted_state_events += 1;
319 }
320 }
321 Err(e) => {
322 eprintln!(
323 " ERROR: Failed to delete 30618 events for {}/{}: {}",
324 repo.npub, repo.identifier, e
325 );
326 }
327 }
328 }
329 }
330
331 println!();
332 println!("=== Cleanup complete ===");
333 println!(" Git repos deleted : {}", deleted_repos);
334 if failed_repos > 0 {
335 println!(" Git repos failed : {} (see errors above)", failed_repos);
336 }
337 println!(" 30617 events removed : {}", deleted_announcements);
338 println!(" 30618 events removed : {}", deleted_state_events);
339
340 Ok(())
341}
342
/// Check whether a bare git repository is empty (has no refs).
///
/// Runs `git --git-dir <repo_path> for-each-ref --count=1 --format=%(refname)`.
/// `--git-dir` is an option of the top-level `git` command and therefore has
/// to come before the `for-each-ref` subcommand; placed after it, the
/// subcommand rejects it as an unknown option, prints nothing on stdout and
/// every repository would look empty.
///
/// Returns `(exists, is_empty)`:
/// - `(false, true)` — path does not exist (treated as empty)
/// - `(true, true)` — git ran successfully and listed no refs
/// - `(true, false)` — at least one ref exists, or emptiness could not be
///   established (git could not be started, or it exited with a failure)
///
/// Callers use `is_empty` to decide what to delete, so any uncertainty is
/// reported as "not empty".
fn check_repo_empty(repo_path: &Path) -> (bool, bool) {
    if !repo_path.exists() {
        return (false, true);
    }

    // One ref is enough to prove the repo is not empty, hence `--count=1`.
    let output = Command::new("git")
        .arg("--git-dir")
        .arg(repo_path)
        .args(["for-each-ref", "--count=1", "--format=%(refname)"])
        .output();

    let is_empty = match output {
        // Only trust empty stdout when git itself reported success.
        Ok(out) if out.status.success() => {
            String::from_utf8_lossy(&out.stdout).trim().is_empty()
        }
        // Spawn failure or non-zero exit: unknown state, do not flag for deletion.
        _ => false,
    };

    (true, is_empty)
}
diff --git a/src/config.rs b/src/config.rs
index 30e77ab..4dd396a 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -506,7 +506,7 @@ impl Config {
506 } 506 }
507 507
508 /// Load relay owner key from file, or generate and save a new one 508 /// Load relay owner key from file, or generate and save a new one
509 fn load_or_generate_relay_owner_key() -> Result<String> { 509 pub fn load_or_generate_relay_owner_key() -> Result<String> {
510 let key_path = PathBuf::from(Self::RELAY_OWNER_KEY_FILE); 510 let key_path = PathBuf::from(Self::RELAY_OWNER_KEY_FILE);
511 511
512 // Try to load existing key 512 // Try to load existing key
diff --git a/src/lib.rs b/src/lib.rs
index d0e2c2d..7b01a07 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,5 @@
1pub mod audit_cleanup; 1pub mod audit_cleanup;
2pub mod cleanup_empty_repos;
2pub mod config; 3pub mod config;
3pub mod git; 4pub mod git;
4pub mod http; 5pub mod http;
diff --git a/src/main.rs b/src/main.rs
index 12a875c..bc77fcb 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,12 +2,14 @@ use std::time::Duration;
2use std::{path::PathBuf, sync::Arc}; 2use std::{path::PathBuf, sync::Arc};
3 3
4use anyhow::Result; 4use anyhow::Result;
5use clap::Parser;
5use tokio::signal; 6use tokio::signal;
6use tracing::{error, info, warn}; 7use tracing::{error, info, warn};
7use tracing_subscriber::{EnvFilter, FmtSubscriber}; 8use tracing_subscriber::{EnvFilter, FmtSubscriber};
8 9
9use ngit_grasp::{ 10use ngit_grasp::{
10 audit_cleanup, 11 audit_cleanup,
12 cleanup_empty_repos,
11 config::{Config, DatabaseBackend}, 13 config::{Config, DatabaseBackend},
12 git, http, 14 git, http,
13 metrics::Metrics, 15 metrics::Metrics,
@@ -16,10 +18,61 @@ use ngit_grasp::{
16 sync::{naughty_list::NaughtyListTracker, SyncManager}, 18 sync::{naughty_list::NaughtyListTracker, SyncManager},
17}; 19};
18 20
21/// Top-level CLI dispatcher.
22///
23/// With no subcommand the binary runs the relay (all relay flags apply).
24/// With a subcommand it runs the requested maintenance tool instead.
25#[derive(Debug, Parser)]
26#[command(author, version, about = "ngit-grasp GRASP relay", long_about = None)]
27#[command(propagate_version = true)]
28enum Cli {
29 /// Run the GRASP relay server (default when no subcommand is given).
30 #[command(name = "serve")]
31 Serve(Config),
32
33 /// Remove kind 30617/30618 events whose bare git repository is empty or missing.
34 ///
35 /// Runs in dry-run mode by default. Pass --execute to make changes.
36 /// Stop the relay service before running with --execute.
37 CleanupEmptyRepos(cleanup_empty_repos::CleanupArgs),
38}
39
19#[tokio::main] 40#[tokio::main]
20async fn main() -> Result<()> { 41async fn main() -> Result<()> {
21 // Load configuration first (priority: CLI flags > env vars > .env file > defaults) 42 // Load .env file before clap parses, so env vars are available.
22 let config = Config::load()?; 43 dotenvy::dotenv().ok();
44
45 // Peek at argv[1] to decide whether a subcommand was explicitly provided.
46 // If not, prepend the implicit "serve" subcommand so that clap routes to Cli::Serve
47 // and all relay flags are parsed normally (preserving backward compatibility).
48 let mut args: Vec<String> = std::env::args().collect();
49 let known_subcommands = ["serve", "cleanup-empty-repos", "help"];
50 let has_subcommand = args.get(1).map_or(false, |a| {
51 known_subcommands.contains(&a.as_str())
52 || matches!(a.as_str(), "-h" | "--help" | "-V" | "--version")
53 });
54 if !has_subcommand {
55 args.insert(1, "serve".to_string());
56 }
57
58 match Cli::parse_from(args) {
59 Cli::CleanupEmptyRepos(cleanup_args) => {
60 cleanup_empty_repos::run(&cleanup_args).await
61 }
62 Cli::Serve(mut config) => {
63 // Finish initialising the Config (load relay owner key if not provided).
64 if config.relay_owner_nsec.is_none() {
65 config.relay_owner_nsec = Some(Config::load_or_generate_relay_owner_key()?);
66 } else {
67 config.relay_owner_nsec =
68 config.relay_owner_nsec.take().map(|s| s.trim().to_string());
69 }
70 run_relay(config).await
71 }
72 }
73}
74
75async fn run_relay(config: Config) -> Result<()> {
23 76
24 // Initialize tracing with configured log level 77 // Initialize tracing with configured log level
25 let subscriber = FmtSubscriber::builder() 78 let subscriber = FmtSubscriber::builder()