From 2be44c604062c7579e08c0d37b2f32ea8b6c4fcf Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 17:42:13 +0000 Subject: docs: add announcements purgatory design document Addresses the problem of empty bare repos misleading clients and sync downloading refs to deleted repos. Key design points: - Bare repo created immediately so git pushes can succeed - Git data arrival triggers promotion to active status - Expiry extended in two places: state event arrival and git auth - Indexed by (pubkey, identifier) for correct uniqueness - Handles replacement announcements and service changes --- docs/explanation/announcements-purgatory-design.md | 185 +++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 docs/explanation/announcements-purgatory-design.md (limited to 'docs') diff --git a/docs/explanation/announcements-purgatory-design.md b/docs/explanation/announcements-purgatory-design.md new file mode 100644 index 0000000..c9077d9 --- /dev/null +++ b/docs/explanation/announcements-purgatory-design.md @@ -0,0 +1,185 @@ +# Announcements Purgatory Design + +## Problem Statement + +**Primary problem:** Empty bare git repos mislead clients into thinking we host content. + +When an announcement arrives, we must create the bare repo immediately (so git pushes can succeed). But if no git data ever arrives, we serve an empty repo and its announcement indefinitely. Clients see the announcement, try to clone, and get nothing. This is misleading. + +**Secondary problem:** Sync downloads refs to deleted repos. + +When a repo expires or is cleaned up, sync may still try to download state event refs to it. We need announcements to remain in a holding state until git data proves the repo has content worth serving. + +## Solution Overview + +New announcements go to **purgatory** instead of being immediately accepted: + +1. **Announcement arrives** - Create bare repo immediately, add announcement to purgatory +2. 
**Git data arrives** - Promote announcement from purgatory to active (now served to clients) +3. **No git data before expiry** - Delete bare repo, discard announcement (never served) + +This ensures we only serve announcements for repos that actually have content. + +## Key Design Decisions + +### 1. Bare Repo Created Immediately + +**Decision:** Create the bare git repo when announcement enters purgatory. + +**Why:** Git pushes may arrive at any time. Without a repo, pushes fail. + +**Consequence:** We allocate disk space for repos that may expire unused. Must delete repos on expiry. + +### 2. Git Data Triggers Promotion + +**Decision:** Git data arrival promotes the announcement to active status. + +**Why:** Git data proves the repository has content. State events alone don't prove content exists - they could reference empty repos. + +**Where:** Promotion happens in the git receive path after successful push/fetch with data. + +### 3. Replacement Announcements Skip Purgatory + +**Decision:** Announcements replacing an existing active announcement are accepted immediately. + +**Why:** The repository is already proven active with content. + +**How:** Check if active announcement exists for `(pubkey, identifier)` before routing to purgatory. + +### 4. Expiry Extension (Two Places) + +**Decision:** Extend purgatory announcement expiry in two scenarios: + +| Trigger | Location | Why | +|---------|----------|-----| +| State event arrives | `StatePolicy::process_state_event()` | Repo is actively receiving metadata | +| Git auth extends state event | `src/git/auth.rs` | Repo is actively receiving git data | + +**Why:** Prevents premature expiry during slow sync operations or multi-step pushes. + +### 5. State Events Consider Purgatory Announcements + +**Decision:** When validating state events, check purgatory announcements for authorization. + +**Why:** State events may arrive before git data promotes the announcement. 
They still need authorization from the announcement's maintainer set. + +## Data Structure + +```rust +// Key: (owner pubkey, identifier) - identifier alone is NOT unique +announcement_purgatory: Arc> + +pub struct AnnouncementPurgatoryEntry { + pub event: Event, + pub identifier: String, + pub owner: PublicKey, + pub repo_path: PathBuf, + pub created_at: Instant, + pub expires_at: Instant, +} +``` + +**Indexed by `(pubkey, identifier)`** because identifier is not unique across different owners. + +## Flows + +### New Announcement Flow + +``` +Announcement arrives + | + v +Is there an active announcement for (pubkey, identifier)? + | + +-- YES --> Accept immediately (replacement) + | + +-- NO --> Create bare repo + Add to purgatory + Return OK to client (but don't serve) +``` + +### Git Data Arrival Flow + +``` +Git push/fetch completes with data + | + v +Is there a purgatory announcement for (pubkey, identifier)? + | + +-- YES --> Promote to active (move to database) + | Now served to clients + | + +-- NO --> Normal processing +``` + +### State Event Arrival Flow + +``` +State event arrives + | + v +Is there an active announcement? 
+ | + +-- YES --> Normal validation + | + +-- NO --> Check purgatory for announcement + | + +-- Found --> Validate against purgatory announcement + | Extend purgatory expiry + | State event goes to state purgatory + | + +-- Not found --> Reject or state purgatory +``` + +## Edge Cases + +| Scenario | Behavior | +|----------|----------| +| Git data before announcement | Push fails (no repo exists) | +| Announcement expires, no git data | Delete bare repo, discard announcement | +| State expires, announcement in purgatory | Announcement keeps its own expiry | +| Multiple owners, same identifier | Each tracked separately by `(pubkey, identifier)` | +| **Newer announcement replaces older (same pubkey)** | Replace purgatory entry, extend expiry | +| **Newer announcement changes services (unacceptable)** | Clear older announcement from purgatory for that `(pubkey, identifier)` | +| Deletion event for purgatory announcement | Remove from purgatory, delete bare repo | + +## Purgatory Exit Conditions + +An announcement leaves purgatory via: + +| Exit | Trigger | Action | +|------|---------|--------| +| **Promotion** | Git data arrives | Move to database, serve to clients | +| **Expiry** | Timeout | Delete bare repo, discard | +| **Deletion** | Kind 5 event | Delete bare repo, discard | +| **Replacement** | Newer announcement (same pubkey, identifier) | Replace entry | +| **Service change** | Newer announcement no longer lists our service | Discard old entry | + +## Integration Points + +| File | Change | +|------|--------| +| `src/purgatory/mod.rs` | Add `announcement_purgatory` store | +| `src/purgatory/types.rs` | Add `AnnouncementPurgatoryEntry` | +| `src/nostr/policy/announcement.rs` | Route new announcements to purgatory | +| `src/git/receive.rs` | Promote on git data arrival | +| `src/git/auth.rs` | Extend purgatory expiry when extending state event expiry | +| `src/nostr/policy/state.rs` | Check purgatory for authorization | + +## Testing + +- Announcement to 
purgatory, git data promotes it +- Announcement expires without git data (repo deleted) +- State event extends purgatory expiry +- Git auth extends purgatory expiry +- Newer announcement replaces older in purgatory +- Service change clears purgatory entry +- `(pubkey, identifier)` indexing with multiple owners + +## Risks + +| Risk | Mitigation | +|------|------------| +| Disk exhaustion from purgatory repos | Short expiry, monitor purgatory size | +| Race between promotion and expiry | Atomic operations | +| Sync re-fetching expired events | Track expired event IDs | -- cgit v1.2.3 From 854484813dfe45f882fe66ff866621f9a21186fe Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Thu, 5 Feb 2026 15:25:14 +0000 Subject: add notes to announcment purgatory design --- docs/explanation/announcements-purgatory-design.md | 73 ++++++++++++---------- 1 file changed, 40 insertions(+), 33 deletions(-) (limited to 'docs') diff --git a/docs/explanation/announcements-purgatory-design.md b/docs/explanation/announcements-purgatory-design.md index c9077d9..4d0cc6d 100644 --- a/docs/explanation/announcements-purgatory-design.md +++ b/docs/explanation/announcements-purgatory-design.md @@ -2,7 +2,7 @@ ## Problem Statement -**Primary problem:** Empty bare git repos mislead clients into thinking we host content. +**Primary problem:** serving an announcement event and also an empty bare git repos mislead clients into thinking we host content. When an announcement arrives, we must create the bare repo immediately (so git pushes can succeed). But if no git data ever arrives, we serve an empty repo and its announcement indefinitely. Clients see the announcement, try to clone, and get nothing. This is misleading. @@ -50,10 +50,10 @@ This ensures we only serve announcements for repos that actually have content. 
**Decision:** Extend purgatory announcement expiry in two scenarios: -| Trigger | Location | Why | -|---------|----------|-----| -| State event arrives | `StatePolicy::process_state_event()` | Repo is actively receiving metadata | -| Git auth extends state event | `src/git/auth.rs` | Repo is actively receiving git data | +| Trigger | Location | Why | +| ---------------------------- | ------------------------------------ | ----------------------------------- | +| State event arrives | `StatePolicy::process_state_event()` | Repo is actively receiving metadata | +| Git auth extends state event | `src/git/auth.rs` | Repo is actively receiving git data | **Why:** Prevents premature expiry during slow sync operations or multi-step pushes. @@ -63,6 +63,10 @@ This ensures we only serve announcements for repos that actually have content. **Why:** State events may arrive before git data promotes the announcement. They still need authorization from the announcement's maintainer set. +### 6. We need to request State Events in sysc for announcement in purgatory but not other l2 or l3 events because they will be rejected. + +### 7. When creating the authorised maintainers for a repositoriy we need to also get relivant announcement events from purgatory as well as db. + ## Data Structure ```rust @@ -80,6 +84,7 @@ pub struct AnnouncementPurgatoryEntry { ``` **Indexed by `(pubkey, identifier)`** because identifier is not unique across different owners. +question: would it be more efficent to index by repo_path? this contains the pubkey and identifier data? ## Flows @@ -133,38 +138,38 @@ Is there an active announcement? 
## Edge Cases -| Scenario | Behavior | -|----------|----------| -| Git data before announcement | Push fails (no repo exists) | -| Announcement expires, no git data | Delete bare repo, discard announcement | -| State expires, announcement in purgatory | Announcement keeps its own expiry | -| Multiple owners, same identifier | Each tracked separately by `(pubkey, identifier)` | -| **Newer announcement replaces older (same pubkey)** | Replace purgatory entry, extend expiry | -| **Newer announcement changes services (unacceptable)** | Clear older announcement from purgatory for that `(pubkey, identifier)` | -| Deletion event for purgatory announcement | Remove from purgatory, delete bare repo | +| Scenario | Behavior | +| ------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------- | +| Git data before announcement | Push fails (no repo exists) | +| Announcement expires, no git data | Delete bare repo, discard announcement | +| State expires, announcement in purgatory | Announcement keeps its own expiry | +| Multiple owners, same identifier | Each tracked separately by `(pubkey, identifier)` | +| **Newer announcement replaces older (same pubkey)** | Replace purgatory entry, extend expiry, and state event expiry | +| **Newer announcement changes services (unacceptable)** | Clear older announcement from purgatory for that `(pubkey, identifier)`, delete bare repo, remove state event for puragatory if exists | +| Deletion event for purgatory announcement | Remove from purgatory, delete bare repo | ## Purgatory Exit Conditions An announcement leaves purgatory via: -| Exit | Trigger | Action | -|------|---------|--------| -| **Promotion** | Git data arrives | Move to database, serve to clients | -| **Expiry** | Timeout | Delete bare repo, discard | -| **Deletion** | Kind 5 event | Delete bare repo, discard | -| **Replacement** | Newer 
announcement (same pubkey, identifier) | Replace entry | -| **Service change** | Newer announcement no longer lists our service | Discard old entry | +| Exit | Trigger | Action | +| ------------------ | ---------------------------------------------- | ---------------------------------- | +| **Promotion** | Git data arrives | Move to database, serve to clients | +| **Expiry** | Timeout | Delete bare repo, discard | +| **Deletion** | Kind 5 event | Delete bare repo, discard | +| **Replacement** | Newer announcement (same pubkey, identifier) | Replace entry | +| **Service change** | Newer announcement no longer lists our service | Discard old entry | ## Integration Points -| File | Change | -|------|--------| -| `src/purgatory/mod.rs` | Add `announcement_purgatory` store | -| `src/purgatory/types.rs` | Add `AnnouncementPurgatoryEntry` | -| `src/nostr/policy/announcement.rs` | Route new announcements to purgatory | -| `src/git/receive.rs` | Promote on git data arrival | -| `src/git/auth.rs` | Extend purgatory expiry when extending state event expiry | -| `src/nostr/policy/state.rs` | Check purgatory for authorization | +| File | Change | +| ---------------------------------- | --------------------------------------------------------- | +| `src/purgatory/mod.rs` | Add `announcement_purgatory` store | +| `src/purgatory/types.rs` | Add `AnnouncementPurgatoryEntry` | +| `src/nostr/policy/announcement.rs` | Route new announcements to purgatory | +| `src/git/receive.rs` | Promote on git data arrival | +| `src/git/auth.rs` | Extend purgatory expiry when extending state event expiry | +| `src/nostr/policy/state.rs` | Check purgatory for authorization | ## Testing @@ -178,8 +183,10 @@ An announcement leaves purgatory via: ## Risks -| Risk | Mitigation | -|------|------------| +| Risk | Mitigation | +| ------------------------------------ | ------------------------------------ | | Disk exhaustion from purgatory repos | Short expiry, monitor purgatory size | -| Race between 
promotion and expiry | Atomic operations | -| Sync re-fetching expired events | Track expired event IDs | +| Race between promotion and expiry | Atomic operations | +| Sync re-fetching expired events | Track expired event IDs | + +question: do expired annoucements go on failed_events list? what if a new state event comes in for it? surely then we want it again? but if not we dont want to keep donwloading it and havea a repo made for it. Should we have a longer period were we keep the event just in case, but delete the bare repo and only remake it when the state event arrives? -- cgit v1.2.3 From 603c87fabda70145b967579b9338051ea9f00704 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Thu, 5 Feb 2026 17:03:37 +0000 Subject: docs: complete high-level announcements purgatory design - Integrate sync-only-state-events decision (SyncLevel concept) - Add authorization must check purgatory decision - Add soft expiry design (delete repo, retain event for 24h) - Add purgatory lifecycle diagram - Create separate implementation details document - Remove inline questions (now resolved) --- docs/explanation/announcements-purgatory-design.md | 128 ++++++--- .../announcements-purgatory-implementation.md | 293 +++++++++++++++++++++ 2 files changed, 385 insertions(+), 36 deletions(-) create mode 100644 docs/explanation/announcements-purgatory-implementation.md (limited to 'docs') diff --git a/docs/explanation/announcements-purgatory-design.md b/docs/explanation/announcements-purgatory-design.md index 4d0cc6d..a06a8b2 100644 --- a/docs/explanation/announcements-purgatory-design.md +++ b/docs/explanation/announcements-purgatory-design.md @@ -2,13 +2,13 @@ ## Problem Statement -**Primary problem:** serving an announcement event and also an empty bare git repos mislead clients into thinking we host content. +**Primary problem:** Serving announcement events alongside empty bare git repos misleads clients into thinking we host content. 
When an announcement arrives, we must create the bare repo immediately (so git pushes can succeed). But if no git data ever arrives, we serve an empty repo and its announcement indefinitely. Clients see the announcement, try to clone, and get nothing. This is misleading. -**Secondary problem:** Sync downloads refs to deleted repos. +**Secondary problem:** Sync downloads events for repos that may never have content. -When a repo expires or is cleaned up, sync may still try to download state event refs to it. We need announcements to remain in a holding state until git data proves the repo has content worth serving. +Without purgatory, sync would fetch all L2/L3 events (patches, issues, etc.) for announcements that may never receive git data. This wastes bandwidth and creates orphaned events. ## Solution Overview @@ -57,15 +57,37 @@ This ensures we only serve announcements for repos that actually have content. **Why:** Prevents premature expiry during slow sync operations or multi-step pushes. -### 5. State Events Consider Purgatory Announcements +### 5. Authorization Must Check Purgatory Announcements -**Decision:** When validating state events, check purgatory announcements for authorization. +**Decision:** When validating state events or git operations, check purgatory announcements in addition to the database. -**Why:** State events may arrive before git data promotes the announcement. They still need authorization from the announcement's maintainer set. +**Why:** State events and git pushes may arrive before git data promotes the announcement. They still need authorization from the announcement's maintainer set. -### 6. We need to request State Events in sysc for announcement in purgatory but not other l2 or l3 events because they will be rejected. +**Where:** `fetch_repository_data()` and related authorization functions must query both DB and purgatory. -### 7. 
When creating the authorised maintainers for a repositoriy we need to also get relivant announcement events from purgatory as well as db. +### 6. Sync Only State Events for Purgatory Announcements + +**Decision:** Purgatory announcements trigger sync for state events only, not other L2/L3 events (patches, issues, PRs, etc.). + +**Why:** Other L2/L3 events would be rejected anyway (no promoted announcement in DB). Syncing them wastes bandwidth and creates work for announcements that may never promote. + +**How:** Sync uses a `SyncLevel` concept - `Full` for promoted repos, `StateOnly` for purgatory. On promotion, upgrade to `Full`. + +### 7. Soft Expiry Preserves Event Without Bare Repo + +**Decision:** When a purgatory announcement expires, delete the bare repo but retain the announcement event for an extended period (e.g., 24h). + +**Why:** This handles the case where a state event arrives after initial expiry. Without soft expiry, we'd either: +- Add to `failed_events` and reject the state event (losing potential revival) +- Re-fetch the announcement repeatedly (wasting sync bandwidth) + +**Behavior during soft expiry:** +- Bare repo is deleted (saves disk space) +- Announcement event retained in purgatory with `soft_expired` flag +- Sync continues requesting state events (same as active purgatory) +- If state event arrives: recreate bare repo, clear `soft_expired`, extend expiry +- If announcement republished directly to us: treat as fresh arrival +- After extended expiry: fully remove from purgatory ## Data Structure @@ -78,13 +100,14 @@ pub struct AnnouncementPurgatoryEntry { pub identifier: String, pub owner: PublicKey, pub repo_path: PathBuf, + pub relays: HashSet, // For sync registration pub created_at: Instant, pub expires_at: Instant, + pub soft_expired: bool, // Bare repo deleted, event retained } ``` -**Indexed by `(pubkey, identifier)`** because identifier is not unique across different owners. 
-question: would it be more efficent to index by repo_path? this contains the pubkey and identifier data? +**Indexed by `(pubkey, identifier)`** because identifier is not unique across different owners. Lookups are primarily from nostr events which have pubkey and identifier readily available. ## Flows @@ -138,27 +161,51 @@ Is there an active announcement? ## Edge Cases -| Scenario | Behavior | -| ------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------- | -| Git data before announcement | Push fails (no repo exists) | -| Announcement expires, no git data | Delete bare repo, discard announcement | -| State expires, announcement in purgatory | Announcement keeps its own expiry | -| Multiple owners, same identifier | Each tracked separately by `(pubkey, identifier)` | -| **Newer announcement replaces older (same pubkey)** | Replace purgatory entry, extend expiry, and state event expiry | -| **Newer announcement changes services (unacceptable)** | Clear older announcement from purgatory for that `(pubkey, identifier)`, delete bare repo, remove state event for puragatory if exists | -| Deletion event for purgatory announcement | Remove from purgatory, delete bare repo | +| Scenario | Behavior | +| ------------------------------------------------------ | ------------------------------------------------------------------------------------------------------- | +| Git data before announcement | Push fails (no repo exists) | +| Announcement expires, no git data | Delete bare repo, set `soft_expired` flag, retain event for extended period | +| Soft-expired announcement fully expires | Remove from purgatory entirely | +| State event arrives for soft-expired announcement | Recreate bare repo, clear `soft_expired`, extend expiry | +| State expires, announcement in purgatory | Announcement keeps its own expiry | +| Multiple owners, same identifier 
| Each tracked separately by `(pubkey, identifier)` | +| **Newer announcement replaces older (same pubkey)** | Replace purgatory entry, extend expiry, and state event expiry | +| **Newer announcement changes services (unacceptable)** | Clear older announcement from purgatory, delete bare repo, remove state events from purgatory if exists | +| Deletion event for purgatory announcement | Remove from purgatory, delete bare repo | + +## Purgatory Lifecycle -## Purgatory Exit Conditions +An announcement progresses through purgatory states: -An announcement leaves purgatory via: +``` + ┌─────────────────────────────────────┐ + │ │ + v │ +Announcement ──> ACTIVE ──────────────────────────────────┤ + arrives (bare repo exists) │ + │ │ + ├── Git data ──> PROMOTED (exit) │ + │ │ + ├── Deletion ──> REMOVED (exit) │ + │ │ + v │ + SOFT_EXPIRED ──────────────────────────────┘ + (bare repo deleted, ^ + event retained) │ + │ │ + ├── State event arrives (revival) + │ + └── Extended expiry ──> REMOVED (exit) +``` -| Exit | Trigger | Action | -| ------------------ | ---------------------------------------------- | ---------------------------------- | -| **Promotion** | Git data arrives | Move to database, serve to clients | -| **Expiry** | Timeout | Delete bare repo, discard | -| **Deletion** | Kind 5 event | Delete bare repo, discard | -| **Replacement** | Newer announcement (same pubkey, identifier) | Replace entry | -| **Service change** | Newer announcement no longer lists our service | Discard old entry | +| Exit | Trigger | Action | +| -------------- | ---------------------------------------------- | -------------------------------------------- | +| **Promotion** | Git data arrives | Move to database, upgrade sync to Full | +| **Soft expiry**| Initial timeout | Delete bare repo, retain event, continue sync| +| **Full expiry**| Extended timeout (soft-expired) | Remove from purgatory entirely | +| **Deletion** | Kind 5 event | Delete bare repo, remove from purgatory | +| 
**Replacement**| Newer announcement (same pubkey, identifier) | Replace entry | +| **Service change** | Newer announcement removes our service | Remove from purgatory | ## Integration Points @@ -169,24 +216,33 @@ An announcement leaves purgatory via: | `src/nostr/policy/announcement.rs` | Route new announcements to purgatory | | `src/git/receive.rs` | Promote on git data arrival | | `src/git/auth.rs` | Extend purgatory expiry when extending state event expiry | +| `src/git/authorization.rs` | Check purgatory announcements for maintainer authorization| | `src/nostr/policy/state.rs` | Check purgatory for authorization | +| `src/sync/mod.rs` | Add `SyncLevel` to `RepoSyncNeeds` | +| `src/sync/filters.rs` | Respect sync level when building filters | +| `src/sync/self_subscriber.rs` | Register purgatory announcements with `StateOnly` level | + +See [announcements-purgatory-implementation.md](./announcements-purgatory-implementation.md) for detailed implementation notes. ## Testing - Announcement to purgatory, git data promotes it -- Announcement expires without git data (repo deleted) +- Announcement soft-expires without git data (repo deleted, event retained) +- State event revives soft-expired announcement (repo recreated) +- Soft-expired announcement fully expires after extended period - State event extends purgatory expiry - Git auth extends purgatory expiry - Newer announcement replaces older in purgatory - Service change clears purgatory entry - `(pubkey, identifier)` indexing with multiple owners +- Sync requests only state events for purgatory announcements +- Sync upgrades to full on promotion ## Risks -| Risk | Mitigation | -| ------------------------------------ | ------------------------------------ | -| Disk exhaustion from purgatory repos | Short expiry, monitor purgatory size | -| Race between promotion and expiry | Atomic operations | -| Sync re-fetching expired events | Track expired event IDs | - -question: do expired annoucements go on failed_events 
list? what if a new state event comes in for it? surely then we want it again? but if not we dont want to keep donwloading it and havea a repo made for it. Should we have a longer period were we keep the event just in case, but delete the bare repo and only remake it when the state event arrives? +| Risk | Mitigation | +| ------------------------------------ | ------------------------------------------------------- | +| Disk exhaustion from purgatory repos | Short expiry, soft expiry deletes repo early | +| Race between promotion and expiry | Atomic operations | +| Sync re-fetching expired events | Soft expiry retains event; no need for `failed_events` | +| Filter explosion from many purgatory | Existing consolidation handles this (threshold at 70) | diff --git a/docs/explanation/announcements-purgatory-implementation.md b/docs/explanation/announcements-purgatory-implementation.md new file mode 100644 index 0000000..d5b8698 --- /dev/null +++ b/docs/explanation/announcements-purgatory-implementation.md @@ -0,0 +1,293 @@ +# Announcements Purgatory Implementation Details + +This document provides detailed implementation notes for the [Announcements Purgatory Design](./announcements-purgatory-design.md). + +## Sync Integration + +### Current Sync Architecture + +The sync system uses a two-index approach: + +```rust +// What we WANT to sync - source of truth from self-subscription +// Key: repo addressable ref (30617:pubkey:identifier) +pub type RepoSyncIndex = Arc<RwLock<HashMap<String, RepoSyncNeeds>>>; + +pub struct RepoSyncNeeds { + pub relays: HashSet, // Relay URLs from announcement + pub root_events: HashSet, // 1617/1618/1621 event IDs +} + +// What we have CONFIRMED syncing + connection state +// Key: relay URL +pub type RelaySyncIndex = Arc>>; +``` + +**Three-Layer Sync Strategy:** +1. **Layer 1:** Announcements (kinds 30617, 10317) +2. **Layer 2:** Repo-tagging events (events with `a`/`A`/`q` tags + kind 30618 by identifier) +3. 
**Layer 3:** Root-event-tagging events (events with `e`/`E`/`q` tags) + +### Adding SyncLevel + +Add a `sync_level` field to distinguish purgatory from promoted repos: + +```rust +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum SyncLevel { + #[default] + Full, // L2 + L3 (promoted repos) + StateOnly, // Only state events (purgatory announcements) +} + +pub struct RepoSyncNeeds { + pub relays: HashSet, + pub root_events: HashSet, + pub sync_level: SyncLevel, // NEW +} +``` + +### Filter Building Changes + +In `src/sync/filters.rs`, modify filter building to respect sync level: + +```rust +// For StateOnly repos, only build state event filters +pub fn build_layer2_and_layer3_filters( + repos: &HashMap, + // ... +) -> Vec { + let (full_repos, state_only_repos): (Vec<_>, Vec<_>) = repos + .iter() + .partition(|(_, needs)| needs.sync_level == SyncLevel::Full); + + let mut filters = Vec::new(); + + // Full repos get all L2/L3 filters + if !full_repos.is_empty() { + filters.extend(tagged_one_of_our_repo_event_filters(&full_repos)); + filters.extend(state_event_filters_for_our_repos(&full_repos)); + filters.extend(tagged_one_of_our_root_event_filters(&full_repos)); + } + + // StateOnly repos get only state event filters + if !state_only_repos.is_empty() { + filters.extend(state_event_filters_for_our_repos(&state_only_repos)); + } + + filters +} +``` + +The existing `state_event_filters_for_our_repos()` function already builds kind 30618 filters with `#d` tags, which is exactly what we need. 
+ +### Self-Subscriber Changes + +In `src/sync/self_subscriber.rs`, add purgatory announcements to the sync index: + +```rust +// When announcement enters purgatory +fn on_announcement_to_purgatory( + &self, + event: &Event, + identifier: &str, + relays: HashSet, +) { + let key = format!("30617:{}:{}", event.pubkey, identifier); + let mut index = self.repo_sync_index.write().unwrap(); + index.insert(key, RepoSyncNeeds { + relays, + root_events: HashSet::new(), + sync_level: SyncLevel::StateOnly, + }); +} + +// When announcement promotes to database +fn on_announcement_promoted( + &self, + event: &Event, + identifier: &str, +) { + let key = format!("30617:{}:{}", event.pubkey, identifier); + let mut index = self.repo_sync_index.write().unwrap(); + if let Some(needs) = index.get_mut(&key) { + needs.sync_level = SyncLevel::Full; + } +} +``` + +### Algorithm Changes + +In `src/sync/algorithms.rs`, preserve sync level when inverting repo->relay: + +```rust +pub fn derive_relay_targets( + repo_index: &RepoSyncIndex, +) -> HashMap { + // ... existing inversion logic ... 
+ // Ensure sync_level is preserved/aggregated per relay + // A relay gets Full if ANY of its repos are Full +} +``` + +## Authorization Integration + +### Current Authorization Flow + +Authorization lookups happen in `src/git/authorization.rs`: + +| Function | Purpose | Currently Queries | +|----------|---------|-------------------| +| `fetch_repository_data()` | Get announcements + states by identifier | DB only | +| `collect_authorized_maintainers()` | Build maintainer set from announcements | DB only | +| `pubkey_authorised_for_repo_owners()` | Check if pubkey authorized | DB only | + +### Required Changes + +Modify `fetch_repository_data()` to also query purgatory: + +```rust +pub async fn fetch_repository_data( + db: &Database, + purgatory: &Purgatory, // NEW parameter + identifier: &str, +) -> Result { + // Existing DB query + let db_events = db.query(/* kind 30617, 30618 by identifier */).await?; + + // NEW: Also check purgatory for announcements + let purgatory_announcements = purgatory + .get_announcements_by_identifier(identifier); + + // Merge results + let mut announcements = parse_announcements(db_events); + announcements.extend(purgatory_announcements); + + // ... 
rest of function +} +``` + +This affects: +- `StatePolicy::process_state_event()` - state event validation +- `get_state_authorization_for_specific_owner_repo()` - git push authorization +- `AnnouncementPolicy::is_maintainer_in_any_announcement()` - maintainer exception + +## Purgatory Store Changes + +### New Fields + +```rust +pub struct AnnouncementPurgatoryEntry { + pub event: Event, + pub identifier: String, + pub owner: PublicKey, + pub repo_path: PathBuf, + pub relays: HashSet, // For sync registration + pub created_at: Instant, + pub expires_at: Instant, + pub soft_expired: bool, // Bare repo deleted, event retained +} +``` + +### New Methods + +```rust +impl Purgatory { + /// Get announcements by identifier (for authorization) + pub fn get_announcements_by_identifier( + &self, + identifier: &str, + ) -> Vec<&AnnouncementPurgatoryEntry> { + self.announcement_purgatory + .iter() + .filter(|entry| entry.identifier == identifier) + .collect() + } + + /// Transition to soft-expired state + pub fn soft_expire_announcement( + &self, + key: &(PublicKey, String), + ) -> Option<PathBuf> { + if let Some(mut entry) = self.announcement_purgatory.get_mut(key) { + entry.soft_expired = true; + entry.expires_at = Instant::now() + SOFT_EXPIRY_DURATION; // e.g., 24h + Some(entry.repo_path.clone()) // Return path for bare repo deletion + } else { + None + } + } + + /// Revive soft-expired announcement (caller must recreate bare repo) + pub fn revive_announcement( + &self, + key: &(PublicKey, String), + ) -> Option<PathBuf> { + if let Some(mut entry) = self.announcement_purgatory.get_mut(key) { + if entry.soft_expired { + entry.soft_expired = false; + entry.expires_at = Instant::now() + ACTIVE_EXPIRY_DURATION; + return Some(entry.repo_path.clone()); // Caller recreates bare repo + } + } + None + } +} +``` + +## Expiry Cleanup Task + +The existing cleanup task needs to handle the two-phase expiry: + +```rust +async fn cleanup_expired_announcements(&self) { + let now = Instant::now(); + + for 
entry in self.announcement_purgatory.iter() { + if entry.expires_at <= now { + let key = (entry.owner.clone(), entry.identifier.clone()); + + if entry.soft_expired { + // Fully expired - remove entirely + self.announcement_purgatory.remove(&key); + self.unregister_from_sync(&key); + } else { + // First expiry - transition to soft-expired + if let Some(repo_path) = self.soft_expire_announcement(&key) { + delete_bare_repo(&repo_path).await; + } + // Note: stays in sync index with StateOnly level + } + } + } +} +``` + +## State Event Revival Flow + +When a state event arrives for a soft-expired announcement, the state policy must: + +1. Check purgatory for a matching announcement (in addition to DB) +2. Validate authorization against the purgatory announcement +3. If soft-expired, call `revive_announcement()` and recreate the bare repo +4. Extend the announcement's expiry +5. Route the state event to state purgatory + +The exact integration will depend on the current structure of `StatePolicy::process_state_event()` - see implementation phase for details. 
+ +## File Change Summary + +| File | Estimated Lines | Changes | +|------|-----------------|---------| +| `src/sync/mod.rs` | ~10 | Add `SyncLevel` enum, field to `RepoSyncNeeds` | +| `src/sync/filters.rs` | ~20 | Partition repos by sync level, build appropriate filters | +| `src/sync/algorithms.rs` | ~15 | Preserve sync level in relay target derivation | +| `src/sync/self_subscriber.rs` | ~40 | Register purgatory announcements, handle promotion | +| `src/purgatory/mod.rs` | ~80 | Add announcement store, soft expiry methods | +| `src/purgatory/types.rs` | ~20 | Add `AnnouncementPurgatoryEntry` | +| `src/git/authorization.rs` | ~30 | Query purgatory in `fetch_repository_data()` | +| `src/nostr/policy/state.rs` | ~40 | Handle soft-expired revival | +| `src/nostr/policy/announcement.rs` | ~30 | Route to purgatory, check for replacements | +| `src/git/receive.rs` | ~20 | Trigger promotion on git data | + +**Total: ~305 lines of changes** -- cgit v1.2.3 From 0a1908bd6ee19f7079bb2914c0009bea1fc1db37 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Thu, 12 Feb 2026 09:11:38 +0000 Subject: docs: annocunment purgatory clarify soft expiry rationale now we have added announcement purgatory to the protocol spec --- docs/explanation/announcements-purgatory-design.md | 72 ++++++++++++---------- .../announcements-purgatory-implementation.md | 13 ++-- 2 files changed, 47 insertions(+), 38 deletions(-) (limited to 'docs') diff --git a/docs/explanation/announcements-purgatory-design.md b/docs/explanation/announcements-purgatory-design.md index a06a8b2..009547b 100644 --- a/docs/explanation/announcements-purgatory-design.md +++ b/docs/explanation/announcements-purgatory-design.md @@ -48,14 +48,14 @@ This ensures we only serve announcements for repos that actually have content. ### 4. 
Expiry Extension (Two Places) -**Decision:** Extend purgatory announcement expiry in two scenarios: +**Decision:** Extend purgatory announcement expiry (reset the 30-minute protocol timer) in two scenarios: | Trigger | Location | Why | | ---------------------------- | ------------------------------------ | ----------------------------------- | | State event arrives | `StatePolicy::process_state_event()` | Repo is actively receiving metadata | | Git auth extends state event | `src/git/auth.rs` | Repo is actively receiving git data | -**Why:** Prevents premature expiry during slow sync operations or multi-step pushes. +**Why:** Prevents premature expiry during slow sync operations or multi-step pushes. The protocol's 30-minute expiry is intended for abandoned repositories, not active ones receiving data. ### 5. Authorization Must Check Purgatory Announcements @@ -75,20 +75,26 @@ This ensures we only serve announcements for repos that actually have content. ### 7. Soft Expiry Preserves Event Without Bare Repo -**Decision:** When a purgatory announcement expires, delete the bare repo but retain the announcement event for an extended period (e.g., 24h). +**Decision:** When a purgatory announcement expires (30 minutes per protocol spec), delete the bare repo but retain the announcement event for an extended period (e.g., 24h). -**Why:** This handles the case where a state event arrives after initial expiry. Without soft expiry, we'd either: -- Add to `failed_events` and reject the state event (losing potential revival) -- Re-fetch the announcement repeatedly (wasting sync bandwidth) +**Why the protocol specifies 30 minutes:** The grasp protocol defines a 30-minute expiry for announcement events to ensure clients don't indefinitely cache stale repository information. + +**Why we implement soft expiry:** The protocol's 30-minute expiry creates a sync/storage problem. 
Without soft expiry, we'd either: + +- Add expired announcements to `failed_events` and permanently reject future state events (losing potential revival when state events arrive late) +- Re-fetch the announcement event repeatedly on every sync cycle (wasting bandwidth and creating unnecessary sync traffic) **Behavior during soft expiry:** -- Bare repo is deleted (saves disk space) + +- Bare repo is deleted (saves disk space, respects protocol expiry) - Announcement event retained in purgatory with `soft_expired` flag - Sync continues requesting state events (same as active purgatory) - If state event arrives: recreate bare repo, clear `soft_expired`, extend expiry - If announcement republished directly to us: treat as fresh arrival - After extended expiry: fully remove from purgatory +**In summary:** Soft expiry is an implementation optimization that prevents us from constantly re-syncing announcement events or permanently blocking repositories that receive delayed state events. + ## Data Structure ```rust @@ -198,29 +204,29 @@ Announcement ──> ACTIVE ───────────────── └── Extended expiry ──> REMOVED (exit) ``` -| Exit | Trigger | Action | -| -------------- | ---------------------------------------------- | -------------------------------------------- | -| **Promotion** | Git data arrives | Move to database, upgrade sync to Full | -| **Soft expiry**| Initial timeout | Delete bare repo, retain event, continue sync| -| **Full expiry**| Extended timeout (soft-expired) | Remove from purgatory entirely | -| **Deletion** | Kind 5 event | Delete bare repo, remove from purgatory | -| **Replacement**| Newer announcement (same pubkey, identifier) | Replace entry | -| **Service change** | Newer announcement removes our service | Remove from purgatory | +| Exit | Trigger | Action | +| ------------------ | -------------------------------------------- | --------------------------------------------- | +| **Promotion** | Git data arrives | Move to database, upgrade sync to Full | 
+| **Soft expiry** | Initial timeout | Delete bare repo, retain event, continue sync | +| **Full expiry** | Extended timeout (soft-expired) | Remove from purgatory entirely | +| **Deletion** | Kind 5 event | Delete bare repo, remove from purgatory | +| **Replacement** | Newer announcement (same pubkey, identifier) | Replace entry | +| **Service change** | Newer announcement removes our service | Remove from purgatory | ## Integration Points -| File | Change | -| ---------------------------------- | --------------------------------------------------------- | -| `src/purgatory/mod.rs` | Add `announcement_purgatory` store | -| `src/purgatory/types.rs` | Add `AnnouncementPurgatoryEntry` | -| `src/nostr/policy/announcement.rs` | Route new announcements to purgatory | -| `src/git/receive.rs` | Promote on git data arrival | -| `src/git/auth.rs` | Extend purgatory expiry when extending state event expiry | -| `src/git/authorization.rs` | Check purgatory announcements for maintainer authorization| -| `src/nostr/policy/state.rs` | Check purgatory for authorization | -| `src/sync/mod.rs` | Add `SyncLevel` to `RepoSyncNeeds` | -| `src/sync/filters.rs` | Respect sync level when building filters | -| `src/sync/self_subscriber.rs` | Register purgatory announcements with `StateOnly` level | +| File | Change | +| ---------------------------------- | ---------------------------------------------------------- | +| `src/purgatory/mod.rs` | Add `announcement_purgatory` store | +| `src/purgatory/types.rs` | Add `AnnouncementPurgatoryEntry` | +| `src/nostr/policy/announcement.rs` | Route new announcements to purgatory | +| `src/git/receive.rs` | Promote on git data arrival | +| `src/git/auth.rs` | Extend purgatory expiry when extending state event expiry | +| `src/git/authorization.rs` | Check purgatory announcements for maintainer authorization | +| `src/nostr/policy/state.rs` | Check purgatory for authorization | +| `src/sync/mod.rs` | Add `SyncLevel` to `RepoSyncNeeds` | +| 
`src/sync/filters.rs` | Respect sync level when building filters | +| `src/sync/self_subscriber.rs` | Register purgatory announcements with `StateOnly` level | See [announcements-purgatory-implementation.md](./announcements-purgatory-implementation.md) for detailed implementation notes. @@ -240,9 +246,9 @@ See [announcements-purgatory-implementation.md](./announcements-purgatory-implem ## Risks -| Risk | Mitigation | -| ------------------------------------ | ------------------------------------------------------- | -| Disk exhaustion from purgatory repos | Short expiry, soft expiry deletes repo early | -| Race between promotion and expiry | Atomic operations | -| Sync re-fetching expired events | Soft expiry retains event; no need for `failed_events` | -| Filter explosion from many purgatory | Existing consolidation handles this (threshold at 70) | +| Risk | Mitigation | +| ------------------------------------ | ------------------------------------------------------ | +| Disk exhaustion from purgatory repos | Short expiry, soft expiry deletes repo early | +| Race between promotion and expiry | Atomic operations | +| Sync re-fetching expired events | Soft expiry retains event; no need for `failed_events` | +| Filter explosion from many purgatory | Existing consolidation handles this (threshold at 70) | diff --git a/docs/explanation/announcements-purgatory-implementation.md b/docs/explanation/announcements-purgatory-implementation.md index d5b8698..263c253 100644 --- a/docs/explanation/announcements-purgatory-implementation.md +++ b/docs/explanation/announcements-purgatory-implementation.md @@ -204,21 +204,22 @@ impl Purgatory { .collect() } - /// Transition to soft-expired state + /// Transition to soft-expired state (protocol's 30min expiry reached) pub fn soft_expire_announcement( &self, key: &(PublicKey, String), ) -> Option { if let Some(mut entry) = self.announcement_purgatory.get_mut(key) { entry.soft_expired = true; - entry.expires_at = Instant::now() + 
SOFT_EXPIRY_DURATION; // e.g., 24h + entry.expires_at = Instant::now() + SOFT_EXPIRY_DURATION; // e.g., 24h extended retention Some(entry.repo_path.clone()) // Return path for bare repo deletion } else { None } } - /// Revive soft-expired announcement (caller must recreate bare repo) + /// Revive soft-expired announcement when state event arrives + /// (caller must recreate bare repo) pub fn revive_announcement( &self, key: &(PublicKey, String), @@ -226,7 +227,7 @@ impl Purgatory { if let Some(mut entry) = self.announcement_purgatory.get_mut(key) { if entry.soft_expired { entry.soft_expired = false; - entry.expires_at = Instant::now() + ACTIVE_EXPIRY_DURATION; + entry.expires_at = Instant::now() + ACTIVE_EXPIRY_DURATION; // Reset 30min protocol timer return Some(entry.repo_path.clone()); // Caller recreates bare repo } } @@ -270,9 +271,11 @@ When a state event arrives for a soft-expired announcement, the state policy mus 1. Check purgatory for a matching announcement (in addition to DB) 2. Validate authorization against the purgatory announcement 3. If soft-expired, call `revive_announcement()` and recreate the bare repo -4. Extend the announcement's expiry +4. Extend the announcement's expiry (reset the 30-minute protocol timer) 5. Route the state event to state purgatory +**Why revival is necessary:** Without soft expiry + revival, late-arriving state events would either be permanently rejected (if we added the announcement to `failed_events`) or cause constant re-syncing of the announcement event. Revival allows us to respect the protocol's 30-minute expiry while still handling delayed state events gracefully. + The exact integration will depend on the current structure of `StatePolicy::process_state_event()` - see implementation phase for details. 
## File Change Summary -- cgit v1.2.3 From 4848c4029fc58f6f310a2babeae1ee82a7e41656 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Mon, 23 Feb 2026 14:49:30 +0000 Subject: docs: update purgatory docs to reflect announcements purgatory implementation Remove the pre-implementation planning docs (announcements-purgatory-design.md and announcements-purgatory-implementation.md) now that the feature is built. Update the three living docs to reflect what was actually implemented: - purgatory-design.md: expanded to cover all three purgatory stores (announcement, state, PR), including AnnouncementPurgatoryEntry structure, two-phase soft expiry lifecycle, expiry extension triggers, promotion flow, and updated integration points and file structure - grasp-02-proactive-sync.md: added SyncLevel enum (Full/StateOnly) to RepoSyncNeeds, documented the purgatory announcement sync timer as the registration path for purgatory announcements, updated filter building to describe build_sync_level_aware_filters() and StateOnly behaviour - grasp-02-proactive-sync-purgatory-git-data.md: expanded to cover announcement purgatory as a third entry type, added Timeline E showing soft-expiry and revival, replaced the single expiry section with separate hard-expiry (state/PR) and two-phase soft-expiry (announcements) sections with full justification for the 24-hour extended retention window --- docs/explanation/announcements-purgatory-design.md | 254 ---------- .../announcements-purgatory-implementation.md | 296 ------------ .../grasp-02-proactive-sync-purgatory-git-data.md | 67 ++- docs/explanation/grasp-02-proactive-sync.md | 57 ++- docs/explanation/purgatory-design.md | 520 +++++++++++++-------- 5 files changed, 415 insertions(+), 779 deletions(-) delete mode 100644 docs/explanation/announcements-purgatory-design.md delete mode 100644 docs/explanation/announcements-purgatory-implementation.md (limited to 'docs') diff --git a/docs/explanation/announcements-purgatory-design.md 
b/docs/explanation/announcements-purgatory-design.md deleted file mode 100644 index 009547b..0000000 --- a/docs/explanation/announcements-purgatory-design.md +++ /dev/null @@ -1,254 +0,0 @@ -# Announcements Purgatory Design - -## Problem Statement - -**Primary problem:** Serving announcement events alongside empty bare git repos misleads clients into thinking we host content. - -When an announcement arrives, we must create the bare repo immediately (so git pushes can succeed). But if no git data ever arrives, we serve an empty repo and its announcement indefinitely. Clients see the announcement, try to clone, and get nothing. This is misleading. - -**Secondary problem:** Sync downloads events for repos that may never have content. - -Without purgatory, sync would fetch all L2/L3 events (patches, issues, etc.) for announcements that may never receive git data. This wastes bandwidth and creates orphaned events. - -## Solution Overview - -New announcements go to **purgatory** instead of being immediately accepted: - -1. **Announcement arrives** - Create bare repo immediately, add announcement to purgatory -2. **Git data arrives** - Promote announcement from purgatory to active (now served to clients) -3. **No git data before expiry** - Delete bare repo, discard announcement (never served) - -This ensures we only serve announcements for repos that actually have content. - -## Key Design Decisions - -### 1. Bare Repo Created Immediately - -**Decision:** Create the bare git repo when announcement enters purgatory. - -**Why:** Git pushes may arrive at any time. Without a repo, pushes fail. - -**Consequence:** We allocate disk space for repos that may expire unused. Must delete repos on expiry. - -### 2. Git Data Triggers Promotion - -**Decision:** Git data arrival promotes the announcement to active status. - -**Why:** Git data proves the repository has content. State events alone don't prove content exists - they could reference empty repos. 
- -**Where:** Promotion happens in the git receive path after successful push/fetch with data. - -### 3. Replacement Announcements Skip Purgatory - -**Decision:** Announcements replacing an existing active announcement are accepted immediately. - -**Why:** The repository is already proven active with content. - -**How:** Check if active announcement exists for `(pubkey, identifier)` before routing to purgatory. - -### 4. Expiry Extension (Two Places) - -**Decision:** Extend purgatory announcement expiry (reset the 30-minute protocol timer) in two scenarios: - -| Trigger | Location | Why | -| ---------------------------- | ------------------------------------ | ----------------------------------- | -| State event arrives | `StatePolicy::process_state_event()` | Repo is actively receiving metadata | -| Git auth extends state event | `src/git/auth.rs` | Repo is actively receiving git data | - -**Why:** Prevents premature expiry during slow sync operations or multi-step pushes. The protocol's 30-minute expiry is intended for abandoned repositories, not active ones receiving data. - -### 5. Authorization Must Check Purgatory Announcements - -**Decision:** When validating state events or git operations, check purgatory announcements in addition to the database. - -**Why:** State events and git pushes may arrive before git data promotes the announcement. They still need authorization from the announcement's maintainer set. - -**Where:** `fetch_repository_data()` and related authorization functions must query both DB and purgatory. - -### 6. Sync Only State Events for Purgatory Announcements - -**Decision:** Purgatory announcements trigger sync for state events only, not other L2/L3 events (patches, issues, PRs, etc.). - -**Why:** Other L2/L3 events would be rejected anyway (no promoted announcement in DB). Syncing them wastes bandwidth and creates work for announcements that may never promote. 
- -**How:** Sync uses a `SyncLevel` concept - `Full` for promoted repos, `StateOnly` for purgatory. On promotion, upgrade to `Full`. - -### 7. Soft Expiry Preserves Event Without Bare Repo - -**Decision:** When a purgatory announcement expires (30 minutes per protocol spec), delete the bare repo but retain the announcement event for an extended period (e.g., 24h). - -**Why the protocol specifies 30 minutes:** The grasp protocol defines a 30-minute expiry for announcement events to ensure clients don't indefinitely cache stale repository information. - -**Why we implement soft expiry:** The protocol's 30-minute expiry creates a sync/storage problem. Without soft expiry, we'd either: - -- Add expired announcements to `failed_events` and permanently reject future state events (losing potential revival when state events arrive late) -- Re-fetch the announcement event repeatedly on every sync cycle (wasting bandwidth and creating unnecessary sync traffic) - -**Behavior during soft expiry:** - -- Bare repo is deleted (saves disk space, respects protocol expiry) -- Announcement event retained in purgatory with `soft_expired` flag -- Sync continues requesting state events (same as active purgatory) -- If state event arrives: recreate bare repo, clear `soft_expired`, extend expiry -- If announcement republished directly to us: treat as fresh arrival -- After extended expiry: fully remove from purgatory - -**In summary:** Soft expiry is an implementation optimization that prevents us from constantly re-syncing announcement events or permanently blocking repositories that receive delayed state events. 
- -## Data Structure - -```rust -// Key: (owner pubkey, identifier) - identifier alone is NOT unique -announcement_purgatory: Arc> - -pub struct AnnouncementPurgatoryEntry { - pub event: Event, - pub identifier: String, - pub owner: PublicKey, - pub repo_path: PathBuf, - pub relays: HashSet, // For sync registration - pub created_at: Instant, - pub expires_at: Instant, - pub soft_expired: bool, // Bare repo deleted, event retained -} -``` - -**Indexed by `(pubkey, identifier)`** because identifier is not unique across different owners. Lookups are primarily from nostr events which have pubkey and identifier readily available. - -## Flows - -### New Announcement Flow - -``` -Announcement arrives - | - v -Is there an active announcement for (pubkey, identifier)? - | - +-- YES --> Accept immediately (replacement) - | - +-- NO --> Create bare repo - Add to purgatory - Return OK to client (but don't serve) -``` - -### Git Data Arrival Flow - -``` -Git push/fetch completes with data - | - v -Is there a purgatory announcement for (pubkey, identifier)? - | - +-- YES --> Promote to active (move to database) - | Now served to clients - | - +-- NO --> Normal processing -``` - -### State Event Arrival Flow - -``` -State event arrives - | - v -Is there an active announcement? 
- | - +-- YES --> Normal validation - | - +-- NO --> Check purgatory for announcement - | - +-- Found --> Validate against purgatory announcement - | Extend purgatory expiry - | State event goes to state purgatory - | - +-- Not found --> Reject or state purgatory -``` - -## Edge Cases - -| Scenario | Behavior | -| ------------------------------------------------------ | ------------------------------------------------------------------------------------------------------- | -| Git data before announcement | Push fails (no repo exists) | -| Announcement expires, no git data | Delete bare repo, set `soft_expired` flag, retain event for extended period | -| Soft-expired announcement fully expires | Remove from purgatory entirely | -| State event arrives for soft-expired announcement | Recreate bare repo, clear `soft_expired`, extend expiry | -| State expires, announcement in purgatory | Announcement keeps its own expiry | -| Multiple owners, same identifier | Each tracked separately by `(pubkey, identifier)` | -| **Newer announcement replaces older (same pubkey)** | Replace purgatory entry, extend expiry, and state event expiry | -| **Newer announcement changes services (unacceptable)** | Clear older announcement from purgatory, delete bare repo, remove state events from purgatory if exists | -| Deletion event for purgatory announcement | Remove from purgatory, delete bare repo | - -## Purgatory Lifecycle - -An announcement progresses through purgatory states: - -``` - ┌─────────────────────────────────────┐ - │ │ - v │ -Announcement ──> ACTIVE ──────────────────────────────────┤ - arrives (bare repo exists) │ - │ │ - ├── Git data ──> PROMOTED (exit) │ - │ │ - ├── Deletion ──> REMOVED (exit) │ - │ │ - v │ - SOFT_EXPIRED ──────────────────────────────┘ - (bare repo deleted, ^ - event retained) │ - │ │ - ├── State event arrives (revival) - │ - └── Extended expiry ──> REMOVED (exit) -``` - -| Exit | Trigger | Action | -| ------------------ | 
-------------------------------------------- | --------------------------------------------- | -| **Promotion** | Git data arrives | Move to database, upgrade sync to Full | -| **Soft expiry** | Initial timeout | Delete bare repo, retain event, continue sync | -| **Full expiry** | Extended timeout (soft-expired) | Remove from purgatory entirely | -| **Deletion** | Kind 5 event | Delete bare repo, remove from purgatory | -| **Replacement** | Newer announcement (same pubkey, identifier) | Replace entry | -| **Service change** | Newer announcement removes our service | Remove from purgatory | - -## Integration Points - -| File | Change | -| ---------------------------------- | ---------------------------------------------------------- | -| `src/purgatory/mod.rs` | Add `announcement_purgatory` store | -| `src/purgatory/types.rs` | Add `AnnouncementPurgatoryEntry` | -| `src/nostr/policy/announcement.rs` | Route new announcements to purgatory | -| `src/git/receive.rs` | Promote on git data arrival | -| `src/git/auth.rs` | Extend purgatory expiry when extending state event expiry | -| `src/git/authorization.rs` | Check purgatory announcements for maintainer authorization | -| `src/nostr/policy/state.rs` | Check purgatory for authorization | -| `src/sync/mod.rs` | Add `SyncLevel` to `RepoSyncNeeds` | -| `src/sync/filters.rs` | Respect sync level when building filters | -| `src/sync/self_subscriber.rs` | Register purgatory announcements with `StateOnly` level | - -See [announcements-purgatory-implementation.md](./announcements-purgatory-implementation.md) for detailed implementation notes. 
- -## Testing - -- Announcement to purgatory, git data promotes it -- Announcement soft-expires without git data (repo deleted, event retained) -- State event revives soft-expired announcement (repo recreated) -- Soft-expired announcement fully expires after extended period -- State event extends purgatory expiry -- Git auth extends purgatory expiry -- Newer announcement replaces older in purgatory -- Service change clears purgatory entry -- `(pubkey, identifier)` indexing with multiple owners -- Sync requests only state events for purgatory announcements -- Sync upgrades to full on promotion - -## Risks - -| Risk | Mitigation | -| ------------------------------------ | ------------------------------------------------------ | -| Disk exhaustion from purgatory repos | Short expiry, soft expiry deletes repo early | -| Race between promotion and expiry | Atomic operations | -| Sync re-fetching expired events | Soft expiry retains event; no need for `failed_events` | -| Filter explosion from many purgatory | Existing consolidation handles this (threshold at 70) | diff --git a/docs/explanation/announcements-purgatory-implementation.md b/docs/explanation/announcements-purgatory-implementation.md deleted file mode 100644 index 263c253..0000000 --- a/docs/explanation/announcements-purgatory-implementation.md +++ /dev/null @@ -1,296 +0,0 @@ -# Announcements Purgatory Implementation Details - -This document provides detailed implementation notes for the [Announcements Purgatory Design](./announcements-purgatory-design.md). 
- -## Sync Integration - -### Current Sync Architecture - -The sync system uses a two-index approach: - -```rust -// What we WANT to sync - source of truth from self-subscription -// Key: repo addressable ref (30617:pubkey:identifier) -pub type RepoSyncIndex = Arc>>; - -pub struct RepoSyncNeeds { - pub relays: HashSet, // Relay URLs from announcement - pub root_events: HashSet, // 1617/1618/1621 event IDs -} - -// What we have CONFIRMED syncing + connection state -// Key: relay URL -pub type RelaySyncIndex = Arc>>; -``` - -**Three-Layer Sync Strategy:** -1. **Layer 1:** Announcements (kinds 30617, 10317) -2. **Layer 2:** Repo-tagging events (events with `a`/`A`/`q` tags + kind 30618 by identifier) -3. **Layer 3:** Root-event-tagging events (events with `e`/`E`/`q` tags) - -### Adding SyncLevel - -Add a `sync_level` field to distinguish purgatory from promoted repos: - -```rust -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -pub enum SyncLevel { - #[default] - Full, // L2 + L3 (promoted repos) - StateOnly, // Only state events (purgatory announcements) -} - -pub struct RepoSyncNeeds { - pub relays: HashSet, - pub root_events: HashSet, - pub sync_level: SyncLevel, // NEW -} -``` - -### Filter Building Changes - -In `src/sync/filters.rs`, modify filter building to respect sync level: - -```rust -// For StateOnly repos, only build state event filters -pub fn build_layer2_and_layer3_filters( - repos: &HashMap, - // ... 
-) -> Vec { - let (full_repos, state_only_repos): (Vec<_>, Vec<_>) = repos - .iter() - .partition(|(_, needs)| needs.sync_level == SyncLevel::Full); - - let mut filters = Vec::new(); - - // Full repos get all L2/L3 filters - if !full_repos.is_empty() { - filters.extend(tagged_one_of_our_repo_event_filters(&full_repos)); - filters.extend(state_event_filters_for_our_repos(&full_repos)); - filters.extend(tagged_one_of_our_root_event_filters(&full_repos)); - } - - // StateOnly repos get only state event filters - if !state_only_repos.is_empty() { - filters.extend(state_event_filters_for_our_repos(&state_only_repos)); - } - - filters -} -``` - -The existing `state_event_filters_for_our_repos()` function already builds kind 30618 filters with `#d` tags, which is exactly what we need. - -### Self-Subscriber Changes - -In `src/sync/self_subscriber.rs`, add purgatory announcements to the sync index: - -```rust -// When announcement enters purgatory -fn on_announcement_to_purgatory( - &self, - event: &Event, - identifier: &str, - relays: HashSet, -) { - let key = format!("30617:{}:{}", event.pubkey, identifier); - let mut index = self.repo_sync_index.write().unwrap(); - index.insert(key, RepoSyncNeeds { - relays, - root_events: HashSet::new(), - sync_level: SyncLevel::StateOnly, - }); -} - -// When announcement promotes to database -fn on_announcement_promoted( - &self, - event: &Event, - identifier: &str, -) { - let key = format!("30617:{}:{}", event.pubkey, identifier); - let mut index = self.repo_sync_index.write().unwrap(); - if let Some(needs) = index.get_mut(&key) { - needs.sync_level = SyncLevel::Full; - } -} -``` - -### Algorithm Changes - -In `src/sync/algorithms.rs`, preserve sync level when inverting repo->relay: - -```rust -pub fn derive_relay_targets( - repo_index: &RepoSyncIndex, -) -> HashMap { - // ... existing inversion logic ... 
- // Ensure sync_level is preserved/aggregated per relay - // A relay gets Full if ANY of its repos are Full -} -``` - -## Authorization Integration - -### Current Authorization Flow - -Authorization lookups happen in `src/git/authorization.rs`: - -| Function | Purpose | Currently Queries | -|----------|---------|-------------------| -| `fetch_repository_data()` | Get announcements + states by identifier | DB only | -| `collect_authorized_maintainers()` | Build maintainer set from announcements | DB only | -| `pubkey_authorised_for_repo_owners()` | Check if pubkey authorized | DB only | - -### Required Changes - -Modify `fetch_repository_data()` to also query purgatory: - -```rust -pub async fn fetch_repository_data( - db: &Database, - purgatory: &Purgatory, // NEW parameter - identifier: &str, -) -> Result { - // Existing DB query - let db_events = db.query(/* kind 30617, 30618 by identifier */).await?; - - // NEW: Also check purgatory for announcements - let purgatory_announcements = purgatory - .get_announcements_by_identifier(identifier); - - // Merge results - let mut announcements = parse_announcements(db_events); - announcements.extend(purgatory_announcements); - - // ... 
rest of function -} -``` - -This affects: -- `StatePolicy::process_state_event()` - state event validation -- `get_state_authorization_for_specific_owner_repo()` - git push authorization -- `AnnouncementPolicy::is_maintainer_in_any_announcement()` - maintainer exception - -## Purgatory Store Changes - -### New Fields - -```rust -pub struct AnnouncementPurgatoryEntry { - pub event: Event, - pub identifier: String, - pub owner: PublicKey, - pub repo_path: PathBuf, - pub relays: HashSet, // For sync registration - pub created_at: Instant, - pub expires_at: Instant, - pub soft_expired: bool, // Bare repo deleted, event retained -} -``` - -### New Methods - -```rust -impl Purgatory { - /// Get announcements by identifier (for authorization) - pub fn get_announcements_by_identifier( - &self, - identifier: &str, - ) -> Vec<&AnnouncementPurgatoryEntry> { - self.announcement_purgatory - .iter() - .filter(|entry| entry.identifier == identifier) - .collect() - } - - /// Transition to soft-expired state (protocol's 30min expiry reached) - pub fn soft_expire_announcement( - &self, - key: &(PublicKey, String), - ) -> Option<PathBuf> { - if let Some(mut entry) = self.announcement_purgatory.get_mut(key) { - entry.soft_expired = true; - entry.expires_at = Instant::now() + SOFT_EXPIRY_DURATION; // e.g., 24h extended retention - Some(entry.repo_path.clone()) // Return path for bare repo deletion - } else { - None - } - } - - /// Revive soft-expired announcement when state event arrives - /// (caller must recreate bare repo) - pub fn revive_announcement( - &self, - key: &(PublicKey, String), - ) -> Option<PathBuf> { - if let Some(mut entry) = self.announcement_purgatory.get_mut(key) { - if entry.soft_expired { - entry.soft_expired = false; - entry.expires_at = Instant::now() + ACTIVE_EXPIRY_DURATION; // Reset 30min protocol timer - return Some(entry.repo_path.clone()); // Caller recreates bare repo - } - } - None - } -} -``` - -## Expiry Cleanup Task - -The existing cleanup task needs to handle the 
two-phase expiry: - -```rust -async fn cleanup_expired_announcements(&self) { - let now = Instant::now(); - - for entry in self.announcement_purgatory.iter() { - if entry.expires_at <= now { - let key = (entry.owner.clone(), entry.identifier.clone()); - - if entry.soft_expired { - // Fully expired - remove entirely - self.announcement_purgatory.remove(&key); - self.unregister_from_sync(&key); - } else { - // First expiry - transition to soft-expired - if let Some(repo_path) = self.soft_expire_announcement(&key) { - delete_bare_repo(&repo_path).await; - } - // Note: stays in sync index with StateOnly level - } - } - } -} -``` - -## State Event Revival Flow - -When a state event arrives for a soft-expired announcement, the state policy must: - -1. Check purgatory for a matching announcement (in addition to DB) -2. Validate authorization against the purgatory announcement -3. If soft-expired, call `revive_announcement()` and recreate the bare repo -4. Extend the announcement's expiry (reset the 30-minute protocol timer) -5. Route the state event to state purgatory - -**Why revival is necessary:** Without soft expiry + revival, late-arriving state events would either be permanently rejected (if we added the announcement to `failed_events`) or cause constant re-syncing of the announcement event. Revival allows us to respect the protocol's 30-minute expiry while still handling delayed state events gracefully. - -The exact integration will depend on the current structure of `StatePolicy::process_state_event()` - see implementation phase for details. 
- -## File Change Summary - -| File | Estimated Lines | Changes | -|------|-----------------|---------| -| `src/sync/mod.rs` | ~10 | Add `SyncLevel` enum, field to `RepoSyncNeeds` | -| `src/sync/filters.rs` | ~20 | Partition repos by sync level, build appropriate filters | -| `src/sync/algorithms.rs` | ~15 | Preserve sync level in relay target derivation | -| `src/sync/self_subscriber.rs` | ~40 | Register purgatory announcements, handle promotion | -| `src/purgatory/mod.rs` | ~80 | Add announcement store, soft expiry methods | -| `src/purgatory/types.rs` | ~20 | Add `AnnouncementPurgatoryEntry` | -| `src/git/authorization.rs` | ~30 | Query purgatory in `fetch_repository_data()` | -| `src/nostr/policy/state.rs` | ~40 | Handle soft-expired revival | -| `src/nostr/policy/announcement.rs` | ~30 | Route to purgatory, check for replacements | -| `src/git/receive.rs` | ~20 | Trigger promotion on git data | - -**Total: ~305 lines of changes** diff --git a/docs/explanation/grasp-02-proactive-sync-purgatory-git-data.md b/docs/explanation/grasp-02-proactive-sync-purgatory-git-data.md index 31c3e46..8fb5798 100644 --- a/docs/explanation/grasp-02-proactive-sync-purgatory-git-data.md +++ b/docs/explanation/grasp-02-proactive-sync-purgatory-git-data.md @@ -12,7 +12,13 @@ ## Overview -When Nostr events arrive before their git data, they enter **purgatory** waiting to be served. But they don't wait passively—ngit-grasp **actively hunts** for the missing git data across all git servers assoicated with the repo until it finds what it needs. +When Nostr events arrive before their git data, they enter **purgatory** waiting to be served. But they don't wait passively—ngit-grasp **actively hunts** for the missing git data across all git servers associated with the repo until it finds what it needs. 
+ +This applies to three types of purgatory entries: + +- **Announcement purgatory** — kind 30617 announcements waiting for a git push to prove the repo has content +- **State event purgatory** — kind 30618 state events waiting for their referenced git objects +- **PR event purgatory** — kind 1617/1618 PR events waiting for their referenced commits ### How It Works @@ -42,6 +48,7 @@ We respect remote server capacity with: ✅ **Respectful throttling** - 5 concurrent + 30/min per domain, plays nice with other implementations ✅ **Smart timing** - 3min delay for user pushes, 500ms for synced events ✅ **30min expiry** - Auto-cleanup of events when data never arrives +✅ **Soft expiry for announcements** - Bare repo deleted at 30min, event retained 24h to allow revival ✅ **Fully testable** - Mock-based architecture for reliable unit tests --- @@ -73,6 +80,16 @@ Timeline D: Data never arrives t=60s: Retry → all servers checked, no data ... t=1800s: 30 minutes expired → event discarded, purgatory cleaned up 🗑️ + +Timeline E: Announcement purgatory (no git data within 30 min) + t=0s: Announcement received → bare repo created, enters announcement purgatory + t=0.5s: Start hunting git servers for any content + ... + t=1800s: 30 minutes expired → bare repo deleted, event retained (soft_expired=true) + t=3600s: State event arrives (slow sync) → bare repo recreated, expiry reset ✅ + t=5400s: Git push arrives → announcement promoted to DB, served to clients ✅ + OR + t=88200s: 24 hours after soft expiry, no revival → event added to expired_events, removed 🗑️ ``` **Without proactive sync**: Events in Timeline C would wait indefinitely (or until manual git push). @@ -330,11 +347,11 @@ Both methods check `has_capacity()` and trigger `try_process_next()` if true. --- -## 30-Minute Purgatory Expiry +## Purgatory Expiry -Purgatory entries **automatically expire** after 30 minutes to prevent unbounded memory growth. +### State and PR Events: 30-Minute Hard Expiry -### Why 30 Minutes? 
+State and PR purgatory entries **automatically expire** after 30 minutes. From the [GRASP-01 spec](https://github.com/DanConwayDev/grasp/blob/main/01.md#purgatory): @@ -346,25 +363,40 @@ This balances: - 🧹 **Short enough** to prevent memory leaks from abandoned events - 🔄 **Recoverable** events are still on other relays and can be re-submitted -### Implementation +Each entry tracks `expires_at: Instant` (30 min from creation). The sync loop checks expiry before processing via `has_pending_events()`. If all events for an identifier have expired, the identifier is removed from the sync queue. -Each purgatory entry tracks: +To prevent infinite re-sync loops, expired event IDs are added to an `expired_events` set. If a sync delivers an event that previously expired, it is rejected with `"previously expired from purgatory without git data"`. -- `created_at: Instant` - When added to purgatory -- `expires_at: Instant` - When to discard (created_at + 30min) +**Implementation**: [`src/purgatory/mod.rs:DEFAULT_EXPIRY`](../../src/purgatory/mod.rs) -The main sync loop checks expiry before processing: +### Announcement Purgatory: Two-Phase Soft Expiry -```rust -if !self.has_pending_events(&identifier) { - // No events remain (expired or released) → remove from sync queue - self.sync_queue.remove(&identifier); -} -``` +Announcements use a different expiry strategy because they have an additional concern: the bare git repo created on arrival must be cleaned up, but we also need to avoid re-syncing the announcement event on every sync cycle. -**Note**: Expiry is checked implicitly via `has_pending_events()`. If all events for an identifier have expired, the identifier is removed from the sync queue. 
+ +**Phase 1 — Initial 30-minute expiry:** -**Implementation**: [`src/purgatory/mod.rs:DEFAULT_EXPIRY`](../../src/purgatory/mod.rs) +- Delete the bare git repo (frees disk space, respects the protocol's 30-minute expiry) +- Set `soft_expired = true` on the entry +- Extend `expires_at` by **24 hours** (`SOFT_EXPIRY_EXTENDED`) +- Continue syncing state events for this repo (same as active purgatory) + +**Phase 2 — 24-hour soft expiry:** + +- Add event ID to `expired_events` (prevents re-sync loops) +- Remove entry completely from `announcement_purgatory` + +**Why not just hard-expire at 30 minutes?** + +The protocol's 30-minute expiry creates a dilemma for announcements: + +- **Option A: Add to `failed_events` at 30 min** → Permanently rejects future state events, losing potential revival when state events arrive late (e.g. from a slow sync) +- **Option B: Remove entirely at 30 min** → The announcement gets re-fetched on every subsequent sync cycle, wasting bandwidth indefinitely + +Soft expiry is the solution: the bare repo is deleted at 30 minutes (respecting the protocol), but the event is retained for 24 hours. During this window, a late-arriving state event can **revive** the announcement—`extend_announcement_expiry()` recreates the bare repo, clears `soft_expired`, and resets the 30-minute timer. After 24 hours with no revival, the event is added to `expired_events` and fully removed. + +**Why 24 hours specifically?** This covers the worst-case sync delay. A relay that was offline for up to 24 hours will re-sync state events when it reconnects. The 24-hour window ensures announcements remain revivable throughout that period without retaining them in memory indefinitely (the bare repo itself is already deleted in Phase 1). 
+ +**Implementation**: [`src/purgatory/mod.rs:SOFT_EXPIRY_EXTENDED`](../../src/purgatory/mod.rs) --- @@ -670,6 +702,7 @@ The purgatory sync system is a sophisticated, production-ready implementation th ✅ **Throttles respectfully** - 5 concurrent + 30/min per domain, round-robin fairness ✅ **Times strategically** - 3min for user events, 500ms for synced events ✅ **Expires responsibly** - 30min auto-cleanup prevents memory leaks +✅ **Soft-expires announcements** - Bare repo deleted at 30min, event retained 24h for revival ✅ **Tests thoroughly** - Mock-based architecture enables comprehensive unit tests This design ensures ngit-grasp can serve repositories reliably even when git data and Nostr events arrive out-of-order or from different sources, while respecting remote server capacity and providing excellent observability. diff --git a/docs/explanation/grasp-02-proactive-sync.md b/docs/explanation/grasp-02-proactive-sync.md index ed8fdbf..6696e27 100644 --- a/docs/explanation/grasp-02-proactive-sync.md +++ b/docs/explanation/grasp-02-proactive-sync.md @@ -47,20 +47,37 @@ This state starts afresh when the binary loads. ### RepoSyncIndex (Source of Truth) ```rust -/// What we WANT to sync - derived from events received via self-subscription. -/// Updated immediately when self-subscriber batch fires. +/// What we WANT to sync - derived from events received via self-subscription +/// and from purgatory announcements. +/// Updated immediately when self-subscriber batch fires or purgatory sync timer runs. 
/// Key: repo addressable ref - 30617:pubkey:identifier pub type RepoSyncIndex = Arc>>; +/// Controls which sync filters are built for a repo +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum SyncLevel { + #[default] + Full, // Full L2 + L3 sync (promoted repos with git data) + StateOnly, // Only state events (kind 30618) — for purgatory announcements +} + #[derive(Debug, Clone, Default)] pub struct RepoSyncNeeds { /// Relay URLs listed in this repo's 30617 announcement pub relays: HashSet, /// Root event IDs - 1617/1618/1621 - that reference this repo pub root_events: HashSet, + /// Controls which filters are built: Full (L2+L3) or StateOnly (kind 30618 only) + pub sync_level: SyncLevel, } ``` +**Two sources populate `RepoSyncIndex`:** + +1. **`SelfSubscriber`** — monitors the relay's own event stream for accepted events (kinds 30617, 1617, 1618, 1621). Adds entries with `SyncLevel::Full`. When an announcement is promoted from purgatory to the database, the SelfSubscriber sees it and upgrades the entry to `Full`. + +2. **Purgatory announcement sync timer** (`run_purgatory_announcement_sync`, every 5 seconds) — iterates `purgatory.announcements_for_sync()` and ensures each purgatory announcement has a `SyncLevel::StateOnly` entry in `RepoSyncIndex`. This is the only registration path for purgatory announcements because they are not saved to the database and therefore never seen by the SelfSubscriber. + ### RelaySyncIndex (Confirmed State + Connection) ```rust @@ -336,7 +353,23 @@ The sync system uses three background tasks that run continuously: 1. Queue events to `PendingUpdates` 2. Timer fires (interval, does not reset on events) -3. 
Process batch: update RepoSyncIndex with `SyncLevel::Full` → derive targets → send AddFilters to SyncManager + +**Note**: The SelfSubscriber only sees announcements that have been accepted to the database (promoted from purgatory). Purgatory announcements are registered separately by the purgatory sync timer (see below). + +### 4. Purgatory Announcement Sync Timer (`run_purgatory_announcement_sync`) + +**Purpose**: Register purgatory announcements in `RepoSyncIndex` so state events are synced for them + +**Interval**: Every 5 seconds (200ms in test mode) + +**Flow**: + +1. Iterate `purgatory.announcements_for_sync()` +2. For each announcement not already in `RepoSyncIndex`: insert with `SyncLevel::StateOnly` +3. When an announcement is promoted (git data arrives), the SelfSubscriber sees the newly accepted event and upgrades the entry to `SyncLevel::Full` + +**Why a separate timer?** Purgatory announcements are never saved to the database, so the SelfSubscriber never sees them. The timer bridges this gap, ensuring state events are synced for repos that may still receive git data. 
--- @@ -602,9 +635,10 @@ flowchart TB - Self-subscriber monitors own relay for 30617, 1617, 1618, 1621 (NOT 1619 or 30618) - Batches events in `PendingUpdates` (5 second window via interval timer) -- `process_batch()` updates RepoSyncIndex, then builds AddFilters **directly** (no compute_actions) +- `process_batch()` updates RepoSyncIndex with `SyncLevel::Full`, then builds AddFilters **directly** (no compute_actions) - AddFilters sent via channel to SyncManager, which calls `handle_new_sync_filters()` - This path does NOT use compute_actions because it's building fresh filters from the updated index +- Purgatory announcements (not in DB) are registered separately by the purgatory sync timer with `SyncLevel::StateOnly` --- @@ -687,16 +721,23 @@ fn compute_actions( - **Tags**: lowercase `a`, uppercase `A`, and `q` tags for comprehensive coverage - **Batching**: Per 100 repo refs - **Function**: `build_repo_tag_filters(repos, since)` +- **Only for `SyncLevel::Full` repos** — purgatory announcements (`StateOnly`) skip this layer ### Layer 3: Events Tagging Our Root Events - **Tags**: lowercase `e`, uppercase `E`, and `q` tags for comprehensive coverage - **Batching**: Per 100 event IDs - **Function**: `build_root_event_tag_filters(root_events, since)` +- **Only for `SyncLevel::Full` repos** — purgatory announcements (`StateOnly`) skip this layer + +### Combined Layer 2+3 (SyncLevel-Aware) + +The `build_sync_level_aware_filters()` function combines both layers, partitioning repos by `SyncLevel`: -### Combined Layer 2+3 +- **`Full` repos**: state event filters + repo-tag filters + root-event-tag filters +- **`StateOnly` repos**: state event filters only (kind 30618 with `#d` tags) -The `build_layer2_and_layer3_filters()` function combines both layers. 
Used by: +Used by: - `recompute_new_sync_filters_for_relay` for new item subscriptions - `reconstruct_filters` for rebuilding from confirmed state @@ -871,9 +912,9 @@ flowchart TB ``` src/sync/ -├── mod.rs # SyncManager, main loop, data structures +├── mod.rs # SyncManager, main loop, data structures, SyncLevel, run_purgatory_announcement_sync ├── algorithms.rs # derive_relay_targets(), compute_actions() -├── filters.rs # build_announcement_filter(), build_layer2_and_layer3_filters() +├── filters.rs # build_announcement_filter(), build_sync_level_aware_filters() ├── health.rs # RelayHealthTracker with exponential backoff ├── relay_connection.rs # RelayConnection, RelayEvent handling ├── self_subscriber.rs # SelfSubscriber with batching diff --git a/docs/explanation/purgatory-design.md b/docs/explanation/purgatory-design.md index b984745..bd792d4 100644 --- a/docs/explanation/purgatory-design.md +++ b/docs/explanation/purgatory-design.md @@ -8,7 +8,11 @@ ## Overview -Purgatory is an in-memory holding area that solves the **"which arrives first?"** problem in GRASP. Either nostr events or git pushes can arrive in any order: +Purgatory is an in-memory holding area that solves two related problems in GRASP: + +### Problem 1: "Which arrives first?" (State and PR events) + +Either nostr events or git pushes can arrive in any order: - **Event first**: Event waits in purgatory until git data arrives - **Git first**: Placeholder waits in purgatory until event arrives @@ -19,6 +23,18 @@ When both halves arrive, they are processed together and saved to the database. > Accepted repo state announcements, PRs and PR Updates SHOULD be accepted with message "purgatory: won't be served until git data arrives" and kept in purgatory (not served) until the related git data arrives and otherwise discarded after 30 minutes. 
+### Problem 2: Misleading empty repository announcements + +When a repository announcement arrives, we must create the bare git repo immediately so pushes can succeed. But if no git data ever arrives, we would serve an empty repo and its announcement indefinitely—clients see the announcement, try to clone, and get nothing. + +**Solution**: New announcements go to **announcement purgatory** instead of being immediately accepted: + +1. **Announcement arrives** → Create bare repo immediately, add announcement to purgatory +2. **Git data arrives** → Promote announcement from purgatory to active (now served to clients) +3. **No git data before expiry** → Delete bare repo, discard announcement (never served) + +This ensures we only serve announcements for repos that actually have content. + --- ## Key Design Principles @@ -31,16 +47,15 @@ Purgatory data is **not persisted** to disk. On restart, all purgatory entries a - Git data can be re-pushed - 30-minute expiry means data is transient anyway -### 2. Separate Storage for State vs PR Events - -State events (kind 30618) and PR events (kind 1617/1618) have fundamentally different matching patterns: +### 2. Separate Storage for Each Event Type -| Event Type | Index | Matching Strategy | -|------------|-------|-------------------| -| **State Events** | `identifier` (d tag) | Compare refs at push time | -| **PR Events** | `event_id` (hex string) | Direct match via `refs/nostr/` | +| Store | Index | Purpose | +|-------|-------|---------| +| `announcement_purgatory` | `(PublicKey, String)` — `(owner, identifier)` | Announcements awaiting git data | +| `state_events` | `identifier` (d tag) | State events awaiting git data | +| `pr_events` | `event_id` (hex string) | PR events awaiting git data | -They use **separate DashMap stores** for efficient concurrent access. +Announcement purgatory uses `(pubkey, identifier)` because identifier alone is not unique across different owners. ### 3. 
Late Binding for State Events @@ -78,7 +93,23 @@ With purgatory checking during authorization: 2. Git push arrives → Checks **database + purgatory** → State found → **AUTHORIZED** ✅ 3. After push succeeds → Save event to database → Remove from purgatory -See [`src/git/authorization.rs:51-162`](../../src/git/authorization.rs) for implementation. +See [`src/git/authorization.rs`](../../src/git/authorization.rs) for implementation. + +### 6. Announcement Purgatory: Bare Repo Created Immediately + +**Decision:** Create the bare git repo when announcement enters purgatory. + +**Why:** Git pushes may arrive at any time. Without a repo, pushes fail. + +**Consequence:** We allocate disk space for repos that may expire unused. Must delete repos on expiry. + +### 7. Replacement Announcements Skip Purgatory + +**Decision:** Announcements replacing an existing active (database) announcement are accepted immediately. + +**Why:** The repository is already proven active with content. + +**How:** Check if active announcement exists for `(pubkey, identifier)` before routing to purgatory. --- @@ -103,22 +134,54 @@ pub struct RefUpdate { } ``` +### Announcement Purgatory Entry + +```rust +pub struct AnnouncementPurgatoryEntry { + /// The kind 30617 announcement event + pub event: Event, + + /// Repository identifier from 'd' tag + pub identifier: String, + + /// Event author pubkey + pub owner: PublicKey, + + /// Path to the bare git repo on disk (created immediately on entry) + pub repo_path: PathBuf, + + /// Relay URLs from 'relays'/'clone' tags — for sync registration + pub relays: HashSet, + + /// When added to purgatory + pub created_at: Instant, + + /// Expiry deadline (30 min from creation, may be extended) + pub expires_at: Instant, + + /// Whether the bare repo has been deleted (soft expiry phase) + pub soft_expired: bool, +} +``` + +**Indexed by `(pubkey, identifier)`** because identifier is not unique across different owners. 
+ ### State Purgatory Entry ```rust pub struct StatePurgatoryEntry { /// The nostr state event (kind 30618) awaiting git data pub event: Event, - + /// Repository identifier from 'd' tag pub identifier: String, - + /// Event author pubkey pub author: PublicKey, - + /// When added to purgatory pub created_at: Instant, - + /// Expiry deadline (30 min from creation, may be extended) pub expires_at: Instant, } @@ -132,14 +195,14 @@ pub struct StatePurgatoryEntry { pub struct PrPurgatoryEntry { /// The nostr PR event, if received (None = git data arrived first) pub event: Option, - + /// Expected commit SHA from 'c' tag (if event exists) /// or actual commit pushed (if git arrived first) pub commit: String, - + /// When added to purgatory pub created_at: Instant, - + /// Expiry deadline (30 min from creation) pub expires_at: Instant, } @@ -151,24 +214,155 @@ pub struct PrPurgatoryEntry { ```rust pub struct Purgatory { + /// Announcement events indexed by (owner, identifier) + announcement_purgatory: DashMap<(PublicKey, String), AnnouncementPurgatoryEntry>, + /// State events indexed by identifier (d tag) /// Multiple state events per identifier allowed (different authors) - state_events: Arc>>, - + state_events: DashMap>, + /// PR events indexed by event_id (hex string) /// Single entry per event ID - pr_events: Arc>, - + pr_events: DashMap, + /// Sync queue for background git data fetching - sync_queue: Arc>, - - _git_data_path: PathBuf, + sync_queue: DashMap, + + /// Events that previously expired without git data (prevents re-sync loops) + expired_events: DashMap, } ``` --- -## Event Flows +## Announcement Purgatory Flows + +### New Announcement Flow + +``` +Announcement arrives + | + v +Is there an active announcement for (pubkey, identifier) in DB? + | + +-- YES --> Accept immediately (replacement, repo already proven) + | + +-- NO --> Is there a purgatory entry for (pubkey, identifier)? 
+ | + +-- YES --> Replace purgatory entry, extend expiry 30 min + | Return OK to client (but don't serve) + | + +-- NO --> Create bare repo + Add to purgatory + Return OK to client (but don't serve) +``` + +### Git Data Arrival → Promotion + +``` +Git push/fetch completes with data + | + v +process_purgatory_announcements() called + | + v +Is there a purgatory announcement for (owner, identifier)? + | + +-- YES --> promote_announcement() removes from purgatory + | Save event to database + | Notify WebSocket clients + | (Sync upgrades to Full automatically via SelfSubscriber) + | + +-- NO --> Normal processing +``` + +### State Event Arrival for Purgatory Announcement + +``` +State event arrives + | + v +fetch_repository_data_with_purgatory() checks DB + purgatory + | + +-- Announcement found in purgatory --> + | Validate authorization against purgatory announcement + | Extend purgatory announcement expiry (reset 30-min timer) + | If soft-expired: recreate bare repo, clear soft_expired flag + | Route state event to state purgatory + | + +-- No announcement anywhere --> Reject +``` + +### Announcement Expiry (Two-Phase Soft Expiry) + +The protocol specifies 30-minute expiry for announcements. 
We implement a two-phase soft expiry: + +**Phase 1 — Initial 30-minute expiry (`soft_expired == false`):** +- Delete the bare git repo (frees disk space, respects protocol expiry) +- Set `soft_expired = true` +- Extend `expires_at` by 24 hours (`SOFT_EXPIRY_EXTENDED`) +- Continue syncing state events (same as active purgatory) + +**Phase 2 — 24-hour soft expiry (`soft_expired == true`):** +- Add event ID to `expired_events` (prevents re-sync loops) +- Remove entry completely from `announcement_purgatory` + +**Why soft expiry?** Without it, we'd face a dilemma: + +- Add expired announcements to `failed_events` → permanently reject future state events, losing potential revival when state events arrive late +- Re-fetch the announcement event on every sync cycle → wasting bandwidth and creating unnecessary sync traffic + +Soft expiry retains the event for 24 hours so that late-arriving state events (e.g. from a slow sync) can revive the announcement without forcing a full re-announcement flow. + +**Revival:** If a state event arrives for a soft-expired announcement, `extend_announcement_expiry()` recreates the bare repo, clears `soft_expired`, and resets the 30-minute timer. + +### Expiry Extension Triggers + +The 30-minute purgatory timer is reset (extended) in three scenarios: + +| Trigger | Location | Why | +|---------|----------|-----| +| State event arrives | `StatePolicy::process_state_event()` | Repo is actively receiving metadata | +| Git push authorized against purgatory state | `get_state_authorization_for_specific_owner_repo()` | Repo is actively receiving git data | +| Replacement announcement arrives | `AnnouncementPolicy::validate()` | Announcement updated | + +All three call `purgatory.extend_announcement_expiry(owner, identifier, 1800s)`. 
+ +### Purgatory Lifecycle + +``` + ┌─────────────────────────────────────┐ + │ │ + v │ +Announcement ──> ACTIVE ──────────────────────────────────┤ + arrives (bare repo exists) │ + │ │ + ├── Git data ──> PROMOTED (exit) │ + │ │ + ├── Deletion ──> REMOVED (exit) │ + │ │ + v │ + SOFT_EXPIRED ──────────────────────────────┘ + (bare repo deleted, ^ + event retained) │ + │ │ + ├── State event arrives (revival) + │ + └── Extended expiry ──> REMOVED (exit) +``` + +| Exit | Trigger | Action | +|------|---------|--------| +| **Promotion** | Git data arrives | Move to database, sync upgrades to Full | +| **Soft expiry** | Initial 30-min timeout | Delete bare repo, retain event, continue sync | +| **Full expiry** | 24-hour soft expiry | Add to expired_events, remove from purgatory | +| **Deletion** | Kind 5 event | Delete bare repo, remove from purgatory | +| **Replacement** | Newer announcement (same pubkey, identifier) | Replace entry, extend expiry | +| **Service change** | Newer announcement removes our service | Remove from purgatory | + +--- + +## State and PR Event Flows ### State Event Arrival (Kind 30618) @@ -377,11 +571,12 @@ Purgatory includes a background sync system that fetches git data from remote se ▼ ┌─────────────────────────────────────────────────────┐ │ process_newly_available_git_data(repo, oids) │ -│ 1. Find satisfiable state events in purgatory │ -│ 2. Find satisfiable PR events in purgatory │ -│ 3. Save events to database │ -│ 4. Sync git data to other owner repos │ -│ 5. Remove from purgatory │ +│ 1. Find satisfiable announcement in purgatory │ +│ 2. Find satisfiable state events in purgatory │ +│ 3. Find satisfiable PR events in purgatory │ +│ 4. Save events to database │ +│ 5. Sync git data to other owner repos │ +│ 6. 
Remove from purgatory │ └─────────────────────────────────────────────────────┘ ``` @@ -402,8 +597,8 @@ pub struct SyncQueueEntry { **Backoff strategy:** - First attempt: 20 seconds -- Second attempt: 2 minutes -- Subsequent attempts: 2 minutes +- Second attempt: 40 seconds +- Subsequent attempts: capped at 2 minutes ### Sync Delays @@ -428,7 +623,7 @@ pub struct ThrottleManager { ``` **Rate limiting:** -- Default: 5 requests per domain per 30 seconds +- Default: 5 concurrent requests per domain, 30 requests per minute - Tracks request timestamps in a sliding window - Queues identifiers when domain is throttled - Processes queue when capacity frees up @@ -439,7 +634,47 @@ See [`src/purgatory/sync/throttle.rs`](../../src/purgatory/sync/throttle.rs) for ## Purgatory API -### Adding Entries +### Announcement Purgatory + +```rust +impl Purgatory { + /// Add an announcement to purgatory (bare repo already created by caller) + pub fn add_announcement( + &self, + event: Event, + identifier: String, + owner: PublicKey, + repo_path: PathBuf, + relays: HashSet, + ); + + /// Promote announcement: remove from purgatory, return event for DB save + pub fn promote_announcement( + &self, + owner: &PublicKey, + identifier: &str, + ) -> Option; + + /// Get announcements by identifier (for authorization checks) + pub fn get_announcements_by_identifier( + &self, + identifier: &str, + ) -> Vec; + + /// Extend expiry (and revive soft-expired entries, recreating bare repo) + pub fn extend_announcement_expiry( + &self, + owner: &PublicKey, + identifier: &str, + duration: Duration, + ); + + /// Get all announcements for sync registration + pub fn announcements_for_sync(&self) -> Vec; +} +``` + +### State and PR Purgatory ```rust impl Purgatory { @@ -453,13 +688,7 @@ impl Purgatory { /// Add a PR placeholder (git-data-first scenario) pub fn add_pr_placeholder(&self, event_id: String, commit: String); -} -``` -### Finding Entries - -```rust -impl Purgatory { /// Find state events waiting for 
an identifier pub fn find_state(&self, identifier: &str) -> Vec; @@ -476,13 +705,7 @@ impl Purgatory { /// Find a PR placeholder specifically (git-data-first) pub fn find_pr_placeholder(&self, event_id: &str) -> Option; -} -``` -### Removing Entries - -```rust -impl Purgatory { /// Remove all state events for an identifier pub fn remove_state(&self, identifier: &str); @@ -499,36 +722,14 @@ impl Purgatory { ```rust impl Purgatory { /// Remove expired entries (called every 60 seconds) - /// Returns (state_removed, pr_removed) - pub fn cleanup(&self) -> (usize, usize); + /// Handles two-phase soft expiry for announcements + pub fn cleanup(&self); - /// Extend expiry for entries about to be processed - /// Ensures at least `duration` remaining + /// Extend expiry for state/PR entries about to be processed pub fn extend_expiry(&self, identifier: &str, event_ids: &[EventId], duration: Duration); - /// Get current counts for metrics - pub fn count(&self) -> (usize, usize); -} -``` - -### Sync Queue Management - -```rust -impl Purgatory { - /// Enqueue identifier for sync with custom delay - pub fn enqueue_sync(&self, identifier: &str, delay: Duration); - - /// Enqueue with default delay (3 minutes) - pub fn enqueue_sync_default(&self, identifier: &str); - - /// Enqueue with immediate delay (500ms) - pub fn enqueue_sync_immediate(&self, identifier: &str); - - /// Check if identifier has pending events - pub fn has_pending_events(&self, identifier: &str) -> bool; - - /// Remove identifier from sync queue - pub fn remove_from_sync_queue(&self, identifier: &str); + /// Check if an event previously expired (prevents re-sync loops) + pub fn is_expired(&self, event_id: &EventId) -> bool; } ``` @@ -558,12 +759,6 @@ pub fn can_apply_state( event: &Event, repo_path: &Path, ) -> Result; - -/// Get refs from state that aren't being pushed -pub fn get_unpushed_refs( - state_refs: &[RefPair], - pushed_refs: &[RefPair], -) -> Vec; ``` See 
[`src/purgatory/helpers.rs`](../../src/purgatory/helpers.rs) for implementation. @@ -572,123 +767,37 @@ See [`src/purgatory/helpers.rs`](../../src/purgatory/helpers.rs) for implementat ## Integration Points -### 1. Event Policy (Nip34WritePolicy) +### 1. Announcement Policy (`src/nostr/policy/announcement.rs`) -State and PR events are added to purgatory when git data doesn't exist: +Routes new announcements to purgatory or accepts replacements: -```rust -// From src/nostr/policy/state.rs -async fn handle_state(&self, event: &Event) -> WritePolicyResult { - let identifier = extract_identifier(event)?; - - // Check if we have matching git data - if self.has_matching_git_data(&identifier, event).await? { - return WritePolicyResult::Accept; - } - - // Add to purgatory - self.purgatory.add_state( - event.clone(), - identifier.clone(), - event.pubkey, - ); - - WritePolicyResult::Reject { - status: true, // Client sees OK - message: "purgatory: awaiting git data".into() - } -} -``` +- If active DB announcement exists for `(pubkey, identifier)` → `Accept` immediately +- If purgatory entry exists → replace it, extend expiry, return `Accept` +- Otherwise → return `AcceptPurgatory`, caller calls `add_to_purgatory()` which creates bare repo and adds to purgatory -### 2. Git Push Authorization +### 2. State Event Policy (`src/nostr/policy/state.rs`) -Authorization checks both database and purgatory: +Checks purgatory announcements for authorization and extends their expiry: ```rust -// From src/git/authorization.rs -pub async fn authorize_push( - database: &SharedDatabase, - identifier: &str, - owner_pubkey: &str, - request_body: &Bytes, - purgatory: &Arc, // Critical! 
- repo_path: &std::path::Path, -) -> anyhow::Result { - // Parse pushed refs - let pushed_refs = parse_pushed_refs(request_body); - - // Check database for state events - let db_result = get_authorization_from_db(database, identifier).await?; - - if !db_result.authorized { - // No state in database - check purgatory - let purgatory_result = get_state_authorization_for_specific_owner_repo( - database, - identifier, - owner_pubkey, - purgatory, - &pushed_refs, - repo_path, - ).await?; - - return purgatory_result; - } - - db_result -} +// Fetch announcements from both DB and purgatory +let repo_data = fetch_repository_data_with_purgatory(db, purgatory, identifier).await?; + +// For each authorized owner with a purgatory announcement, extend expiry +purgatory.extend_announcement_expiry(&owner_pk, &identifier, Duration::from_secs(1800)); ``` -### 3. Post-Push Processing +### 3. Git Push Authorization (`src/git/authorization.rs`) -After successful push, events from purgatory are saved to database: +`fetch_repository_data_with_purgatory()` merges DB announcements with purgatory announcements for authorization. On successful authorization via purgatory state events, also extends announcement expiry. -```rust -// From src/git/handlers.rs -if from_purgatory { - if let (Some(db), Some(purg)) = (&database, &purgatory) { - // Save state event to database - db.save_event(&state.event).await?; - - // Remove from purgatory - purg.remove_state_event(identifier, &state.event.id); - } -} -``` +### 4. Git Data Processing (`src/git/sync.rs`) -### 4. Background Sync Loop +`process_purgatory_announcements()` is called after any git push or background sync fetch. It promotes announcements from purgatory to the database and notifies WebSocket clients. -Started during application initialization: +### 5. 
Sync Registration (`src/sync/`) -```rust -// From src/main.rs -let purgatory = Arc::new(Purgatory::new(git_data_path)); -let ctx = Arc::new(RealSyncContext::new( - database.clone(), - purgatory.clone(), - config.domain.clone(), - git_data_path.clone(), -)); -let throttle_manager = Arc::new(ThrottleManager::new(5, 30)); -throttle_manager.set_context(ctx.clone()); - -// Start sync loop -let sync_handle = purgatory.clone().start_sync_loop(ctx, throttle_manager); - -// Start cleanup task -let cleanup_handle = tokio::spawn(async move { - let mut interval = tokio::time::interval(Duration::from_secs(60)); - loop { - interval.tick().await; - let (state_removed, pr_removed) = purgatory.cleanup(); - if state_removed + pr_removed > 0 { - tracing::debug!( - "Purgatory cleanup removed {} state, {} PR entries", - state_removed, pr_removed - ); - } - } -}); -``` +A background timer (`run_purgatory_announcement_sync`, every 5 seconds) ensures purgatory announcements are registered in `RepoSyncIndex` with `SyncLevel::StateOnly`. When an announcement is promoted, the `SelfSubscriber` upgrades it to `SyncLevel::Full`. 
--- @@ -698,7 +807,7 @@ let cleanup_handle = tokio::spawn(async move { src/ ├── purgatory/ │ ├── mod.rs # Main Purgatory struct and API -│ ├── types.rs # RefPair, StatePurgatoryEntry, PrPurgatoryEntry +│ ├── types.rs # RefPair, AnnouncementPurgatoryEntry, StatePurgatoryEntry, PrPurgatoryEntry │ ├── helpers.rs # Ref extraction and matching functions │ └── sync/ │ ├── mod.rs # Sync module exports @@ -710,9 +819,10 @@ src/ ├── git/ │ ├── authorization.rs # authorize_push with purgatory checking │ ├── handlers.rs # handle_receive_pack with post-push processing -│ └── sync.rs # process_newly_available_git_data +│ └── sync.rs # process_newly_available_git_data, process_purgatory_announcements └── nostr/ └── policy/ + ├── announcement.rs # Route announcements to purgatory ├── state.rs # State event policy with purgatory └── pr_event.rs # PR event policy with purgatory ``` @@ -725,7 +835,7 @@ src/ Located in each module: -- **[`src/purgatory/mod.rs`](../../src/purgatory/mod.rs)** - Core purgatory operations +- **[`src/purgatory/mod.rs`](../../src/purgatory/mod.rs)** - Core purgatory operations including announcement purgatory - **[`src/purgatory/helpers.rs`](../../src/purgatory/helpers.rs)** - Ref matching logic - **[`src/purgatory/sync/functions.rs`](../../src/purgatory/sync/functions.rs)** - Sync functions with MockSyncContext - **[`src/purgatory/sync/throttle.rs`](../../src/purgatory/sync/throttle.rs)** - Throttle manager @@ -734,6 +844,9 @@ Located in each module: Located in [`tests/`](../../tests/): +- **Announcement purgatory flow** - Announcement enters purgatory, git data promotes it +- **Announcement soft expiry** - Bare repo deleted after 30 min, event retained 24h +- **Announcement revival** - State event revives soft-expired announcement - **State event purgatory flow** - Event arrives, git push releases it - **PR event purgatory flow** - Event arrives, git push releases it - **Git-data-first flow** - Git push creates placeholder, event completes it @@ -744,7 
+857,19 @@ Located in [`tests/`](../../tests/): ## Key Learnings -### 1. Purgatory Authorization is Critical +### 1. Announcement Purgatory Prevents Misleading Empty Repos + +Without announcement purgatory, we'd serve announcements for repos with no content. Clients would see the announcement, try to clone, and get nothing. + +**Solution:** Announcements wait in purgatory until git data proves content exists. + +### 2. Soft Expiry Avoids Sync Loops + +The protocol's 30-minute expiry creates a problem: without soft expiry, we'd either permanently block repositories or constantly re-sync expired announcement events. + +**Solution:** Soft expiry retains the event for 24 hours after deleting the bare repo, allowing revival without re-fetching. + +### 3. Purgatory Authorization is Critical Without checking purgatory during authorization, we have a deadlock: - State event goes to purgatory (no git data) @@ -753,7 +878,7 @@ Without checking purgatory during authorization, we have a deadlock: **Solution:** `authorize_push()` checks both database and purgatory. -### 2. Late Binding for State Events +### 4. Late Binding for State Events Extracting refs at event arrival time doesn't work when: - Multiple state events arrive for same identifier @@ -761,7 +886,7 @@ Extracting refs at event arrival time doesn't work when: **Solution:** Extract and match refs at push time via `find_matching_states()`. -### 3. Bidirectional Waiting for PR Events +### 5. Bidirectional Waiting for PR Events PR events can arrive before or after git data: - Event first → Wait for git push @@ -769,26 +894,13 @@ PR events can arrive before or after git data: **Solution:** `PrPurgatoryEntry.event: Option` with `None` = placeholder. -### 4. Sync Queue Debouncing - -When events arrive in bursts (e.g., negentropy sync), we don't want to spawn a sync task for each event. - -**Solution:** `enqueue_sync()` resets `attempt_count` and updates `next_attempt` if already queued. - -### 5. 
Domain Throttling with Queues - -When a domain is throttled, we still want to eventually sync from it. - -**Solution:** `ThrottleManager` maintains per-domain queues and processes them when capacity frees. - --- ## Related Documentation -- [Inline Authorization](inline-authorization.md) - Why purgatory checking during authorization is essential - [Architecture Overview](architecture.md) - Full system design -- [Background Sync](../how-to/purgatory-sync.md) - How to configure and monitor sync -- [Test Strategy](../reference/test-strategy.md) - How we test purgatory +- [GRASP-02 Proactive Sync](grasp-02-proactive-sync.md) - Relay-to-relay event sync with SyncLevel +- [GRASP-02 Purgatory Git Data Fetching](grasp-02-proactive-sync-purgatory-git-data.md) - Background git data hunting --- -- cgit v1.2.3 From 26f608e5011b9d1ad6036da75b89272835e69695 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Mon, 23 Feb 2026 15:08:37 +0000 Subject: persist and restore announcement events across graceful restarts Extends purgatory persistence to include announcement purgatory entries. On graceful shutdown, non-soft-expired announcements are serialised to purgatory-state.json alongside state/PR/expired events; on startup they are restored, skipping any entry whose bare repo path no longer exists. Updates purgatory-design.md to reflect that purgatory persists through graceful shutdown and documents the new PurgatoryState disk format. Adds create_announcement_event helper to purgatory_helpers and three new integration tests in purgatory_persistence covering the full save/restore cycle, missing-repo skip, and the combined roundtrip with all entry types. 
--- docs/explanation/purgatory-design.md | 66 ++++++++- src/purgatory/mod.rs | 264 ++++++++++++++++++++++++++++++++++- tests/common/purgatory_helpers.rs | 38 +++++ tests/purgatory_persistence.rs | 135 +++++++++++++++++- 4 files changed, 493 insertions(+), 10 deletions(-) (limited to 'docs') diff --git a/docs/explanation/purgatory-design.md b/docs/explanation/purgatory-design.md index bd792d4..8e7d75c 100644 --- a/docs/explanation/purgatory-design.md +++ b/docs/explanation/purgatory-design.md @@ -39,14 +39,36 @@ This ensures we only serve announcements for repos that actually have content. ## Key Design Principles -### 1. In-Memory Only +### 1. Graceful-Shutdown Persistence -Purgatory data is **not persisted** to disk. On restart, all purgatory entries are lost. This is acceptable because: +Purgatory state is **saved to disk on graceful shutdown** and **restored on startup**. This preserves in-flight work across planned restarts (deployments, reboots). + +On `SIGINT` / Ctrl-C, `main.rs` calls `purgatory.save_to_disk()` before exiting. On startup, if the state file exists, `purgatory.restore_from_disk()` is called before the server begins accepting connections. + +**What is persisted:** + +| Store | Persisted? | Notes | +|-------|-----------|-------| +| `announcement_purgatory` | ✅ Yes | Non-soft-expired entries only (bare repo must exist) | +| `state_events` | ✅ Yes | All active entries | +| `pr_events` | ✅ Yes | Both events and placeholders | +| `expired_events` | ✅ Yes | Prevents re-sync loops after restart | +| `sync_queue` | ❌ No | Rebuilt automatically after restore | + +**What is NOT persisted (unclean shutdown):** + +On a crash or `SIGKILL`, the state file is not written. 
In that case: - Events are still on other relays (can be re-submitted) - Git data can be re-pushed - 30-minute expiry means data is transient anyway +**State file location:** `/purgatory-state.json` + +**Downtime accounting:** Expiry deadlines are stored as duration offsets from the save timestamp. On restore, elapsed downtime is subtracted from each deadline. Entries that expired during downtime are immediately swept by the next cleanup tick. + +**Soft-expired announcements are excluded:** Their bare repos have already been deleted, so they cannot be meaningfully restored. They will be re-fetched via background sync if needed. + ### 2. Separate Storage for Each Event Type | Store | Index | Purpose | @@ -233,6 +255,31 @@ pub struct Purgatory { } ``` +### Persistence State (Disk Format) + +`Instant` fields cannot be serialized directly. Each entry type has a corresponding `Serializable*` wrapper that stores time fields as `u64` second offsets from a `saved_at: SystemTime` reference point. On restore, elapsed downtime is subtracted to produce the correct remaining TTL. + +```rust +struct PurgatoryState { + version: u32, // currently 1 + saved_at: SystemTime, // reference for offset math + + /// Non-soft-expired announcements indexed by "owner_hex:identifier" + announcement_purgatory: HashMap, + + /// State events indexed by repository identifier + state_events: HashMap>, + + /// PR events (and placeholders) indexed by event ID hex + pr_events: HashMap, + + /// Expired event IDs → approximate expiry SystemTime + expired_events: HashMap, +} +``` + +The `announcement_purgatory` field uses `#[serde(default)]` so that state files written before announcement persistence was added (version 1 without the field) still deserialize correctly. 
+ --- ## Announcement Purgatory Flows @@ -806,8 +853,9 @@ A background timer (`run_purgatory_announcement_sync`, every 5 seconds) ensures ``` src/ ├── purgatory/ -│ ├── mod.rs # Main Purgatory struct and API +│ ├── mod.rs # Main Purgatory struct, API, save_to_disk, restore_from_disk │ ├── types.rs # RefPair, AnnouncementPurgatoryEntry, StatePurgatoryEntry, PrPurgatoryEntry +│ ├── persistence.rs # instant_to_offset / offset_to_instant time conversion utilities │ ├── helpers.rs # Ref extraction and matching functions │ └── sync/ │ ├── mod.rs # Sync module exports @@ -835,7 +883,8 @@ src/ Located in each module: -- **[`src/purgatory/mod.rs`](../../src/purgatory/mod.rs)** - Core purgatory operations including announcement purgatory +- **[`src/purgatory/mod.rs`](../../src/purgatory/mod.rs)** - Core purgatory operations including announcement purgatory; persistence round-trip tests for all entry types (state, PR, announcement, expired events, downtime calculation, soft-expired exclusion, missing-repo skip) +- **[`src/purgatory/persistence.rs`](../../src/purgatory/persistence.rs)** - `instant_to_offset` / `offset_to_instant` round-trip tests - **[`src/purgatory/helpers.rs`](../../src/purgatory/helpers.rs)** - Ref matching logic - **[`src/purgatory/sync/functions.rs`](../../src/purgatory/sync/functions.rs)** - Sync functions with MockSyncContext - **[`src/purgatory/sync/throttle.rs`](../../src/purgatory/sync/throttle.rs)** - Throttle manager @@ -852,6 +901,7 @@ Located in [`tests/`](../../tests/): - **Git-data-first flow** - Git push creates placeholder, event completes it - **Authorization with purgatory** - Push authorized by purgatory state - **Background sync** - Sync fetches git data and releases events +- **Persistence across restart** - Save/restore cycle preserves all entry types including announcements --- @@ -894,6 +944,14 @@ PR events can arrive before or after git data: **Solution:** `PrPurgatoryEntry.event: Option` with `None` = placeholder. +### 6. 
Persistence Requires Instant → Duration Conversion + +`std::time::Instant` is not serializable and is not meaningful across process boundaries. Expiry deadlines must be converted to a portable form. + +**Solution:** Store each deadline as a `u64` second offset from a `saved_at: SystemTime` reference. On restore, subtract elapsed downtime from each offset to compute the new `Instant`. Entries whose deadline already passed during downtime get `expires_at = now` and are swept by the next cleanup tick. + +**Soft-expired announcements are excluded from persistence** because their bare repos have been deleted. Restoring them would leave purgatory entries pointing at non-existent repos. They are simply dropped; background sync will re-fetch the announcement event if needed. + --- ## Related Documentation diff --git a/src/purgatory/mod.rs b/src/purgatory/mod.rs index f5f8b31..9a63bf6 100644 --- a/src/purgatory/mod.rs +++ b/src/purgatory/mod.rs @@ -83,9 +83,35 @@ struct SerializablePrPurgatoryEntry { expires_at_offset_secs: u64, } +/// Serializable wrapper for `AnnouncementPurgatoryEntry` with time offsets. +/// +/// Stores `Instant` fields as `Duration` offsets from the `saved_at` timestamp +/// in `PurgatoryState`, allowing state to be persisted and restored across restarts. +/// +/// Note: soft-expired entries (bare repo deleted) are NOT persisted — they have +/// no git repo on disk and would be immediately cleaned up on restore anyway. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +struct SerializableAnnouncementPurgatoryEntry { + /// The nostr announcement event (kind 30617) + event: Event, + /// The repository identifier from the event's 'd' tag + identifier: String, + /// The owner pubkey (event author) + owner: PublicKey, + /// Path to the bare git repository (must exist on disk) + repo_path: PathBuf, + /// Relay URLs from the announcement (for sync registration) + relays: HashSet<String>, + /// Duration offset from saved_at for created_at + created_at_offset_secs: u64, + /// Duration offset from saved_at for expires_at + expires_at_offset_secs: u64, +} + /// Serializable purgatory state for disk persistence. /// /// Contains all purgatory data needed to restore state across restarts: +/// - Announcement events (indexed by (owner, identifier)) — non-soft-expired only /// - State events (indexed by identifier) /// - PR events (indexed by event ID) /// - Expired events (to prevent re-sync loops) @@ -97,6 +123,10 @@ struct PurgatoryState { version: u32, /// When this state was saved to disk saved_at: SystemTime, + /// Announcement events indexed by "owner_hex:identifier" + /// Only non-soft-expired entries are persisted (bare repo must exist). + #[serde(default)] + announcement_purgatory: HashMap<String, SerializableAnnouncementPurgatoryEntry>, /// State events indexed by repository identifier state_events: HashMap>, /// PR events indexed by event ID (hex string) @@ -1114,6 +1144,34 @@ impl Purgatory { let saved_at = SystemTime::now(); let now_instant = Instant::now(); + // Convert announcement_purgatory to serializable format. + // Skip soft-expired entries: their bare repos have been deleted, so they + // cannot be meaningfully restored (the repo path no longer exists on disk). 
+ let mut announcement_purgatory = HashMap::new(); + for entry in self.announcement_purgatory.iter() { + let e = entry.value(); + if e.soft_expired { + continue; + } + let created_offset = + persistence::instant_to_offset(e.created_at, saved_at, now_instant); + let expires_offset = + persistence::instant_to_offset(e.expires_at, saved_at, now_instant); + let key = format!("{}:{}", e.owner.to_hex(), e.identifier); + announcement_purgatory.insert( + key, + SerializableAnnouncementPurgatoryEntry { + event: e.event.clone(), + identifier: e.identifier.clone(), + owner: e.owner, + repo_path: e.repo_path.clone(), + relays: e.relays.clone(), + created_at_offset_secs: created_offset.as_secs(), + expires_at_offset_secs: expires_offset.as_secs(), + }, + ); + } + // Convert state_events to serializable format let mut state_events = HashMap::new(); for entry in self.state_events.iter() { @@ -1176,6 +1234,7 @@ impl Purgatory { let state = PurgatoryState { version: 1, saved_at, + announcement_purgatory, state_events, pr_events, expired_events, @@ -1187,6 +1246,7 @@ impl Purgatory { tracing::info!( path = %path.display(), + announcements = state.announcement_purgatory.len(), state_events = state.state_events.len(), pr_events = state.pr_events.len(), expired_events = state.expired_events.len(), @@ -1234,6 +1294,45 @@ impl Purgatory { let now_instant = Instant::now(); + // Restore announcement_purgatory. + // Skip entries whose bare repo no longer exists on disk — this can happen + // if the repo was deleted externally between save and restore. 
+ for (_key, e) in state.announcement_purgatory { + if !e.repo_path.exists() { + tracing::warn!( + owner = %e.owner, + identifier = %e.identifier, + repo_path = %e.repo_path.display(), + "Skipping announcement restore: bare repo no longer exists" + ); + continue; + } + let created_at = persistence::offset_to_instant( + Duration::from_secs(e.created_at_offset_secs), + state.saved_at, + now_instant, + ); + let expires_at = persistence::offset_to_instant( + Duration::from_secs(e.expires_at_offset_secs), + state.saved_at, + now_instant, + ); + let key = (e.owner, e.identifier.clone()); + self.announcement_purgatory.insert( + key, + AnnouncementPurgatoryEntry { + event: e.event, + identifier: e.identifier, + owner: e.owner, + repo_path: e.repo_path, + relays: e.relays, + created_at, + expires_at, + soft_expired: false, + }, + ); + } + // Restore state_events for (identifier, entries) in state.state_events { let restored_entries: Vec = entries @@ -1301,6 +1400,7 @@ impl Purgatory { tracing::info!( path = %path.display(), + announcements = self.announcement_purgatory.len(), state_events = self.state_events.len(), pr_events = self.pr_events.len(), expired_events = self.expired_events.len(), @@ -2425,6 +2525,141 @@ async fn test_file_cleanup_after_successful_restore() { assert!(!state_file.exists()); } +#[tokio::test] +async fn test_save_and_restore_announcement_events() { + use tempfile::tempdir; + + let temp_dir = tempdir().unwrap(); + let state_file = temp_dir.path().join("purgatory_state.json"); + + // Create a real bare repo directory so the restore path-existence check passes + let repo_dir = temp_dir.path().join("owner.git"); + std::fs::create_dir_all(&repo_dir).unwrap(); + + let purgatory = Purgatory::new(PathBuf::new()); + let keys = Keys::generate(); + + let ann_event = EventBuilder::text_note("announcement event") + .sign_with_keys(&keys) + .unwrap(); + let ann_event_id = ann_event.id; + + let mut relays = HashSet::new(); + 
relays.insert("wss://relay.example.com".to_string()); + + purgatory.add_announcement( + ann_event.clone(), + "my-repo".to_string(), + keys.public_key(), + repo_dir.clone(), + relays.clone(), + ); + + // Save to disk + purgatory.save_to_disk(&state_file).unwrap(); + assert!(state_file.exists()); + + // Create new purgatory and restore + let purgatory2 = Purgatory::new(PathBuf::new()); + purgatory2.restore_from_disk(&state_file).unwrap(); + + // File should be deleted after restore + assert!(!state_file.exists()); + + // Verify announcement was restored + let (ann_count, _, _) = purgatory2.count(); + assert_eq!(ann_count, 1); + + let restored = purgatory2 + .find_announcement(&keys.public_key(), "my-repo") + .unwrap(); + assert_eq!(restored.event.id, ann_event_id); + assert_eq!(restored.identifier, "my-repo"); + assert_eq!(restored.owner, keys.public_key()); + assert_eq!(restored.repo_path, repo_dir); + assert_eq!(restored.relays, relays); + assert!(!restored.soft_expired); +} + +#[tokio::test] +async fn test_soft_expired_announcements_not_persisted() { + use tempfile::tempdir; + + let temp_dir = tempdir().unwrap(); + let state_file = temp_dir.path().join("purgatory_state.json"); + + let repo_dir = temp_dir.path().join("owner.git"); + std::fs::create_dir_all(&repo_dir).unwrap(); + + let purgatory = Purgatory::new(PathBuf::new()); + let keys = Keys::generate(); + + let ann_event = EventBuilder::text_note("announcement event") + .sign_with_keys(&keys) + .unwrap(); + + purgatory.add_announcement( + ann_event.clone(), + "my-repo".to_string(), + keys.public_key(), + repo_dir.clone(), + HashSet::new(), + ); + + // Manually mark as soft-expired (bare repo deleted) + let key = (keys.public_key(), "my-repo".to_string()); + if let Some(mut entry) = purgatory.announcement_purgatory.get_mut(&key) { + entry.soft_expired = true; + } + + // Save to disk — soft-expired entry should be excluded + purgatory.save_to_disk(&state_file).unwrap(); + + // Create new purgatory and restore + 
let purgatory2 = Purgatory::new(PathBuf::new()); + purgatory2.restore_from_disk(&state_file).unwrap(); + + // Soft-expired announcement should NOT be restored + let (ann_count, _, _) = purgatory2.count(); + assert_eq!(ann_count, 0); +} + +#[tokio::test] +async fn test_announcement_with_missing_repo_skipped_on_restore() { + use tempfile::tempdir; + + let temp_dir = tempdir().unwrap(); + let state_file = temp_dir.path().join("purgatory_state.json"); + + // Point to a repo path that does NOT exist + let missing_repo = temp_dir.path().join("nonexistent.git"); + + let purgatory = Purgatory::new(PathBuf::new()); + let keys = Keys::generate(); + + let ann_event = EventBuilder::text_note("announcement event") + .sign_with_keys(&keys) + .unwrap(); + + purgatory.add_announcement( + ann_event.clone(), + "my-repo".to_string(), + keys.public_key(), + missing_repo.clone(), + HashSet::new(), + ); + + // Save to disk (repo path is serialized even though it doesn't exist) + purgatory.save_to_disk(&state_file).unwrap(); + + // Create new purgatory and restore — entry should be skipped + let purgatory2 = Purgatory::new(PathBuf::new()); + purgatory2.restore_from_disk(&state_file).unwrap(); + + let (ann_count, _, _) = purgatory2.count(); + assert_eq!(ann_count, 0); +} + #[tokio::test] async fn test_comprehensive_roundtrip() { use nostr_sdk::{Kind, Tag, TagKind}; @@ -2433,10 +2668,27 @@ async fn test_comprehensive_roundtrip() { let temp_dir = tempdir().unwrap(); let state_file = temp_dir.path().join("purgatory_state.json"); + // Create a real bare repo directory for the announcement + let repo_dir = temp_dir.path().join("owner.git"); + std::fs::create_dir_all(&repo_dir).unwrap(); + let purgatory = Purgatory::new(PathBuf::new()); let keys1 = Keys::generate(); let keys2 = Keys::generate(); + // Add announcement + let ann_event = EventBuilder::text_note("announcement") + .sign_with_keys(&keys1) + .unwrap(); + let ann_event_id = ann_event.id; + purgatory.add_announcement( + ann_event, + 
"repo1".to_string(), + keys1.public_key(), + repo_dir.clone(), + HashSet::new(), + ); + // Add multiple state events let state1 = EventBuilder::text_note("state 1") .sign_with_keys(&keys1) @@ -2476,7 +2728,8 @@ async fn test_comprehensive_roundtrip() { purgatory.cleanup(); // Verify initial state - let (_, state_count, pr_count) = purgatory.count(); + let (ann_count, state_count, pr_count) = purgatory.count(); + assert_eq!(ann_count, 1); // announcement assert_eq!(state_count, 2); // state1, state2 (expired_event was cleaned up) assert_eq!(pr_count, 2); // pr-1, pr-2 assert_eq!(purgatory.expired_count(), 1); // expired_event @@ -2489,11 +2742,18 @@ async fn test_comprehensive_roundtrip() { purgatory2.restore_from_disk(&state_file).unwrap(); // Verify all data was restored correctly - let (_, state_count2, pr_count2) = purgatory2.count(); + let (ann_count2, state_count2, pr_count2) = purgatory2.count(); + assert_eq!(ann_count2, 1); assert_eq!(state_count2, 2); assert_eq!(pr_count2, 2); assert_eq!(purgatory2.expired_count(), 1); + // Verify announcement + let restored_ann = purgatory2 + .find_announcement(&keys1.public_key(), "repo1") + .unwrap(); + assert_eq!(restored_ann.event.id, ann_event_id); + // Verify state events assert_eq!(purgatory2.find_state("repo1").len(), 1); assert_eq!(purgatory2.find_state("repo2").len(), 1); diff --git a/tests/common/purgatory_helpers.rs b/tests/common/purgatory_helpers.rs index 1d06f22..cfcea1c 100644 --- a/tests/common/purgatory_helpers.rs +++ b/tests/common/purgatory_helpers.rs @@ -338,6 +338,44 @@ pub fn build_repo_coord(keys: &Keys, identifier: &str) -> String { format!("30617:{}:{}", keys.public_key().to_hex(), identifier) } +/// Create a repository announcement event (kind 30617) for purgatory tests. +/// +/// Creates a minimal but valid NIP-34 repository announcement with a `d` tag, +/// optional `clone` URLs, and optional `relays` URLs. 
+/// +/// # Arguments +/// * `keys` - Keys for signing +/// * `identifier` - Repository identifier (d-tag) +/// * `clone_urls` - Clone URLs to include (may be empty) +/// * `relay_urls` - Relay URLs to include (may be empty) +/// +/// # Returns +/// * `Ok(Event)` - Signed announcement event +/// * `Err(String)` - If signing fails +pub fn create_announcement_event( + keys: &Keys, + identifier: &str, + clone_urls: &[&str], + relay_urls: &[&str], +) -> Result<Event, String> { + let mut tags = vec![Tag::identifier(identifier)]; + + if !clone_urls.is_empty() { + let urls: Vec<String> = clone_urls.iter().map(|s| s.to_string()).collect(); + tags.push(Tag::custom(TagKind::custom("clone"), urls)); + } + + if !relay_urls.is_empty() { + let urls: Vec<String> = relay_urls.iter().map(|s| s.to_string()).collect(); + tags.push(Tag::custom(TagKind::custom("relays"), urls)); + } + + EventBuilder::new(Kind::GitRepoAnnouncement, "") + .tags(tags) + .sign_with_keys(keys) + .map_err(|e| format!("Failed to sign announcement event: {}", e)) +} + /// Wait for an event to be served by a relay (not in purgatory). 
/// /// Polls the relay until the event is queryable, indicating it has diff --git a/tests/purgatory_persistence.rs b/tests/purgatory_persistence.rs index 5abbf15..05cb44b 100644 --- a/tests/purgatory_persistence.rs +++ b/tests/purgatory_persistence.rs @@ -31,9 +31,11 @@ mod common; +use common::purgatory_helpers::create_announcement_event; use ngit_grasp::purgatory::Purgatory; use ngit_grasp::sync::rejected_index::{EventType, RejectedEventsIndex, RejectionReason}; use nostr_sdk::prelude::*; +use std::collections::HashSet; use std::time::Duration; /// Helper to create a test event @@ -116,12 +118,31 @@ async fn test_full_purgatory_save_restore_cycle() { // Add a PR placeholder (git-data-first scenario) purgatory.add_pr_placeholder("placeholder-id".to_string(), "commit-xyz".to_string()); - // Note: We can't directly test expired events without accessing private fields, - // so we'll focus on testing state and PR events persistence + // Add an announcement to purgatory (requires a real directory for the repo path) + let repo_dir = temp_dir.path().join("repo.git"); + std::fs::create_dir_all(&repo_dir).unwrap(); + let ann_keys = Keys::generate(); + let ann_event = create_announcement_event( + &ann_keys, + "my-repo", + &["http://example.com/my-repo.git"], + &["wss://relay.example.com"], + ) + .unwrap(); + let ann_event_id = ann_event.id; + let mut ann_relays = HashSet::new(); + ann_relays.insert("wss://relay.example.com".to_string()); + purgatory.add_announcement( + ann_event, + "my-repo".to_string(), + ann_keys.public_key(), + repo_dir.clone(), + ann_relays, + ); // Verify initial counts let (announcement_count, state_count, pr_count) = purgatory.count(); - assert_eq!(announcement_count, 0, "Should have 0 announcements"); + assert_eq!(announcement_count, 1, "Should have 1 announcement"); assert_eq!(state_count, 2, "Should have 2 state events"); assert_eq!( pr_count, 3, @@ -144,13 +165,22 @@ async fn test_full_purgatory_save_restore_cycle() { // Verify all data was 
restored let (announcement_count2, state_count2, pr_count2) = purgatory2.count(); - assert_eq!(announcement_count2, 0, "Should have 0 announcements after restore"); + assert_eq!(announcement_count2, 1, "Should have 1 announcement after restore"); assert_eq!(state_count2, 2, "Should have 2 state events after restore"); assert_eq!( pr_count2, 3, "Should have 3 PR events after restore (2 events + 1 placeholder)" ); + // Verify announcement was restored correctly + let restored_ann = purgatory2 + .find_announcement(&ann_keys.public_key(), "my-repo") + .expect("Announcement should be restored"); + assert_eq!(restored_ann.event.id, ann_event_id); + assert_eq!(restored_ann.identifier, "my-repo"); + assert_eq!(restored_ann.repo_path, repo_dir); + assert!(!restored_ann.soft_expired); + // Verify specific state events let repo1_states = purgatory2.find_state("repo1"); assert_eq!(repo1_states.len(), 1); @@ -748,3 +778,100 @@ async fn test_rejected_cache_entries_expired_during_downtime() { assert_eq!(index2.hot_cache_len(), 0); assert_eq!(index2.cold_index_len(), 1); } + +/// Test 18: Announcement events are saved and restored across restarts +#[tokio::test] +async fn test_announcement_save_restore_cycle() { + let temp_dir = tempfile::tempdir().unwrap(); + let git_data_path = temp_dir.path().join("git"); + let state_path = temp_dir.path().join("purgatory.json"); + + // Create a real bare repo directory (restore skips entries whose path is missing) + let repo_dir = temp_dir.path().join("owner.git"); + std::fs::create_dir_all(&repo_dir).unwrap(); + + let purgatory = Purgatory::new(&git_data_path); + let keys = Keys::generate(); + + let ann_event = create_announcement_event( + &keys, + "my-repo", + &["http://example.com/my-repo.git"], + &["wss://relay.example.com"], + ) + .unwrap(); + let ann_event_id = ann_event.id; + + let mut relays = HashSet::new(); + relays.insert("wss://relay.example.com".to_string()); + + purgatory.add_announcement( + ann_event, + "my-repo".to_string(), + 
keys.public_key(), + repo_dir.clone(), + relays.clone(), + ); + + let (ann_count, _, _) = purgatory.count(); + assert_eq!(ann_count, 1); + + // Save to disk + purgatory.save_to_disk(&state_path).unwrap(); + assert!(state_path.exists()); + + // Restore into a fresh purgatory + let purgatory2 = Purgatory::new(&git_data_path); + purgatory2.restore_from_disk(&state_path).unwrap(); + + assert!(!state_path.exists(), "State file should be deleted after restore"); + + let (ann_count2, _, _) = purgatory2.count(); + assert_eq!(ann_count2, 1, "Announcement should be restored"); + + let restored = purgatory2 + .find_announcement(&keys.public_key(), "my-repo") + .expect("Announcement should be findable after restore"); + + assert_eq!(restored.event.id, ann_event_id); + assert_eq!(restored.identifier, "my-repo"); + assert_eq!(restored.owner, keys.public_key()); + assert_eq!(restored.repo_path, repo_dir); + assert_eq!(restored.relays, relays); + assert!(!restored.soft_expired); +} + +/// Test 19: Announcement with missing repo path is skipped on restore +#[tokio::test] +async fn test_announcement_missing_repo_skipped_on_restore() { + let temp_dir = tempfile::tempdir().unwrap(); + let git_data_path = temp_dir.path().join("git"); + let state_path = temp_dir.path().join("purgatory.json"); + + // Point to a path that does NOT exist on disk + let missing_repo = temp_dir.path().join("nonexistent.git"); + + let purgatory = Purgatory::new(&git_data_path); + let keys = Keys::generate(); + + let ann_event = create_announcement_event(&keys, "my-repo", &[], &[]).unwrap(); + + purgatory.add_announcement( + ann_event, + "my-repo".to_string(), + keys.public_key(), + missing_repo, + HashSet::new(), + ); + + purgatory.save_to_disk(&state_path).unwrap(); + + let purgatory2 = Purgatory::new(&git_data_path); + purgatory2.restore_from_disk(&state_path).unwrap(); + + let (ann_count, _, _) = purgatory2.count(); + assert_eq!( + ann_count, 0, + "Announcement with missing repo path must be skipped" + ); 
+} -- cgit v1.2.3