diff options
Diffstat (limited to 'docs/explanation')
| -rw-r--r-- | docs/explanation/state-structure-redesign-proposal.md | 373 |
1 files changed, 0 insertions, 373 deletions
diff --git a/docs/explanation/state-structure-redesign-proposal.md b/docs/explanation/state-structure-redesign-proposal.md deleted file mode 100644 index 0a27cf4..0000000 --- a/docs/explanation/state-structure-redesign-proposal.md +++ /dev/null | |||
| @@ -1,373 +0,0 @@ | |||
| 1 | # State Structure Redesign Proposal v2 | ||
| 2 | |||
| 3 | ## The Core Problem | ||
| 4 | |||
| 5 | We need to transform: | ||
| 6 | - **Repo Announcements** (30617) that list relays | ||
| 7 | - **Root Events** (1617/1618/1619/1621) that tag repos | ||
| 8 | |||
| 9 | Into: | ||
| 10 | - **Per-relay subscriptions**: which repos and root events to sync from each relay | ||
| 11 | |||
| 12 | And generate **RelayActions** when this mapping changes. | ||
| 13 | |||
| 14 | --- | ||
| 15 | |||
| 16 | ## Proposed Data Model | ||
| 17 | |||
| 18 | ### 1. RepoIndex (Primary Source of Truth) | ||
| 19 | |||
| 20 | ```rust | ||
| 21 | /// Everything we know about repos we're tracking | ||
| 22 | /// Key: repo addressable ref ("30617:pubkey:identifier") | ||
| 23 | pub type RepoIndex = Arc<RwLock<HashMap<String, RepoInfo>>>; | ||
| 24 | |||
| 25 | #[derive(Debug, Clone, Default)] | ||
| 26 | pub struct RepoInfo { | ||
| 27 | /// Relay URLs listed in the repo's announcement | ||
| 28 | pub relays: HashSet<String>, | ||
| 29 | /// Root event IDs that reference this repo | ||
| 30 | pub root_events: HashSet<EventId>, | ||
| 31 | } | ||
| 32 | ``` | ||
| 33 | |||
| 34 | **Updated by:** Database init, batch processing of new announcements/root events | ||
| 35 | |||
| 36 | ### 2. RelayIndex (Applied State) | ||
| 37 | |||
| 38 | ```rust | ||
| 39 | /// What we've told each relay to sync | ||
| 40 | /// Key: relay URL | ||
| 41 | pub type RelayIndex = Arc<RwLock<HashMap<String, SyncTarget>>>; | ||
| 42 | |||
| 43 | #[derive(Debug, Clone, Default, PartialEq)] | ||
| 44 | pub struct SyncTarget { | ||
| 45 | /// Repos we're syncing for this relay | ||
| 46 | pub repos: HashSet<String>, | ||
| 47 | /// Root events we're tracking | ||
| 48 | pub root_events: HashSet<EventId>, | ||
| 49 | } | ||
| 50 | ``` | ||
| 51 | |||
| 52 | **Updated by:** SyncManager after RelayActions are applied | ||
| 53 | |||
| 54 | --- | ||
| 55 | |||
| 56 | ## The Transformation | ||
| 57 | |||
| 58 | ```mermaid | ||
| 59 | flowchart LR | ||
| 60 | subgraph Input | ||
| 61 | RA[Repo Announcements] | ||
| 62 | RE[Root Events] | ||
| 63 | end | ||
| 64 | |||
| 65 | subgraph RepoIndex | ||
| 66 | R1[repo_a: relays=X,Y events=1,2] | ||
| 67 | R2[repo_b: relays=Y,Z events=3] | ||
| 68 | end | ||
| 69 | |||
| 70 | subgraph Derived Target | ||
| 71 | T1[relay_X: repos=a events=1,2] | ||
| 72 | T2[relay_Y: repos=a,b events=1,2,3] | ||
| 73 | T3[relay_Z: repos=b events=3] | ||
| 74 | end | ||
| 75 | |||
| 76 | subgraph RelayIndex Applied | ||
| 77 | A1[relay_X: repos=a events=1,2] | ||
| 78 | A2[relay_Y: repos=a events=1,2] | ||
| 79 | end | ||
| 80 | |||
| 81 | RA --> R1 | ||
| 82 | RA --> R2 | ||
| 83 | RE --> R1 | ||
| 84 | RE --> R2 | ||
| 85 | |||
| 86 | R1 --> T1 | ||
| 87 | R1 --> T2 | ||
| 88 | R2 --> T2 | ||
| 89 | R2 --> T3 | ||
| 90 | ``` | ||
| 91 | |||
| 92 | The **diff** between Derived Target and RelayIndex produces RelayActions: | ||
| 93 | - relay_Y needs AddFilters for repo_b and event 3 | ||
| 94 | - relay_Z needs SpawnRelay | ||
| 95 | |||
| 96 | --- | ||
| 97 | |||
| 98 | ## Algorithm: derive_target_from_repo_index | ||
| 99 | |||
| 100 | ```rust | ||
| 101 | /// Derive what we SHOULD be syncing from the repo data | ||
| 102 | fn derive_relay_targets(repo_index: &HashMap<String, RepoInfo>) -> HashMap<String, SyncTarget> { | ||
| 103 | let mut targets: HashMap<String, SyncTarget> = HashMap::new(); | ||
| 104 | |||
| 105 | for (repo_ref, info) in repo_index { | ||
| 106 | // For each relay that lists this repo | ||
| 107 | for relay_url in &info.relays { | ||
| 108 | let target = targets.entry(relay_url.clone()).or_default(); | ||
| 109 | target.repos.insert(repo_ref.clone()); | ||
| 110 | target.root_events.extend(info.root_events.iter().cloned()); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | targets | ||
| 115 | } | ||
| 116 | ``` | ||
| 117 | |||
| 118 | --- | ||
| 119 | |||
| 120 | ## Algorithm: process_batch | ||
| 121 | |||
| 122 | ```rust | ||
| 123 | async fn process_batch(&self, pending: &mut PendingUpdates) { | ||
| 124 | // ============================================ | ||
| 125 | // STEP 1: Update RepoIndex from batch | ||
| 126 | // ============================================ | ||
| 127 | |||
| 128 | let mut repo_index = self.repo_index.write().await; | ||
| 129 | |||
| 130 | // 1a. Process root events - add to repo's root_events set | ||
| 131 | for event in pending.root_events.drain(..) { | ||
| 132 | for repo_ref in extract_repo_refs(&event) { | ||
| 133 | repo_index.entry(repo_ref) | ||
| 134 | .or_default() | ||
| 135 | .root_events | ||
| 136 | .insert(event.id); | ||
| 137 | } | ||
| 138 | } | ||
| 139 | |||
| 140 | // 1b. Process announcements - update repo's relay set | ||
| 141 | for event in pending.announcements.drain(..) { | ||
| 142 | if !lists_our_service(&event) { | ||
| 143 | continue; | ||
| 144 | } | ||
| 145 | let repo_ref = build_repo_ref(&event); | ||
| 146 | let relay_urls: HashSet<String> = extract_relay_urls(&event) | ||
| 147 | .into_iter() | ||
| 148 | .filter(|url| !is_own_relay(url)) | ||
| 149 | .collect(); | ||
| 150 | |||
| 151 | // Replace relay set (handles updates that change relays) | ||
| 152 | repo_index.entry(repo_ref) | ||
| 153 | .or_default() | ||
| 154 | .relays = relay_urls; | ||
| 155 | } | ||
| 156 | |||
| 157 | // ============================================ | ||
| 158 | // STEP 2: Derive target state from RepoIndex | ||
| 159 | // ============================================ | ||
| 160 | |||
| 161 | let target = derive_relay_targets(&repo_index); | ||
| 162 | drop(repo_index); // Release write lock | ||
| 163 | |||
| 164 | // ============================================ | ||
| 165 | // STEP 3: Diff target vs applied (RelayIndex) | ||
| 166 | // ============================================ | ||
| 167 | |||
| 168 | let applied = self.relay_index.read().await; | ||
| 169 | let actions = compute_relay_actions(&target, &applied); | ||
| 170 | drop(applied); // Release read lock | ||
| 171 | |||
| 172 | // ============================================ | ||
| 173 | // STEP 4: Send actions & update RelayIndex | ||
| 174 | // ============================================ | ||
| 175 | |||
| 176 | for action in actions { | ||
| 177 | match &action { | ||
| 178 | RelayAction::SpawnRelay { relay_url, repos_and_root_events } => { | ||
| 179 | // Update RelayIndex with new relay | ||
| 180 | let mut applied = self.relay_index.write().await; | ||
| 181 | applied.insert(relay_url.clone(), SyncTarget { | ||
| 182 | repos: repos_and_root_events.keys().cloned().collect(), | ||
| 183 | root_events: repos_and_root_events.values() | ||
| 184 | .flat_map(|e| e.iter().cloned()) | ||
| 185 | .collect(), | ||
| 186 | }); | ||
| 187 | } | ||
| 188 | RelayAction::AddFilters { relay_url, repos_and_new_root_event } => { | ||
| 189 | // Update RelayIndex with additions | ||
| 190 | let mut applied = self.relay_index.write().await; | ||
| 191 | if let Some(target) = applied.get_mut(relay_url) { | ||
| 192 | for (repo, events) in repos_and_new_root_event { | ||
| 193 | target.repos.insert(repo.clone()); | ||
| 194 | target.root_events.extend(events.iter().cloned()); | ||
| 195 | } | ||
| 196 | } | ||
| 197 | } | ||
| 198 | } | ||
| 199 | |||
| 200 | // Send action to SyncManager | ||
| 201 | let _ = self.action_tx.send(action).await; | ||
| 202 | } | ||
| 203 | } | ||
| 204 | ``` | ||
| 205 | |||
| 206 | --- | ||
| 207 | |||
| 208 | ## Algorithm: compute_relay_actions | ||
| 209 | |||
| 210 | ```rust | ||
| 211 | fn compute_relay_actions( | ||
| 212 | target: &HashMap<String, SyncTarget>, | ||
| 213 | applied: &HashMap<String, SyncTarget>, | ||
| 214 | ) -> Vec<RelayAction> { | ||
| 215 | let mut actions = Vec::new(); | ||
| 216 | |||
| 217 | for (relay_url, target_state) in target { | ||
| 218 | match applied.get(relay_url) { | ||
| 219 | None => { | ||
| 220 | // New relay - spawn it | ||
| 221 | let mut repos_and_events = HashMap::new(); | ||
| 222 | for repo in &target_state.repos { | ||
| 223 | // Get events for this specific repo | ||
| 224 | let events = target_state.root_events.clone(); // simplified | ||
| 225 | repos_and_events.insert(repo.clone(), events); | ||
| 226 | } | ||
| 227 | actions.push(RelayAction::SpawnRelay { | ||
| 228 | relay_url: relay_url.clone(), | ||
| 229 | repos_and_root_events: repos_and_events, | ||
| 230 | }); | ||
| 231 | } | ||
| 232 | Some(applied_state) => { | ||
| 233 | // Existing relay - check for new repos/events | ||
| 234 | let new_repos: HashSet<_> = target_state.repos | ||
| 235 | .difference(&applied_state.repos) | ||
| 236 | .cloned() | ||
| 237 | .collect(); | ||
| 238 | let new_events: HashSet<_> = target_state.root_events | ||
| 239 | .difference(&applied_state.root_events) | ||
| 240 | .cloned() | ||
| 241 | .collect(); | ||
| 242 | |||
| 243 | if !new_repos.is_empty() || !new_events.is_empty() { | ||
| 244 | let mut repos_and_events = HashMap::new(); | ||
| 245 | for repo in &new_repos { | ||
| 246 | repos_and_events.insert(repo.clone(), new_events.clone()); | ||
| 247 | } | ||
| 248 | // Also handle new events for existing repos | ||
| 249 | if !new_events.is_empty() && new_repos.is_empty() { | ||
| 250 | for repo in &applied_state.repos { | ||
| 251 | repos_and_events.insert(repo.clone(), new_events.clone()); | ||
| 252 | } | ||
| 253 | } | ||
| 254 | |||
| 255 | actions.push(RelayAction::AddFilters { | ||
| 256 | relay_url: relay_url.clone(), | ||
| 257 | repos_and_new_root_event: repos_and_events, | ||
| 258 | }); | ||
| 259 | } | ||
| 260 | } | ||
| 261 | } | ||
| 262 | } | ||
| 263 | |||
| 264 | // Future: detect relay removal (in applied but not in target) | ||
| 265 | |||
| 266 | actions | ||
| 267 | } | ||
| 268 | ``` | ||
| 269 | |||
| 270 | --- | ||
| 271 | |||
| 272 | ## Handling Announcement Updates | ||
| 273 | |||
| 274 | When an announcement is **updated** and changes its relay list: | ||
| 275 | |||
| 276 | ```mermaid | ||
| 277 | flowchart TD | ||
| 278 | A[repo_a announcement updated] --> B[Old: relays X,Y] | ||
| 279 | B --> C[New: relays Y,Z] | ||
| 280 | C --> D[RepoIndex updated: repo_a.relays = Y,Z] | ||
| 281 | D --> E[derive_relay_targets] | ||
| 282 | E --> F[Target: X=empty, Y=repo_a, Z=repo_a] | ||
| 283 | F --> G[Diff with Applied: X=repo_a, Y=repo_a] | ||
| 284 | G --> H1[X: repo_a removed - future RemoveFilters] | ||
| 285 | G --> H2[Z: new relay - SpawnRelay] | ||
| 286 | ``` | ||
| 287 | |||
| 288 | The current RelayAction types only support growth (SpawnRelay, AddFilters). Removal would need a new `RemoveFilters` action type - this is a future enhancement. | ||
| 289 | |||
| 290 | --- | ||
| 291 | |||
| 292 | ## Name Mappings | ||
| 293 | |||
| 294 | | Current | Proposed | Semantics | | ||
| 295 | |---------|----------|-----------| | ||
| 296 | | `FollowingRepoRootEvents` | `RepoIndex` | Per-repo: relays + root events | | ||
| 297 | | `SyncRelays` | `RelayIndex` | Per-relay: what we're syncing (applied state) | | ||
| 298 | | - | `SyncTarget` | Struct for repos + events | | ||
| 299 | | - | `RepoInfo` | Struct for relay set + event set | | ||
| 300 | |||
| 301 | --- | ||
| 302 | |||
| 303 | ## Data Flow Summary | ||
| 304 | |||
| 305 | ```mermaid | ||
| 306 | flowchart TB | ||
| 307 | subgraph Batch Input | ||
| 308 | RA[30617 Announcements] | ||
| 309 | RE[Root Events 1617-1621] | ||
| 310 | end | ||
| 311 | |||
| 312 | subgraph Step 1: Update Source | ||
| 313 | RI[RepoIndex] | ||
| 314 | end | ||
| 315 | |||
| 316 | subgraph Step 2: Derive Target | ||
| 317 | DT[derive_relay_targets] | ||
| 318 | TGT[Target HashMap] | ||
| 319 | end | ||
| 320 | |||
| 321 | subgraph Step 3: Diff | ||
| 322 | RLI[RelayIndex - Applied] | ||
| 323 | DIFF[compute_relay_actions] | ||
| 324 | end | ||
| 325 | |||
| 326 | subgraph Step 4: Apply | ||
| 327 | ACT[RelayActions] | ||
| 328 | SM[SyncManager] | ||
| 329 | end | ||
| 330 | |||
| 331 | RA --> RI | ||
| 332 | RE --> RI | ||
| 333 | RI --> DT | ||
| 334 | DT --> TGT | ||
| 335 | TGT --> DIFF | ||
| 336 | RLI --> DIFF | ||
| 337 | DIFF --> ACT | ||
| 338 | ACT --> SM | ||
| 339 | ACT --> |update| RLI | ||
| 340 | ``` | ||
| 341 | |||
| 342 | --- | ||
| 343 | |||
| 344 | ## Files to Modify | ||
| 345 | |||
| 346 | | File | Changes | | ||
| 347 | |------|---------| | ||
| 348 | | [`src/sync/mod.rs`](src/sync/mod.rs) | Replace type aliases with RepoIndex/RelayIndex + structs | | ||
| 349 | | [`src/sync/self_subscriber.rs`](src/sync/self_subscriber.rs) | Rewrite process_batch with new algorithm | | ||
| 350 | |||
| 351 | --- | ||
| 352 | |||
| 353 | ## Questions for Approval | ||
| 354 | |||
| 355 | 1. **Naming**: Are `RepoIndex`/`RelayIndex` and `RepoInfo`/`SyncTarget` clear enough? | ||
| 356 | |||
| 357 | 2. **When to update RelayIndex**: Should we: | ||
| 358 | - (a) Update immediately when generating action (optimistic) ← proposed above | ||
| 359 | - (b) Update only after SyncManager confirms action succeeded | ||
| 360 | |||
| 361 | 3. **Bootstrap relay**: Keep special-casing it in RelayIndex (always present)? | ||
| 362 | |||
| 363 | 4. **Future work**: Add `RemoveFilters` action for relay removal, or defer? | ||
| 364 | |||
| 365 | --- | ||
| 366 | |||
| 367 | ## Benefits | ||
| 368 | |||
| 369 | 1. **Logical flow**: Source → Derived → Diff → Actions | ||
| 370 | 2. **Single source of truth**: RepoIndex is the authoritative data | ||
| 371 | 3. **Clear transformation**: `derive_relay_targets()` is a pure function | ||
| 372 | 4. **Handles updates**: Replacing `repo.relays` naturally handles announcement changes | ||
| 373 | 5. **Testable**: Each step can be unit tested independently \ No newline at end of file | ||