diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2025-11-03 17:02:31 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2025-11-03 17:02:31 +0000 |
| commit | d428baf30feec295870fadda2d335d1e7f89507b (patch) | |
| tree | 4d23e3a3fabb2512f903b778fb77fed97b805832 /docs/GIT_PROTOCOL.md | |
docs: one-prompt architecture plan
ok 2 prompts, the second one was about the test strategy so we could
reuse it. I was thinking of a tool like blossom audit. but i didnt
mention it specifically.
Diffstat (limited to 'docs/GIT_PROTOCOL.md')
| -rw-r--r-- | docs/GIT_PROTOCOL.md | 435 |
1 files changed, 435 insertions, 0 deletions
diff --git a/docs/GIT_PROTOCOL.md b/docs/GIT_PROTOCOL.md new file mode 100644 index 0000000..172a7bc --- /dev/null +++ b/docs/GIT_PROTOCOL.md | |||
| @@ -0,0 +1,435 @@ | |||
| 1 | # Git Smart HTTP Protocol Reference | ||
| 2 | |||
| 3 | ## Overview | ||
| 4 | |||
| 5 | This document explains the Git Smart HTTP protocol as it relates to our inline authorization implementation. | ||
| 6 | |||
| 7 | ## Protocol Flow | ||
| 8 | |||
| 9 | ### Clone/Fetch (Upload Pack) | ||
| 10 | |||
| 11 | ``` | ||
| 12 | 1. Client → GET /repo.git/info/refs?service=git-upload-pack | ||
| 13 | Server → 200 OK with ref advertisement | ||
| 14 | |||
| 15 | 2. Client → POST /repo.git/git-upload-pack | ||
| 16 | Body: want/have negotiation | ||
| 17 | Server → 200 OK with pack stream | ||
| 18 | ``` | ||
| 19 | |||
| 20 | **Authorization**: Not needed for public repositories. For GRASP-01, all repos are public. | ||
| 21 | |||
| 22 | ### Push (Receive Pack) | ||
| 23 | |||
| 24 | ``` | ||
| 25 | 1. Client → GET /repo.git/info/refs?service=git-receive-pack | ||
| 26 | Server → 200 OK with ref advertisement | ||
| 27 | |||
| 28 | 2. Client → POST /repo.git/git-receive-pack | ||
| 29 | Body: ref updates + pack data | ||
| 30 | Server → 200 OK with status | ||
| 31 | ``` | ||
| 32 | |||
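| 33 | **Authorization**: Required. Step 2 (the POST to `git-receive-pack`) is where inline authorization happens: the ref updates in the body are validated before the request is handed to Git. | ||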
| 34 | |||
| 35 | ## Receive Pack Request Format | ||
| 36 | |||
| 37 | The POST body to `git-receive-pack` has this structure: | ||
| 38 | |||
| 39 | ``` | ||
| 40 | [ref-updates] | ||
| 41 | [pack-data] | ||
| 42 | ``` | ||
| 43 | |||
| 44 | ### Ref Updates Format | ||
| 45 | |||
| 46 | Each ref update is in **pkt-line** format: | ||
| 47 | |||
| 48 | ``` | ||
| 49 | <4-byte-length><old-oid> <new-oid> <ref-name>\0<capabilities>\n | ||
| 50 | <4-byte-length><old-oid> <new-oid> <ref-name>\n | ||
| 51 | ... | ||
| 52 | 0000 | ||
| 53 | ``` | ||
| 54 | |||
| 55 | **Example** (the length prefix is hexadecimal; OIDs are abbreviated for readability, and the lengths are computed for full 40-character OIDs): | ||
| 56 | | ||
| 57 | ``` | ||
| 58 | 00830000000000000000000000000000000000000000 a1b2c3d4e5f6... refs/heads/main\0 report-status side-band-64k\n | ||
| 59 | 00650000000000000000000000000000000000000000 f6e5d4c3b2a1... refs/heads/dev\n | ||
| 60 | 0000 | ||
| 61 | ``` | ||
| 62 | |||
| 63 | ### Pkt-line Format | ||
| 64 | |||
| 65 | A pkt-line is: | ||
| 66 | - 4 hex digits: length of entire line (including the 4 digits) | ||
| 67 | - Payload data | ||
| 68 | - `0000` = flush packet (end of section) | ||
| 69 | |||
| 70 | **Length calculation**: | ||
| 71 | ``` | ||
| 72 | length = 4 (for length itself) + payload.len() | ||
| 73 | ``` | ||
| 74 | |||
| 75 | **Examples**: | ||
| 76 | ``` | ||
| 77 | "0006a\n" → length=6, payload="a\n" | ||
| 78 | "0000" → flush packet | ||
| 79 | "000bfoobar\n" → length=11, payload="foobar\n" | ||
| 80 | ``` | ||
| 81 | |||
| 82 | ### Parsing Ref Updates | ||
| 83 | |||
| 84 | ```rust | ||
| 85 | pub struct RefUpdate { | ||
| 86 | pub old_oid: String, // 40 hex chars | ||
| 87 | pub new_oid: String, // 40 hex chars | ||
| 88 | pub ref_name: String, // e.g., "refs/heads/main" | ||
| 89 | } | ||
| 90 | |||
| 91 | pub fn parse_ref_updates(body: &[u8]) -> Result<Vec<RefUpdate>> { | ||
| 92 | let mut updates = Vec::new(); | ||
| 93 | let mut offset = 0; | ||
| 94 | |||
| 95 | loop { | ||
| 96 | // Read pkt-line length | ||
| 97 | if offset + 4 > body.len() { | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | |||
| 101 | let length_str = std::str::from_utf8(&body[offset..offset+4])?; | ||
| 102 | let length = u16::from_str_radix(length_str, 16)? as usize; | ||
| 103 | |||
| 104 | // Check for flush packet | ||
| 105 | if length == 0 { | ||
| 106 | break; | ||
| 107 | } | ||
| 108 | |||
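| 109 | // Extract payload; a length below 4 cannot cover its own 4-digit prefix and would make the slice below panic | ||
| 110 | let payload_end = offset + length; | ||
| 111 | if length < 4 || payload_end > body.len() { | ||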
| 112 | return Err(Error::InvalidPktLine); | ||
| 113 | } | ||
| 114 | |||
| 115 | let payload = &body[offset+4..payload_end]; | ||
| 116 | |||
| 117 | // Parse ref update from payload | ||
| 118 | // Format: "<old-oid> <new-oid> <ref-name>[\0<capabilities>]\n" | ||
| 119 | let payload_str = std::str::from_utf8(payload)?; | ||
| 120 | |||
| 121 | // Remove trailing newline | ||
| 122 | let line = payload_str.trim_end_matches('\n'); | ||
| 123 | |||
| 124 | // Split on null byte (first line has capabilities) | ||
| 125 | let parts: Vec<&str> = line.split('\0').collect(); | ||
| 126 | let ref_line = parts[0]; | ||
| 127 | |||
| 128 | // Parse old-oid, new-oid, ref-name | ||
| 129 | let tokens: Vec<&str> = ref_line.split_whitespace().collect(); | ||
| 130 | if tokens.len() != 3 { | ||
| 131 | return Err(Error::InvalidRefUpdate); | ||
| 132 | } | ||
| 133 | |||
| 134 | updates.push(RefUpdate { | ||
| 135 | old_oid: tokens[0].to_string(), | ||
| 136 | new_oid: tokens[1].to_string(), | ||
| 137 | ref_name: tokens[2].to_string(), | ||
| 138 | }); | ||
| 139 | |||
| 140 | offset = payload_end; | ||
| 141 | } | ||
| 142 | |||
| 143 | Ok(updates) | ||
| 144 | } | ||
| 145 | ``` | ||
| 146 | |||
| 147 | ## Special OID Values | ||
| 148 | |||
| 149 | - `0000000000000000000000000000000000000000` (40 zeros) = the null OID ("no object"); its meaning depends on which side of the update it appears on | ||
| 150 | - When `old_oid` is all zeros: creating a new ref | ||
| 151 | - When `new_oid` is all zeros: deleting a ref | ||
| 152 | |||
| 153 | ## Validation Requirements | ||
| 154 | |||
| 155 | For GRASP-01, we must validate: | ||
| 156 | |||
| 157 | ### 1. Regular Branches/Tags | ||
| 158 | |||
| 159 | ```rust | ||
| 160 | fn validate_regular_ref( | ||
| 161 | state: &RepositoryState, | ||
| 162 | update: &RefUpdate, | ||
| 163 | ) -> Result<()> { | ||
| 164 | // Extract branch/tag name | ||
| 165 | let (ref_type, name) = if update.ref_name.starts_with("refs/heads/") { | ||
| 166 | ("branch", &update.ref_name[11..]) | ||
| 167 | } else if update.ref_name.starts_with("refs/tags/") { | ||
| 168 | ("tag", &update.ref_name[10..]) | ||
| 169 | } else { | ||
| 170 | return Err(Error::InvalidRefName); | ||
| 171 | }; | ||
| 172 | |||
| 173 | // Check against state | ||
| 174 | let expected = if ref_type == "branch" { | ||
| 175 | state.branches.get(name) | ||
| 176 | } else { | ||
| 177 | state.tags.get(name) | ||
| 178 | }; | ||
| 179 | |||
| 180 | match expected { | ||
| 181 | Some(oid) if oid == &update.new_oid => Ok(()), | ||
| 182 | Some(oid) => Err(Error::StateMismatch { | ||
| 183 | ref_name: update.ref_name.clone(), | ||
| 184 | expected: oid.clone(), | ||
| 185 | got: update.new_oid.clone(), | ||
| 186 | }), | ||
| 187 | None => Err(Error::RefNotInState(update.ref_name.clone())), | ||
| 188 | } | ||
| 189 | } | ||
| 190 | ``` | ||
| 191 | |||
| 192 | ### 2. PR Refs (`refs/nostr/<event-id>`) | ||
| 193 | |||
| 194 | ```rust | ||
| 195 | fn validate_pr_ref(update: &RefUpdate) -> Result<()> { | ||
| 196 | // Extract event ID | ||
| 197 | let event_id = &update.ref_name[11..]; // Skip "refs/nostr/" | ||
| 198 | |||
| 199 | // Validate it's a valid 32-byte hex | ||
| 200 | if event_id.len() != 64 { | ||
| 201 | return Err(Error::InvalidEventId); | ||
| 202 | } | ||
| 203 | |||
| 204 | if !event_id.chars().all(|c| c.is_ascii_hexdigit()) { | ||
| 205 | return Err(Error::InvalidEventId); | ||
| 206 | } | ||
| 207 | |||
| 208 | // TODO: Could optionally verify event exists on relay | ||
| 209 | // TODO: Could verify event references this repository | ||
| 210 | |||
| 211 | Ok(()) | ||
| 212 | } | ||
| 213 | ``` | ||
| 214 | |||
| 215 | ### 3. Reject pr/* Branches | ||
| 216 | |||
| 217 | ```rust | ||
| 218 | fn reject_pr_branches(update: &RefUpdate) -> Result<()> { | ||
| 219 | if update.ref_name.starts_with("refs/heads/pr/") { | ||
| 220 | return Err(Error::InvalidRef( | ||
| 221 | "pr/* branches must use refs/nostr/<event-id>".into() | ||
| 222 | )); | ||
| 223 | } | ||
| 224 | Ok(()) | ||
| 225 | } | ||
| 226 | ``` | ||
| 227 | |||
| 228 | ## Complete Validation Flow | ||
| 229 | |||
| 230 | ```rust | ||
| 231 | pub async fn validate_push( | ||
| 232 | &self, | ||
| 233 | npub: &str, | ||
| 234 | identifier: &str, | ||
| 235 | ref_updates: Vec<RefUpdate>, | ||
| 236 | ) -> Result<()> { | ||
| 237 | // 1. Fetch events from local relay | ||
| 238 | let events = self.fetch_events(identifier).await?; | ||
| 239 | |||
| 240 | // 2. Get pubkey from npub | ||
| 241 | let pubkey = decode_npub(npub)?; | ||
| 242 | |||
| 243 | // 3. Get maintainer set (recursive) | ||
| 244 | let maintainers = get_maintainers(&events, &pubkey, identifier); | ||
| 245 | if maintainers.is_empty() { | ||
| 246 | return Err(Error::NoAnnouncement); | ||
| 247 | } | ||
| 248 | |||
| 249 | // 4. Get latest state from maintainers | ||
| 250 | let state = get_state_from_maintainers(&events, &maintainers)?; | ||
| 251 | |||
| 252 | // 5. Validate each ref update | ||
| 253 | for update in ref_updates { | ||
| 254 | // Check for pr/* branches (reject) | ||
| 255 | reject_pr_branches(&update)?; | ||
| 256 | |||
| 257 | // Handle refs/nostr/* (allow) | ||
| 258 | if update.ref_name.starts_with("refs/nostr/") { | ||
| 259 | validate_pr_ref(&update)?; | ||
| 260 | continue; | ||
| 261 | } | ||
| 262 | |||
| 263 | // Validate against state | ||
| 264 | validate_regular_ref(&state, &update)?; | ||
| 265 | } | ||
| 266 | |||
| 267 | Ok(()) | ||
| 268 | } | ||
| 269 | ``` | ||
| 270 | |||
| 271 | ## Integration with actix-web | ||
| 272 | |||
| 273 | ```rust | ||
| 274 | pub async fn git_receive_pack( | ||
| 275 | req: HttpRequest, | ||
| 276 | mut payload: web::Payload, | ||
| 277 | state: web::Data<AppState>, | ||
| 278 | ) -> Result<HttpResponse> { | ||
| 279 | // 1. Extract repo info from path | ||
| 280 | let path = req.path(); | ||
| 281 | let (npub, identifier) = parse_repo_path(path)?; | ||
| 282 | |||
| 283 | // 2. Check repository exists | ||
| 284 | if !state.repo_manager.exists(&npub, &identifier).await { | ||
| 285 | return Ok(HttpResponse::NotFound().body("Repository not found")); | ||
| 286 | } | ||
| 287 | |||
| 288 | // 3. Read request body (need to buffer for parsing) | ||
| 289 | let mut body = web::BytesMut::new(); | ||
| 290 | while let Some(chunk) = payload.next().await { | ||
| 291 | body.extend_from_slice(&chunk?); | ||
| 292 | } | ||
| 293 | |||
| 294 | // 4. Parse ref updates from body | ||
| 295 | let ref_updates = parse_ref_updates(&body)?; | ||
| 296 | |||
| 297 | // 5. VALIDATE! | ||
| 298 | let validator = PushValidator::new(state.nostr_client.clone()); | ||
| 299 | if let Err(e) = validator.validate_push(&npub, &identifier, ref_updates).await { | ||
| 300 | return Ok(HttpResponse::Forbidden() | ||
| 301 | .content_type("text/plain") | ||
| 302 | .body(format!("error: {}\n", e))); | ||
| 303 | } | ||
| 304 | |||
| 305 | // 6. Valid! Spawn git-receive-pack | ||
| 306 | let repo_path = state.repo_manager.get_path(&npub, &identifier); | ||
| 307 | let mut cmd = Command::new("git"); | ||
| 308 | cmd.arg("receive-pack") | ||
| 309 | .arg("--stateless-rpc") | ||
| 310 | .arg(&repo_path) | ||
| 311 | .stdin(Stdio::piped()) | ||
| 312 | .stdout(Stdio::piped()) | ||
| 313 | .stderr(Stdio::piped()); | ||
| 314 | |||
| 315 | let mut child = cmd.spawn()?; | ||
| 316 | |||
| 317 | // 7. Write body to git stdin | ||
| 318 | let mut stdin = child.stdin.take().unwrap(); | ||
| 319 | stdin.write_all(&body).await?; | ||
| 320 | drop(stdin); | ||
| 321 | |||
| 322 | // 8. Stream git stdout back to client | ||
| 323 | let stdout = child.stdout.take().unwrap(); | ||
| 324 | let stream = FramedRead::new(stdout, BytesCodec::new()); | ||
| 325 | |||
| 326 | Ok(HttpResponse::Ok() | ||
| 327 | .content_type("application/x-git-receive-pack-result") | ||
| 328 | .streaming(stream)) | ||
| 329 | } | ||
| 330 | ``` | ||
| 331 | |||
| 332 | ## Error Responses | ||
| 333 | |||
| 334 | Git clients expect specific error formats: | ||
| 335 | |||
| 336 | ### Success | ||
| 337 | ``` | ||
| 338 | HTTP/1.1 200 OK | ||
| 339 | Content-Type: application/x-git-receive-pack-result | ||
| 340 | |||
| 341 | [git output stream] | ||
| 342 | ``` | ||
| 343 | |||
| 344 | ### Validation Failure | ||
| 345 | ``` | ||
| 346 | HTTP/1.1 403 Forbidden | ||
| 347 | Content-Type: text/plain | ||
| 348 | |||
| 349 | error: cannot push refs/heads/main to a1b2c3d as nostr state event is at f6e5d4c | ||
| 350 | ``` | ||
| 351 | |||
| 352 | The `error:` prefix makes it display nicely in git clients. | ||
| 353 | |||
| 354 | ## Testing | ||
| 355 | |||
| 356 | ```rust | ||
| 357 | #[test] | ||
| 358 | fn test_parse_ref_updates() { | ||
| 359 | let body = b"00750000000000000000000000000000000000000000 \ | ||
| 360 | a1b2c3d4e5f6789012345678901234567890abcd \ | ||
| 361 | refs/heads/main\0 report-status\n\ | ||
| 362 | 0000"; | ||
| 363 | |||
| 364 | let updates = parse_ref_updates(body).unwrap(); | ||
| 365 | assert_eq!(updates.len(), 1); | ||
| 366 | assert_eq!(updates[0].old_oid, "0000000000000000000000000000000000000000"); | ||
| 367 | assert_eq!(updates[0].new_oid, "a1b2c3d4e5f6789012345678901234567890abcd"); | ||
| 368 | assert_eq!(updates[0].ref_name, "refs/heads/main"); | ||
| 369 | } | ||
| 370 | |||
| 371 | #[tokio::test] | ||
| 372 | async fn test_validate_matching_state() { | ||
| 373 | let state = RepositoryState { | ||
| 374 | branches: HashMap::from([ | ||
| 375 | ("main".into(), "a1b2c3d4...".into()), | ||
| 376 | ]), | ||
| 377 | tags: HashMap::new(), | ||
| 378 | }; | ||
| 379 | |||
| 380 | let update = RefUpdate { | ||
| 381 | old_oid: "0000...".into(), | ||
| 382 | new_oid: "a1b2c3d4...".into(), | ||
| 383 | ref_name: "refs/heads/main".into(), | ||
| 384 | }; | ||
| 385 | |||
| 386 | assert!(validate_regular_ref(&state, &update).is_ok()); | ||
| 387 | } | ||
| 388 | ``` | ||
| 389 | |||
| 390 | ## Performance Considerations | ||
| 391 | |||
| 392 | 1. **Buffering**: The handler shown above buffers the entire request body before parsing ref updates. For large pushes, this can be memory-intensive. | ||
| 393 | |||
| 394 | **Mitigation**: Limit max request size (e.g., 100MB) | ||
| 395 | |||
| 396 | 2. **Pack Data**: After ref updates, the body contains pack data. We don't need to parse this, just forward it to Git. | ||
| 397 | |||
| 398 | **Optimization**: Could use a streaming parser that only extracts ref updates, then streams the rest | ||
| 399 | |||
| 400 | 3. **Validation Speed**: State lookup and validation should be fast. | ||
| 401 | |||
| 402 | **Optimization**: Cache state events with TTL | ||
| 403 | |||
| 404 | ## Future Enhancements | ||
| 405 | |||
| 406 | ### Streaming Parser | ||
| 407 | |||
| 408 | Instead of buffering entire body: | ||
| 409 | |||
| 410 | ```rust | ||
| 411 | // Read pkt-lines until flush packet | ||
| 412 | let ref_updates = parse_ref_updates_streaming(&mut payload).await?; | ||
| 413 | |||
| 414 | // Now payload is positioned at pack data | ||
| 415 | // Stream directly to git without buffering | ||
| 416 | spawn_git_and_stream(payload, repo_path).await?; | ||
| 417 | ``` | ||
| 418 | |||
| 419 | ### Pack Inspection | ||
| 420 | |||
| 421 | For advanced validation (future): | ||
| 422 | |||
| 423 | ```rust | ||
| 424 | // Parse pack header to get object count | ||
| 425 | let (ref_updates, pack_header) = parse_receive_pack_header(&body)?; | ||
| 426 | |||
| 427 | // Could validate pack contents before accepting | ||
| 428 | validate_pack_contents(&pack_header)?; | ||
| 429 | ``` | ||
| 430 | |||
| 431 | ## References | ||
| 432 | |||
| 433 | - [Git HTTP Protocol Docs](https://git-scm.com/docs/http-protocol) | ||
| 434 | - [Git Pack Protocol](https://git-scm.com/docs/pack-protocol) | ||
| 435 | - [Pkt-line Format](https://git-scm.com/docs/protocol-common#_pkt_line_format) | ||