diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-02-18 14:48:20 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-02-18 14:48:20 +0000 |
| commit | fcff4541e1f36b6575596c353637b25aeae9bdcf (patch) | |
| tree | d897ce824ca49a8ffef9f55f5f36777687573aab /src/lib/mbox_parser.rs | |
| parent | e6bb9effa194fe63b5e969c090dbe6e93f13d312 (diff) | |
feat: handle missing optional patch tags for pr/ flow
- Add mbox_parser module to extract metadata from patch content
- Extract author/committer from From: and Date: headers when tags missing
- Extract commit message body as fallback for description tag
- Implement best-guess parent commit logic using committer timestamps
- Update patch_supports_commit_ids to accept mbox-parseable patches
- Enable patches without optional tags to appear as pr/ branches
Diffstat (limited to 'src/lib/mbox_parser.rs')
| -rw-r--r-- | src/lib/mbox_parser.rs | 452 |
1 files changed, 452 insertions, 0 deletions
diff --git a/src/lib/mbox_parser.rs b/src/lib/mbox_parser.rs new file mode 100644 index 0000000..40603b1 --- /dev/null +++ b/src/lib/mbox_parser.rs | |||
| @@ -0,0 +1,452 @@ | |||
| 1 | use anyhow::{Context, Result, bail}; | ||
| 2 | use chrono::{DateTime, Datelike}; | ||
| 3 | |||
/// Metadata recovered from a single patch in mbox form.
///
/// Built by [`parse_mbox_patch`] from the leading `From ` separator line and the
/// `From:`, `Date:` and `Subject:` headers, plus the free text that follows them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PatchMetadata {
    /// Second whitespace-separated token of the leading `From ` line.
    pub commit_id: String,
    /// Display name from the `From:` header; for a bare address this is the
    /// part in front of the `@`.
    pub author_name: String,
    /// Address from the `From:` header (the text between `<` and `>` when present).
    pub author_email: String,
    /// `Date:` header expressed as a Unix timestamp in seconds.
    pub author_timestamp: i64,
    /// UTC offset of the `Date:` header in minutes (`+0500` is `300`).
    pub author_offset_minutes: i32,
    /// Timestamp parsed from the date on the `From ` separator line, or `None`
    /// when that date could not be parsed.
    pub committer_timestamp: Option<i64>,
    /// `Subject:` header with reply markers and bracketed prefixes removed.
    pub subject: String,
    /// Commit message text found between the headers and the diff.
    pub body: String,
}
| 15 | |||
| 16 | pub fn parse_mbox_patch(content: &str) -> Result<PatchMetadata> { | ||
| 17 | let commit_id = extract_commit_id_from_mbox(content)?; | ||
| 18 | let (author_name, author_email) = extract_author_from_from_header(content)?; | ||
| 19 | let (author_timestamp, author_offset_minutes) = extract_date_from_header(content)?; | ||
| 20 | let committer_timestamp = extract_committer_date_from_mbox(content)?; | ||
| 21 | let subject = extract_subject(content)?; | ||
| 22 | let body = extract_commit_message_body(content)?; | ||
| 23 | |||
| 24 | Ok(PatchMetadata { | ||
| 25 | commit_id, | ||
| 26 | author_name, | ||
| 27 | author_email, | ||
| 28 | author_timestamp, | ||
| 29 | author_offset_minutes, | ||
| 30 | committer_timestamp, | ||
| 31 | subject, | ||
| 32 | body, | ||
| 33 | }) | ||
| 34 | } | ||
| 35 | |||
| 36 | fn extract_commit_id_from_mbox(content: &str) -> Result<String> { | ||
| 37 | if !content.starts_with("From ") { | ||
| 38 | bail!("patch does not start with 'From ' - not a valid mbox format"); | ||
| 39 | } | ||
| 40 | |||
| 41 | let first_line = content.lines().next().context("patch content is empty")?; | ||
| 42 | |||
| 43 | let parts: Vec<&str> = first_line.split_whitespace().collect(); | ||
| 44 | if parts.len() < 2 { | ||
| 45 | bail!("mbox 'From ' line does not contain a commit id"); | ||
| 46 | } | ||
| 47 | |||
| 48 | Ok(parts[1].to_string()) | ||
| 49 | } | ||
| 50 | |||
| 51 | fn extract_author_from_from_header(content: &str) -> Result<(String, String)> { | ||
| 52 | let from_line = content | ||
| 53 | .lines() | ||
| 54 | .find(|line| line.starts_with("From:")) | ||
| 55 | .context("patch does not contain a 'From:' header")?; | ||
| 56 | |||
| 57 | let from_value = from_line | ||
| 58 | .strip_prefix("From:") | ||
| 59 | .context("failed to strip 'From:' prefix")? | ||
| 60 | .trim(); | ||
| 61 | |||
| 62 | parse_from_header_value(from_value) | ||
| 63 | } | ||
| 64 | |||
| 65 | fn parse_from_header_value(value: &str) -> Result<(String, String)> { | ||
| 66 | if let Some(start) = value.find('<') { | ||
| 67 | if let Some(end) = value.find('>') { | ||
| 68 | let email = value[start + 1..end].to_string(); | ||
| 69 | let name_part = value[..start].trim(); | ||
| 70 | let name = name_part.trim_matches('"').trim().to_string(); | ||
| 71 | return Ok((name, email)); | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | if value.contains('@') { | ||
| 76 | let email = value.trim().to_string(); | ||
| 77 | let name = email.split('@').next().unwrap_or("unknown").to_string(); | ||
| 78 | return Ok((name, email)); | ||
| 79 | } | ||
| 80 | |||
| 81 | bail!("could not parse From header: {}", value) | ||
| 82 | } | ||
| 83 | |||
| 84 | fn extract_date_from_header(content: &str) -> Result<(i64, i32)> { | ||
| 85 | let date_line = content | ||
| 86 | .lines() | ||
| 87 | .find(|line| line.starts_with("Date:")) | ||
| 88 | .context("patch does not contain a 'Date:' header")?; | ||
| 89 | |||
| 90 | let date_value = date_line | ||
| 91 | .strip_prefix("Date:") | ||
| 92 | .context("failed to strip 'Date:' prefix")? | ||
| 93 | .trim(); | ||
| 94 | |||
| 95 | parse_rfc2822_date(date_value) | ||
| 96 | } | ||
| 97 | |||
| 98 | fn parse_rfc2822_date(value: &str) -> Result<(i64, i32)> { | ||
| 99 | let parsed = DateTime::parse_from_rfc2822(value) | ||
| 100 | .context(format!("failed to parse RFC2822 date: {}", value))?; | ||
| 101 | |||
| 102 | let timestamp = parsed.timestamp(); | ||
| 103 | let offset_minutes = parsed.offset().local_minus_utc() / 60; | ||
| 104 | |||
| 105 | Ok((timestamp, offset_minutes)) | ||
| 106 | } | ||
| 107 | |||
| 108 | fn extract_committer_date_from_mbox(content: &str) -> Result<Option<i64>> { | ||
| 109 | let first_line = content.lines().next().context("patch content is empty")?; | ||
| 110 | |||
| 111 | let parts: Vec<&str> = first_line.split_whitespace().collect(); | ||
| 112 | |||
| 113 | if parts.len() >= 6 { | ||
| 114 | let date_str = parts[3..6].join(" "); | ||
| 115 | if let Ok(dt) = DateTime::parse_from_rfc2822(&date_str) { | ||
| 116 | return Ok(Some(dt.timestamp())); | ||
| 117 | } | ||
| 118 | } | ||
| 119 | |||
| 120 | if parts.len() >= 7 { | ||
| 121 | let date_str = format!("{} {} {}", parts[3], parts[4], parts[5]); | ||
| 122 | if let Ok(dt) = chrono::DateTime::parse_from_str(&date_str, "%a %b %d") { | ||
| 123 | if let Ok(year) = parts[6].parse::<i32>() { | ||
| 124 | let with_year = dt.with_year(year); | ||
| 125 | if let Some(dt_with_year) = with_year { | ||
| 126 | return Ok(Some(dt_with_year.timestamp())); | ||
| 127 | } | ||
| 128 | } | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | Ok(None) | ||
| 133 | } | ||
| 134 | |||
| 135 | fn extract_subject(content: &str) -> Result<String> { | ||
| 136 | let subject_line = content | ||
| 137 | .lines() | ||
| 138 | .find(|line| line.starts_with("Subject:")) | ||
| 139 | .context("patch does not contain a 'Subject:' header")?; | ||
| 140 | |||
| 141 | let subject_value = subject_line | ||
| 142 | .strip_prefix("Subject:") | ||
| 143 | .context("failed to strip 'Subject:' prefix")? | ||
| 144 | .trim(); | ||
| 145 | |||
| 146 | Ok(cleanup_subject(subject_value)) | ||
| 147 | } | ||
| 148 | |||
/// Strips mail-list decoration from the front of a subject line.
///
/// Repeatedly removes, from the left:
/// * a reply marker `re:` in any letter case (`Re:`, `RE:`, ...),
/// * a stray leading `:`,
/// * a bracketed prefix such as `[PATCH v2 1/3]`.
///
/// The result is always trimmed of surrounding whitespace. A leading `[` with
/// no matching `]` is left untouched.
fn cleanup_subject(subject: &str) -> String {
    // Work on a shrinking slice of the input; allocate once at the end.
    let mut rest = subject.trim();

    loop {
        // `get(..3)` is `None` for short input or when byte 3 is not a char
        // boundary, so non-ASCII subjects cannot cause a slicing panic.
        if let Some(head) = rest.get(..3) {
            if head.eq_ignore_ascii_case("re:") {
                rest = rest[3..].trim();
                continue;
            }
        }

        if let Some(stripped) = rest.strip_prefix(':') {
            rest = stripped.trim();
            continue;
        }

        if rest.starts_with('[') {
            if let Some(close) = rest.find(']') {
                rest = rest[close + 1..].trim();
                continue;
            }
        }

        break;
    }

    rest.to_string()
}
| 177 | |||
| 178 | fn extract_commit_message_body(content: &str) -> Result<String> { | ||
| 179 | let mut in_body = false; | ||
| 180 | let mut body_lines: Vec<String> = Vec::new(); | ||
| 181 | let mut found_first_content = false; | ||
| 182 | |||
| 183 | for line in content.lines() { | ||
| 184 | if !in_body { | ||
| 185 | if line.is_empty() { | ||
| 186 | in_body = true; | ||
| 187 | } | ||
| 188 | continue; | ||
| 189 | } | ||
| 190 | |||
| 191 | if line.starts_with("diff --git ") | ||
| 192 | || line.starts_with("Index: ") | ||
| 193 | || line.starts_with("--- ") | ||
| 194 | || line.starts_with("From ") | ||
| 195 | { | ||
| 196 | break; | ||
| 197 | } | ||
| 198 | |||
| 199 | if line.starts_with("---") && line.trim().eq("---") { | ||
| 200 | break; | ||
| 201 | } | ||
| 202 | |||
| 203 | if line.starts_with("-- ") || line.starts_with("--\n") { | ||
| 204 | break; | ||
| 205 | } | ||
| 206 | |||
| 207 | if !found_first_content && line.trim().is_empty() { | ||
| 208 | continue; | ||
| 209 | } | ||
| 210 | |||
| 211 | found_first_content = true; | ||
| 212 | body_lines.push(line.to_string()); | ||
| 213 | } | ||
| 214 | |||
| 215 | while body_lines.last().is_some_and(|l| l.trim().is_empty()) { | ||
| 216 | body_lines.pop(); | ||
| 217 | } | ||
| 218 | |||
| 219 | Ok(body_lines.join("\n").trim().to_string()) | ||
| 220 | } | ||
| 221 | |||
| 222 | pub fn extract_description_from_patch(content: &str) -> Result<String> { | ||
| 223 | let subject = extract_subject(content)?; | ||
| 224 | let body = extract_commit_message_body(content)?; | ||
| 225 | |||
| 226 | if body.is_empty() { | ||
| 227 | Ok(subject) | ||
| 228 | } else { | ||
| 229 | Ok(format!("{}\n\n{}", subject, body)) | ||
| 230 | } | ||
| 231 | } | ||
| 232 | |||
#[cfg(test)]
mod tests {
    use super::*;

    /// Commit id on the separator line of `sample_patch`.
    const SAMPLE_COMMIT_ID: &str = "431b84edc0d2fa118d63faa3c2db9c73d630a5ae";
    /// Message body carried by `sample_patch`.
    const SAMPLE_BODY: &str = "This is the commit message body.\n\nIt can have multiple lines.";
    /// Epoch date used by most fixtures.
    const EPOCH_UTC: &str = "Thu, 1 Jan 1970 00:00:00 +0000";

    /// A complete single-file patch including diffstat, diff and signature.
    fn sample_patch() -> String {
        [
            "From 431b84edc0d2fa118d63faa3c2db9c73d630a5ae Mon Sep 17 00:00:00 2001",
            "From: Joe Bloggs <joe.bloggs@pm.me>",
            "Date: Thu, 1 Jan 1970 00:00:00 +0000",
            "Subject: [PATCH] add t2.md",
            "",
            "This is the commit message body.",
            "",
            "It can have multiple lines.",
            "",
            "---",
            " t2.md | 1 +",
            " 1 file changed, 1 insertion(+)",
            " create mode 100644 t2.md",
            "",
            "diff --git a/t2.md b/t2.md",
            "new file mode 100644",
            "index 0000000..a66525d",
            "--- /dev/null",
            "+++ b/t2.md",
            "@@ -0,0 +1 @@",
            "+some content1",
            "\\ No newline at end of file",
            "--",
            "libgit2 1.9.1",
            "",
            "",
        ]
        .join("\n")
    }

    /// A minimal patch with the given header values; `rest` is everything
    /// after the blank line that ends the headers.
    fn patch_with(from: &str, date: &str, subject: &str, rest: &str) -> String {
        format!(
            "From abc123 Mon Sep 17 00:00:00 2001\nFrom: {}\nDate: {}\nSubject: {}\n\n{}",
            from, date, subject, rest
        )
    }

    #[test]
    fn parse_commit_id() {
        let id = extract_commit_id_from_mbox(&sample_patch()).unwrap();
        assert_eq!(id, SAMPLE_COMMIT_ID);
    }

    #[test]
    fn parse_author() {
        let (name, email) = extract_author_from_from_header(&sample_patch()).unwrap();
        assert_eq!(name, "Joe Bloggs");
        assert_eq!(email, "joe.bloggs@pm.me");
    }

    #[test]
    fn parse_author_with_quoted_name() {
        let patch = patch_with(
            "\"John (nickname) Doe\" <john.doe@example.com>",
            EPOCH_UTC,
            "test",
            "Body\n",
        );
        let (name, email) = extract_author_from_from_header(&patch).unwrap();
        assert_eq!(name, "John (nickname) Doe");
        assert_eq!(email, "john.doe@example.com");
    }

    #[test]
    fn parse_author_email_only() {
        let patch = patch_with("john.doe@example.com", EPOCH_UTC, "test", "Body\n");
        let (name, email) = extract_author_from_from_header(&patch).unwrap();
        assert_eq!(name, "john.doe");
        assert_eq!(email, "john.doe@example.com");
    }

    #[test]
    fn parse_date() {
        let (timestamp, offset) = extract_date_from_header(&sample_patch()).unwrap();
        assert_eq!(timestamp, 0);
        assert_eq!(offset, 0);
    }

    #[test]
    fn parse_date_with_timezone() {
        let patch = patch_with(
            "Joe <joe@example.com>",
            "Thu, 1 Jan 1970 00:00:00 +0500",
            "test",
            "Body\n",
        );
        let (timestamp, offset) = extract_date_from_header(&patch).unwrap();
        assert_eq!(timestamp, -18000);
        assert_eq!(offset, 300);
    }

    #[test]
    fn parse_subject() {
        let subject = extract_subject(&sample_patch()).unwrap();
        assert_eq!(subject, "add t2.md");
    }

    #[test]
    fn parse_subject_with_patch_prefix() {
        let patch = patch_with(
            "Joe <joe@example.com>",
            EPOCH_UTC,
            "[PATCH v2 3/5] fix: important bug",
            "Body\n",
        );
        let subject = extract_subject(&patch).unwrap();
        assert_eq!(subject, "fix: important bug");
    }

    #[test]
    fn parse_subject_with_re_prefix() {
        let patch = patch_with(
            "Joe <joe@example.com>",
            EPOCH_UTC,
            "Re: [PATCH] fix: important bug",
            "Body\n",
        );
        let subject = extract_subject(&patch).unwrap();
        assert_eq!(subject, "fix: important bug");
    }

    #[test]
    fn parse_body() {
        let body = extract_commit_message_body(&sample_patch()).unwrap();
        assert_eq!(body, SAMPLE_BODY);
    }

    #[test]
    fn parse_body_empty() {
        let patch = patch_with(
            "Joe <joe@example.com>",
            EPOCH_UTC,
            "test",
            "---\n file.txt | 1 +\ndiff --git a/file.txt b/file.txt\n",
        );
        let body = extract_commit_message_body(&patch).unwrap();
        assert_eq!(body, "");
    }

    #[test]
    fn parse_full_metadata() {
        let metadata = parse_mbox_patch(&sample_patch()).unwrap();

        assert_eq!(metadata.commit_id, SAMPLE_COMMIT_ID);
        assert_eq!(metadata.author_name, "Joe Bloggs");
        assert_eq!(metadata.author_email, "joe.bloggs@pm.me");
        assert_eq!(metadata.author_timestamp, 0);
        assert_eq!(metadata.author_offset_minutes, 0);
        assert_eq!(metadata.subject, "add t2.md");
        assert_eq!(metadata.body, SAMPLE_BODY);
    }

    #[test]
    fn extract_description_combines_subject_and_body() {
        let description = extract_description_from_patch(&sample_patch()).unwrap();
        assert_eq!(
            description,
            "add t2.md\n\nThis is the commit message body.\n\nIt can have multiple lines."
        );
    }

    #[test]
    fn extract_description_subject_only() {
        let patch = patch_with(
            "Joe <joe@example.com>",
            EPOCH_UTC,
            "[PATCH] simple fix",
            "---\n file.txt | 1 +\n",
        );
        let description = extract_description_from_patch(&patch).unwrap();
        assert_eq!(description, "simple fix");
    }

    #[test]
    fn cleanup_subject_strips_patch_prefixes() {
        let cases = [
            ("[PATCH] test", "test"),
            ("[PATCH v2] test", "test"),
            ("[PATCH 1/3] test", "test"),
            ("[PATCH v2 1/3] test", "test"),
            ("Re: [PATCH] test", "test"),
            ("re: test", "test"),
            (":test", "test"),
        ];
        for (input, expected) in cases {
            assert_eq!(cleanup_subject(input), expected);
        }
    }
}