upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/src/lib/mbox_parser.rs
diff options
context:
space:
mode:
authorDanConwayDev <DanConwayDev@protonmail.com>2026-02-18 14:48:20 +0000
committerDanConwayDev <DanConwayDev@protonmail.com>2026-02-18 14:48:20 +0000
commitfcff4541e1f36b6575596c353637b25aeae9bdcf (patch)
treed897ce824ca49a8ffef9f55f5f36777687573aab /src/lib/mbox_parser.rs
parente6bb9effa194fe63b5e969c090dbe6e93f13d312 (diff)
feat: handle missing optional patch tags for pr/ flow
- Add mbox_parser module to extract metadata from patch content
- Extract author/committer from From: and Date: headers when tags missing
- Extract commit message body as fallback for description tag
- Implement best-guess parent commit logic using committer timestamps
- Update patch_supports_commit_ids to accept mbox-parseable patches
- Enable patches without optional tags to appear as pr/ branches
Diffstat (limited to 'src/lib/mbox_parser.rs')
-rw-r--r--src/lib/mbox_parser.rs452
1 files changed, 452 insertions, 0 deletions
diff --git a/src/lib/mbox_parser.rs b/src/lib/mbox_parser.rs
new file mode 100644
index 0000000..40603b1
--- /dev/null
+++ b/src/lib/mbox_parser.rs
@@ -0,0 +1,452 @@
1use anyhow::{Context, Result, bail};
2use chrono::{DateTime, Datelike};
3
/// Commit metadata recovered from the text of an mbox-formatted patch.
///
/// Built by [`parse_mbox_patch`]; every field is derived purely from the patch
/// text, so no repository access is needed to populate it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PatchMetadata {
    /// Second whitespace-separated token of the leading `From ` line.
    pub commit_id: String,
    /// Display name taken from the `From:` header.
    pub author_name: String,
    /// Email address taken from the `From:` header.
    pub author_email: String,
    /// Timestamp (seconds) parsed from the `Date:` header.
    pub author_timestamp: i64,
    /// UTC offset of the `Date:` header, in minutes (e.g. `+0500` is `300`).
    pub author_offset_minutes: i32,
    /// Timestamp read from the leading `From ` line, when one could be parsed.
    pub committer_timestamp: Option<i64>,
    /// `Subject:` header with `Re:` / `[...]` prefixes removed.
    pub subject: String,
    /// Commit message text that follows the header block, without the subject.
    pub body: String,
}
15
16pub fn parse_mbox_patch(content: &str) -> Result<PatchMetadata> {
17 let commit_id = extract_commit_id_from_mbox(content)?;
18 let (author_name, author_email) = extract_author_from_from_header(content)?;
19 let (author_timestamp, author_offset_minutes) = extract_date_from_header(content)?;
20 let committer_timestamp = extract_committer_date_from_mbox(content)?;
21 let subject = extract_subject(content)?;
22 let body = extract_commit_message_body(content)?;
23
24 Ok(PatchMetadata {
25 commit_id,
26 author_name,
27 author_email,
28 author_timestamp,
29 author_offset_minutes,
30 committer_timestamp,
31 subject,
32 body,
33 })
34}
35
36fn extract_commit_id_from_mbox(content: &str) -> Result<String> {
37 if !content.starts_with("From ") {
38 bail!("patch does not start with 'From ' - not a valid mbox format");
39 }
40
41 let first_line = content.lines().next().context("patch content is empty")?;
42
43 let parts: Vec<&str> = first_line.split_whitespace().collect();
44 if parts.len() < 2 {
45 bail!("mbox 'From ' line does not contain a commit id");
46 }
47
48 Ok(parts[1].to_string())
49}
50
51fn extract_author_from_from_header(content: &str) -> Result<(String, String)> {
52 let from_line = content
53 .lines()
54 .find(|line| line.starts_with("From:"))
55 .context("patch does not contain a 'From:' header")?;
56
57 let from_value = from_line
58 .strip_prefix("From:")
59 .context("failed to strip 'From:' prefix")?
60 .trim();
61
62 parse_from_header_value(from_value)
63}
64
65fn parse_from_header_value(value: &str) -> Result<(String, String)> {
66 if let Some(start) = value.find('<') {
67 if let Some(end) = value.find('>') {
68 let email = value[start + 1..end].to_string();
69 let name_part = value[..start].trim();
70 let name = name_part.trim_matches('"').trim().to_string();
71 return Ok((name, email));
72 }
73 }
74
75 if value.contains('@') {
76 let email = value.trim().to_string();
77 let name = email.split('@').next().unwrap_or("unknown").to_string();
78 return Ok((name, email));
79 }
80
81 bail!("could not parse From header: {}", value)
82}
83
84fn extract_date_from_header(content: &str) -> Result<(i64, i32)> {
85 let date_line = content
86 .lines()
87 .find(|line| line.starts_with("Date:"))
88 .context("patch does not contain a 'Date:' header")?;
89
90 let date_value = date_line
91 .strip_prefix("Date:")
92 .context("failed to strip 'Date:' prefix")?
93 .trim();
94
95 parse_rfc2822_date(date_value)
96}
97
98fn parse_rfc2822_date(value: &str) -> Result<(i64, i32)> {
99 let parsed = DateTime::parse_from_rfc2822(value)
100 .context(format!("failed to parse RFC2822 date: {}", value))?;
101
102 let timestamp = parsed.timestamp();
103 let offset_minutes = parsed.offset().local_minus_utc() / 60;
104
105 Ok((timestamp, offset_minutes))
106}
107
108fn extract_committer_date_from_mbox(content: &str) -> Result<Option<i64>> {
109 let first_line = content.lines().next().context("patch content is empty")?;
110
111 let parts: Vec<&str> = first_line.split_whitespace().collect();
112
113 if parts.len() >= 6 {
114 let date_str = parts[3..6].join(" ");
115 if let Ok(dt) = DateTime::parse_from_rfc2822(&date_str) {
116 return Ok(Some(dt.timestamp()));
117 }
118 }
119
120 if parts.len() >= 7 {
121 let date_str = format!("{} {} {}", parts[3], parts[4], parts[5]);
122 if let Ok(dt) = chrono::DateTime::parse_from_str(&date_str, "%a %b %d") {
123 if let Ok(year) = parts[6].parse::<i32>() {
124 let with_year = dt.with_year(year);
125 if let Some(dt_with_year) = with_year {
126 return Ok(Some(dt_with_year.timestamp()));
127 }
128 }
129 }
130 }
131
132 Ok(None)
133}
134
135fn extract_subject(content: &str) -> Result<String> {
136 let subject_line = content
137 .lines()
138 .find(|line| line.starts_with("Subject:"))
139 .context("patch does not contain a 'Subject:' header")?;
140
141 let subject_value = subject_line
142 .strip_prefix("Subject:")
143 .context("failed to strip 'Subject:' prefix")?
144 .trim();
145
146 Ok(cleanup_subject(subject_value))
147}
148
/// Removes reply and patch-series decorations from the start of a subject.
///
/// Repeatedly strips, from the front:
/// * a reply marker `Re:` in any letter case (`re:`, `RE:`, ...);
/// * a stray leading `:`;
/// * a bracketed tag such as `[PATCH v2 1/3]` (only when the `]` is present).
///
/// Surrounding whitespace is always removed, including when nothing else is
/// stripped. Text after the prefixes is returned unchanged.
fn cleanup_subject(subject: &str) -> String {
    // Work on borrowed slices and allocate once at the end.
    let mut rest = subject.trim();

    loop {
        // `get(..3)` is `None` for short input or when byte 3 falls inside a
        // multi-byte character, so the slice below can never panic.
        if let Some(prefix) = rest.get(..3) {
            if prefix.eq_ignore_ascii_case("re:") {
                rest = rest[3..].trim();
                continue;
            }
        }

        if let Some(stripped) = rest.strip_prefix(':') {
            rest = stripped.trim();
            continue;
        }

        if rest.starts_with('[') {
            if let Some(end) = rest.find(']') {
                rest = rest[end + 1..].trim();
                continue;
            }
        }

        break;
    }

    rest.to_string()
}
177
178fn extract_commit_message_body(content: &str) -> Result<String> {
179 let mut in_body = false;
180 let mut body_lines: Vec<String> = Vec::new();
181 let mut found_first_content = false;
182
183 for line in content.lines() {
184 if !in_body {
185 if line.is_empty() {
186 in_body = true;
187 }
188 continue;
189 }
190
191 if line.starts_with("diff --git ")
192 || line.starts_with("Index: ")
193 || line.starts_with("--- ")
194 || line.starts_with("From ")
195 {
196 break;
197 }
198
199 if line.starts_with("---") && line.trim().eq("---") {
200 break;
201 }
202
203 if line.starts_with("-- ") || line.starts_with("--\n") {
204 break;
205 }
206
207 if !found_first_content && line.trim().is_empty() {
208 continue;
209 }
210
211 found_first_content = true;
212 body_lines.push(line.to_string());
213 }
214
215 while body_lines.last().is_some_and(|l| l.trim().is_empty()) {
216 body_lines.pop();
217 }
218
219 Ok(body_lines.join("\n").trim().to_string())
220}
221
222pub fn extract_description_from_patch(content: &str) -> Result<String> {
223 let subject = extract_subject(content)?;
224 let body = extract_commit_message_body(content)?;
225
226 if body.is_empty() {
227 Ok(subject)
228 } else {
229 Ok(format!("{}\n\n{}", subject, body))
230 }
231}
232
#[cfg(test)]
mod tests {
    use super::*;

    /// Date header value used by every fixture that does not test time zones.
    const EPOCH_UTC: &str = "Thu, 1 Jan 1970 00:00:00 +0000";

    /// Expected body of [`SAMPLE_PATCH`].
    const SAMPLE_BODY: &str = "This is the commit message body.\n\nIt can have multiple lines.";

    /// A complete single-commit patch shared by most tests.
    const SAMPLE_PATCH: &str = "\
From 431b84edc0d2fa118d63faa3c2db9c73d630a5ae Mon Sep 17 00:00:00 2001
From: Joe Bloggs <joe.bloggs@pm.me>
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: [PATCH] add t2.md

This is the commit message body.

It can have multiple lines.

---
 t2.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 t2.md

diff --git a/t2.md b/t2.md
new file mode 100644
index 0000000..a66525d
--- /dev/null
+++ b/t2.md
@@ -0,0 +1 @@
+some content1
\\ No newline at end of file
--
libgit2 1.9.1

";

    /// Builds a small patch: separator line, the three headers, a blank line
    /// and then `tail` verbatim.
    fn minimal_patch(from: &str, date: &str, subject: &str, tail: &str) -> String {
        format!(
            "From abc123 Mon Sep 17 00:00:00 2001\nFrom: {}\nDate: {}\nSubject: {}\n\n{}",
            from, date, subject, tail
        )
    }

    fn owned_pair(name: &str, email: &str) -> (String, String) {
        (name.to_string(), email.to_string())
    }

    #[test]
    fn parse_commit_id() {
        let commit_id = extract_commit_id_from_mbox(SAMPLE_PATCH).unwrap();
        assert_eq!(commit_id, "431b84edc0d2fa118d63faa3c2db9c73d630a5ae");
    }

    #[test]
    fn parse_author() {
        let author = extract_author_from_from_header(SAMPLE_PATCH).unwrap();
        assert_eq!(author, owned_pair("Joe Bloggs", "joe.bloggs@pm.me"));
    }

    #[test]
    fn parse_author_with_quoted_name() {
        let input = minimal_patch(
            "\"John (nickname) Doe\" <john.doe@example.com>",
            EPOCH_UTC,
            "test",
            "Body\n",
        );
        let author = extract_author_from_from_header(&input).unwrap();
        assert_eq!(author, owned_pair("John (nickname) Doe", "john.doe@example.com"));
    }

    #[test]
    fn parse_author_email_only() {
        let input = minimal_patch("john.doe@example.com", EPOCH_UTC, "test", "Body\n");
        let author = extract_author_from_from_header(&input).unwrap();
        assert_eq!(author, owned_pair("john.doe", "john.doe@example.com"));
    }

    #[test]
    fn parse_date() {
        let parsed = extract_date_from_header(SAMPLE_PATCH).unwrap();
        assert_eq!(parsed, (0, 0));
    }

    #[test]
    fn parse_date_with_timezone() {
        let input = minimal_patch(
            "Joe <joe@example.com>",
            "Thu, 1 Jan 1970 00:00:00 +0500",
            "test",
            "Body\n",
        );
        let parsed = extract_date_from_header(&input).unwrap();
        assert_eq!(parsed, (-18000, 300));
    }

    #[test]
    fn parse_subject() {
        assert_eq!(extract_subject(SAMPLE_PATCH).unwrap(), "add t2.md");
    }

    #[test]
    fn parse_subject_with_patch_prefix() {
        let input = minimal_patch(
            "Joe <joe@example.com>",
            EPOCH_UTC,
            "[PATCH v2 3/5] fix: important bug",
            "Body\n",
        );
        assert_eq!(extract_subject(&input).unwrap(), "fix: important bug");
    }

    #[test]
    fn parse_subject_with_re_prefix() {
        let input = minimal_patch(
            "Joe <joe@example.com>",
            EPOCH_UTC,
            "Re: [PATCH] fix: important bug",
            "Body\n",
        );
        assert_eq!(extract_subject(&input).unwrap(), "fix: important bug");
    }

    #[test]
    fn parse_body() {
        assert_eq!(extract_commit_message_body(SAMPLE_PATCH).unwrap(), SAMPLE_BODY);
    }

    #[test]
    fn parse_body_empty() {
        let input = minimal_patch(
            "Joe <joe@example.com>",
            EPOCH_UTC,
            "test",
            "---\n file.txt | 1 +\ndiff --git a/file.txt b/file.txt\n",
        );
        assert_eq!(extract_commit_message_body(&input).unwrap(), "");
    }

    #[test]
    fn parse_full_metadata() {
        let parsed = parse_mbox_patch(SAMPLE_PATCH).unwrap();

        assert_eq!(parsed.commit_id, "431b84edc0d2fa118d63faa3c2db9c73d630a5ae");
        assert_eq!(parsed.author_name, "Joe Bloggs");
        assert_eq!(parsed.author_email, "joe.bloggs@pm.me");
        assert_eq!(parsed.author_timestamp, 0);
        assert_eq!(parsed.author_offset_minutes, 0);
        assert_eq!(parsed.subject, "add t2.md");
        assert_eq!(parsed.body, SAMPLE_BODY);
    }

    #[test]
    fn extract_description_combines_subject_and_body() {
        let description = extract_description_from_patch(SAMPLE_PATCH).unwrap();
        assert_eq!(
            description,
            "add t2.md\n\nThis is the commit message body.\n\nIt can have multiple lines."
        );
    }

    #[test]
    fn extract_description_subject_only() {
        let input = minimal_patch(
            "Joe <joe@example.com>",
            EPOCH_UTC,
            "[PATCH] simple fix",
            "---\n file.txt | 1 +\n",
        );
        assert_eq!(extract_description_from_patch(&input).unwrap(), "simple fix");
    }

    #[test]
    fn cleanup_subject_strips_patch_prefixes() {
        let cases = [
            ("[PATCH] test", "test"),
            ("[PATCH v2] test", "test"),
            ("[PATCH 1/3] test", "test"),
            ("[PATCH v2 1/3] test", "test"),
            ("Re: [PATCH] test", "test"),
            ("re: test", "test"),
            (":test", "test"),
        ];

        for (raw, expected) in cases {
            assert_eq!(cleanup_subject(raw), expected);
        }
    }
}