diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-02-18 21:04:59 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-02-18 21:04:59 +0000 |
| commit | 73d829b916d87626f33ea2adead0c48f1d9d737d (patch) | |
| tree | b3adc223df90184e430ac6cb153b9a6e986f8097 /src/lib/mbox_parser.rs | |
| parent | 061589cd88d0480dc7cb0b9eb19a3910293ceb56 (diff) | |
fix: harden apply_patch_chain when optional patch tags absent
- Replace .unwrap() calls in filter closure with pattern-let guards
so a missing/invalid mbox commit id conservatively includes the patch
- Use the OID returned by create_commit_from_patch as branch tip instead
of the tag commit id, which may differ for GPG-signed commits
- Add module-level doc comment to mbox_parser explaining design rationale
and known limitations around the mbox envelope SHA1
Diffstat (limited to 'src/lib/mbox_parser.rs')
| -rw-r--r-- | src/lib/mbox_parser.rs | 62 |
1 file changed, 62 insertions, 0 deletions
diff --git a/src/lib/mbox_parser.rs b/src/lib/mbox_parser.rs index fd2f8ed..48190ba 100644 --- a/src/lib/mbox_parser.rs +++ b/src/lib/mbox_parser.rs | |||
| @@ -1,3 +1,57 @@ | |||
| 1 | //! Parser for mbox-format git patch content. | ||
| 2 | //! | ||
| 3 | //! This module is a **fallback path** used only when nostr patch events are | ||
| 4 | //! missing optional tags (`author`, `committer`, `description`, | ||
| 5 | //! `parent-commit`). When those tags are present they always take precedence — | ||
| 6 | //! see [`crate::git::RepoActions::apply_patch_chain`]. | ||
| 7 | //! | ||
| 8 | //! ## Why hand-rolled rather than a library? | ||
| 9 | //! | ||
| 10 | //! Neither libgit2 (via the `git2` crate) nor gitoxide (`gix`) exposes a | ||
| 11 | //! mailinfo-style parser. libgit2's email API is output-only | ||
| 12 | //! (`git_email_create_from_commit`); there is no `git_mailinfo` equivalent. The | ||
| 13 | //! gitoxide monorepo has no `gix-patch` crate, not even as a placeholder. No | ||
| 14 | //! production-quality standalone Rust mbox/git-patch parser crate exists. | ||
| 15 | //! | ||
| 16 | //! The genuinely hard parts of RFC 2822 parsing (header folding, RFC 2047 MIME | ||
| 17 | //! encoded-words for non-ASCII author names and subjects) are delegated to the | ||
| 18 | //! `mailparse` crate. The git-specific overlay (mbox envelope line, `[PATCH]` | ||
| 19 | //! prefix stripping, commit-message body extraction up to the `---` diffstat | ||
| 20 | //! separator) is implemented here, matching the behaviour of `git am`'s | ||
| 21 | //! `patchbreak()` function in `mailinfo.c`. | ||
| 22 | //! | ||
| 23 | //! ## If edge cases are reported | ||
| 24 | //! | ||
| 25 | //! If real-world patches produce incorrect metadata through this parser, the | ||
| 26 | //! escape hatch is to shell out to `git mailinfo` directly: | ||
| 27 | //! ```text | ||
| 28 | //! git mailinfo /tmp/msg /tmp/patch < input.patch | ||
| 29 | //! ``` | ||
| 30 | //! This prints `Author:`, `Email:`, `Subject:`, `Date:` to stdout, writes the | |
| 31 | //! commit body to `/tmp/msg` and the diff to `/tmp/patch`. Since ngit already requires `git` in PATH (it | |
| 32 | //! is a git plugin), this is always available. It is not the primary approach | ||
| 33 | //! because it requires two temp files and a process spawn per patch, which is | ||
| 34 | //! an acceptable cost but unnecessary given that most patches in the ngit `pr/` | |
| 35 | //! flow will have the optional nostr tags and never reach this code. | ||
| 36 | //! | ||
| 37 | //! ## Known limitation: `---` in commit message body | ||
| 38 | //! | ||
| 39 | //! The `---` line that separates the commit message from the diffstat is | ||
| 40 | //! ambiguous when the commit message itself contains `---` (e.g. Markdown | ||
| 41 | //! horizontal rules). This parser stops at the first `---`-only line, matching | ||
| 42 | //! `git am`'s own behaviour — `git am` has the same limitation and documents it. | |
| 43 | //! This is not a bug we can fix without lookahead into the diff structure. | ||
| 44 | //! | ||
| 45 | //! ## Commit ID from mbox envelope | ||
| 46 | //! | ||
| 47 | //! The SHA1 in the mbox `From <sha1> <date>` envelope line is extracted but | ||
| 48 | //! **must not be assumed correct**. libgit2 generates this ID from the commit | ||
| 49 | //! object, but if the original commit was GPG-signed, or if the patch was | ||
| 50 | //! generated by a different tool, the reconstructed commit (applied via | ||
| 51 | //! `apply_to_tree` + `commit_create_buffer`) will have a different OID. | ||
| 52 | //! The `commit` nostr tag is the authoritative source for commit identity when | ||
| 53 | //! present. | ||
| 54 | |||
| 1 | use anyhow::{Context, Result, bail}; | 55 | use anyhow::{Context, Result, bail}; |
| 2 | use chrono::DateTime; | 56 | use chrono::DateTime; |
| 3 | use mailparse::{MailHeaderMap, parse_headers}; | 57 | use mailparse::{MailHeaderMap, parse_headers}; |
| @@ -34,6 +88,14 @@ pub fn parse_mbox_patch(content: &str) -> Result<PatchMetadata> { | |||
| 34 | }) | 88 | }) |
| 35 | } | 89 | } |
| 36 | 90 | ||
| 91 | /// Extract the SHA1 from the mbox `From <sha1> <date>` envelope line. | ||
| 92 | /// | ||
| 93 | /// **This value should not be assumed correct for the reconstructed commit.** | ||
| 94 | /// If the original commit was GPG-signed, or the patch was generated by a | ||
| 95 | /// different tool (e.g. `git format-patch` vs libgit2), the commit recreated | ||
| 96 | /// by applying this patch via `commit_create_buffer` will have a different OID. | ||
| 97 | /// Use the `commit` nostr event tag as the authoritative commit identity when | ||
| 98 | /// present. | ||
| 37 | fn extract_commit_id_from_mbox(content: &str) -> Result<String> { | 99 | fn extract_commit_id_from_mbox(content: &str) -> Result<String> { |
| 38 | if !content.starts_with("From ") { | 100 | if !content.starts_with("From ") { |
| 39 | bail!("patch does not start with 'From ' - not a valid mbox format"); | 101 | bail!("patch does not start with 'From ' - not a valid mbox format"); |