diff options
Diffstat (limited to 'src/lib')
| -rw-r--r-- | src/lib/git/mod.rs | 72 | ||||
| -rw-r--r-- | src/lib/mbox_parser.rs | 62 |
2 files changed, 113 insertions, 21 deletions
diff --git a/src/lib/git/mod.rs b/src/lib/git/mod.rs index 57e8403..9f86b5f 100644 --- a/src/lib/git/mod.rs +++ b/src/lib/git/mod.rs | |||
| @@ -576,15 +576,23 @@ impl RepoActions for Repo { | |||
| 576 | let mut patches_to_apply: Vec<nostr::Event> = patch_and_ancestors | 576 | let mut patches_to_apply: Vec<nostr::Event> = patch_and_ancestors |
| 577 | .into_iter() | 577 | .into_iter() |
| 578 | .filter(|e| { | 578 | .filter(|e| { |
| 579 | let commit_id = get_commit_id_from_patch(e).unwrap(); | 579 | // When the commit tag is absent, the commit id from the mbox envelope |
| 580 | if let Ok(branch_tip) = branch_tip_result { | 580 | // may not match the reconstructed commit's OID (e.g. GPG-signed commits). |
| 581 | !branch_tip.to_string().eq(&commit_id) | 581 | // In that case we conservatively include the patch for application — |
| 582 | && !self | 582 | // create_commit_from_patch handles idempotency via content-addressed storage. |
| 583 | .ancestor_of(&branch_tip, &str_to_sha1(&commit_id).unwrap()) | 583 | let Ok(commit_id) = get_commit_id_from_patch(e) else { |
| 584 | .unwrap() | 584 | return true; |
| 585 | } else { | 585 | }; |
| 586 | true | 586 | let Ok(branch_tip) = branch_tip_result else { |
| 587 | } | 587 | return true; |
| 588 | }; | ||
| 589 | let Ok(commit_sha1) = str_to_sha1(&commit_id) else { | ||
| 590 | // Commit id is not a valid SHA1 (e.g. placeholder from mbox envelope). | ||
| 591 | // Include conservatively. | ||
| 592 | return true; | ||
| 593 | }; | ||
| 594 | !branch_tip.to_string().eq(&commit_id) | ||
| 595 | && !self.ancestor_of(&branch_tip, &commit_sha1).unwrap_or(false) | ||
| 588 | }) | 596 | }) |
| 589 | .collect(); | 597 | .collect(); |
| 590 | 598 | ||
| @@ -612,12 +620,26 @@ impl RepoActions for Repo { | |||
| 612 | patches_to_apply.reverse(); | 620 | patches_to_apply.reverse(); |
| 613 | 621 | ||
| 614 | for patch in &patches_to_apply { | 622 | for patch in &patches_to_apply { |
| 615 | let commit_id = get_commit_id_from_patch(patch)?; | 623 | // The commit id from the tag (or mbox envelope) is the authoritative id |
| 616 | // only create new commits - otherwise make them the tip | 624 | // when the optional `commit` nostr tag is present. When it is absent the |
| 617 | if !self.does_commit_exist(&commit_id)? { | 625 | // mbox envelope SHA1 is used as a best-effort value — it will often differ |
| 618 | self.create_commit_from_patch(patch, None)?; | 626 | // from the reconstructed commit's actual OID (e.g. GPG-signed commits). |
| 619 | } | 627 | // We therefore prefer the OID returned by create_commit_from_patch as |
| 620 | self.create_branch_at_commit(branch_name, &commit_id)?; | 628 | // the branch tip, falling back to the tag commit id only when the commit |
| 629 | // already exists in the repo (meaning it was previously applied correctly). | ||
| 630 | let tag_commit_id = get_commit_id_from_patch(patch).ok(); | ||
| 631 | let applied_oid = if let Some(ref id) = tag_commit_id { | ||
| 632 | if self.does_commit_exist(id)? { | ||
| 633 | // Commit already exists (e.g. previously fetched), use it directly. | ||
| 634 | id.clone() | ||
| 635 | } else { | ||
| 636 | self.create_commit_from_patch(patch, None)?.to_string() | ||
| 637 | } | ||
| 638 | } else { | ||
| 639 | // No commit id available at all — apply and use the resulting OID. | ||
| 640 | self.create_commit_from_patch(patch, None)?.to_string() | ||
| 641 | }; | ||
| 642 | self.create_branch_at_commit(branch_name, &applied_oid)?; | ||
| 621 | self.checkout(branch_name)?; | 643 | self.checkout(branch_name)?; |
| 622 | } | 644 | } |
| 623 | Ok(patches_to_apply) | 645 | Ok(patches_to_apply) |
| @@ -641,8 +663,10 @@ impl RepoActions for Repo { | |||
| 641 | } else { | 663 | } else { |
| 642 | let metadata = crate::mbox_parser::parse_mbox_patch(&patch.content) | 664 | let metadata = crate::mbox_parser::parse_mbox_patch(&patch.content) |
| 643 | .context("failed to parse patch for timestamp")?; | 665 | .context("failed to parse patch for timestamp")?; |
| 644 | let timestamp = metadata.committer_timestamp.unwrap_or(metadata.author_timestamp); | 666 | let timestamp = metadata |
| 645 | 667 | .committer_timestamp | |
| 668 | .unwrap_or(metadata.author_timestamp); | ||
| 669 | |||
| 646 | let best_guess = self | 670 | let best_guess = self |
| 647 | .find_best_guess_parent_commit(timestamp) | 671 | .find_best_guess_parent_commit(timestamp) |
| 648 | .context("failed to find best guess parent commit")?; | 672 | .context("failed to find best guess parent commit")?; |
| @@ -684,8 +708,10 @@ impl RepoActions for Repo { | |||
| 684 | None | 708 | None |
| 685 | }; | 709 | }; |
| 686 | 710 | ||
| 687 | let author_data = extract_signature_data_with_fallback(&patch.tags, "author", &patch.content)?; | 711 | let author_data = |
| 688 | let committer_data = extract_signature_data_with_fallback(&patch.tags, "committer", &patch.content)?; | 712 | extract_signature_data_with_fallback(&patch.tags, "author", &patch.content)?; |
| 713 | let committer_data = | ||
| 714 | extract_signature_data_with_fallback(&patch.tags, "committer", &patch.content)?; | ||
| 689 | let author_sig = author_data.to_signature()?; | 715 | let author_sig = author_data.to_signature()?; |
| 690 | let committer_sig = committer_data.to_signature()?; | 716 | let committer_sig = committer_data.to_signature()?; |
| 691 | 717 | ||
| @@ -983,7 +1009,9 @@ fn extract_signature_data_from_tags(tags: &Tags, tag_name: &str) -> Result<Signa | |||
| 983 | name: v[1].clone(), | 1009 | name: v[1].clone(), |
| 984 | email: v[2].clone(), | 1010 | email: v[2].clone(), |
| 985 | timestamp: v[3].parse().context("tag time is incorrectly formatted")?, | 1011 | timestamp: v[3].parse().context("tag time is incorrectly formatted")?, |
| 986 | offset_minutes: v[4].parse().context("tag time offset is incorrectly formatted")?, | 1012 | offset_minutes: v[4] |
| 1013 | .parse() | ||
| 1014 | .context("tag time offset is incorrectly formatted")?, | ||
| 987 | }) | 1015 | }) |
| 988 | } | 1016 | } |
| 989 | 1017 | ||
| @@ -1007,7 +1035,9 @@ fn extract_signature_data_with_fallback( | |||
| 1007 | offset_minutes: metadata.author_offset_minutes, | 1035 | offset_minutes: metadata.author_offset_minutes, |
| 1008 | }) | 1036 | }) |
| 1009 | } else if tag_name == "committer" { | 1037 | } else if tag_name == "committer" { |
| 1010 | let timestamp = metadata.committer_timestamp.unwrap_or(metadata.author_timestamp); | 1038 | let timestamp = metadata |
| 1039 | .committer_timestamp | ||
| 1040 | .unwrap_or(metadata.author_timestamp); | ||
| 1011 | Ok(SignatureData { | 1041 | Ok(SignatureData { |
| 1012 | name: metadata.author_name, | 1042 | name: metadata.author_name, |
| 1013 | email: metadata.author_email, | 1043 | email: metadata.author_email, |
diff --git a/src/lib/mbox_parser.rs b/src/lib/mbox_parser.rs index fd2f8ed..48190ba 100644 --- a/src/lib/mbox_parser.rs +++ b/src/lib/mbox_parser.rs | |||
| @@ -1,3 +1,57 @@ | |||
| 1 | //! Parser for mbox-format git patch content. | ||
| 2 | //! | ||
| 3 | //! This module is a **fallback path** used only when nostr patch events are | ||
| 4 | //! missing optional tags (`author`, `committer`, `description`, | ||
| 5 | //! `parent-commit`). When those tags are present they always take precedence — | ||
| 6 | //! see [`crate::git::RepoActions::apply_patch_chain`]. | ||
| 7 | //! | ||
| 8 | //! ## Why hand-rolled rather than a library? | ||
| 9 | //! | ||
| 10 | //! Neither libgit2 (via the `git2` crate) nor gitoxide (`gix`) exposes a | ||
| 11 | //! mailinfo-style parser. libgit2's email API is output-only | ||
| 12 | //! (`git_email_create_from_commit`); there is no `git_mailinfo` equivalent. The | ||
| 13 | //! gitoxide monorepo has no `gix-patch` crate, not even as a placeholder. No | ||
| 14 | //! production-quality standalone Rust mbox/git-patch parser crate exists. | ||
| 15 | //! | ||
| 16 | //! The genuinely hard parts of RFC 2822 parsing (header folding, RFC 2047 MIME | ||
| 17 | //! encoded-words for non-ASCII author names and subjects) are delegated to the | ||
| 18 | //! `mailparse` crate. The git-specific overlay (mbox envelope line, `[PATCH]` | ||
| 19 | //! prefix stripping, commit-message body extraction up to the `---` diffstat | ||
| 20 | //! separator) is implemented here, matching the behaviour of `git am`'s | ||
| 21 | //! `patchbreak()` function in `mailinfo.c`. | ||
| 22 | //! | ||
| 23 | //! ## If edge cases are reported | ||
| 24 | //! | ||
| 25 | //! If real-world patches produce incorrect metadata through this parser, the | ||
| 26 | //! escape hatch is to shell out to `git mailinfo` directly: | ||
| 27 | //! ```text | ||
| 28 | //! git mailinfo /tmp/msg /tmp/patch < input.patch | ||
| 29 | //! ``` | ||
| 30 | //! This prints `Author:`, `Email:`, `Subject:`, `Date:` to stdout and writes | ||
| 31 | //! the commit body to `/tmp/msg`. Since ngit already requires `git` in PATH (it | ||
| 32 | //! is a git plugin), this is always available. It is not the primary approach | ||
| 33 | //! because it requires two temp files and a process spawn per patch, which is | ||
| 34 | //! acceptable cost but unnecessary given that most patches in the ngit `pr/` | ||
| 35 | //! flow will have the optional nostr tags and never reach this code. | ||
| 36 | //! | ||
| 37 | //! ## Known limitation: `---` in commit message body | ||
| 38 | //! | ||
| 39 | //! The `---` line that separates the commit message from the diffstat is | ||
| 40 | //! ambiguous when the commit message itself contains `---` (e.g. Markdown | ||
| 41 | //! horizontal rules). This parser stops at the first `---`-only line, matching | ||
| 42 | //! git am's own behaviour — `git am` has the same limitation and documents it. | ||
| 43 | //! This is not a bug we can fix without lookahead into the diff structure. | ||
| 44 | //! | ||
| 45 | //! ## Commit ID from mbox envelope | ||
| 46 | //! | ||
| 47 | //! The SHA1 in the mbox `From <sha1> <date>` envelope line is extracted but | ||
| 48 | //! **must not be assumed correct**. libgit2 generates this ID from the commit | ||
| 49 | //! object, but if the original commit was GPG-signed, or if the patch was | ||
| 50 | //! generated by a different tool, the reconstructed commit (applied via | ||
| 51 | //! `apply_to_tree` + `commit_create_buffer`) will have a different OID. | ||
| 52 | //! The `commit` nostr tag is the authoritative source for commit identity when | ||
| 53 | //! present. | ||
| 54 | |||
| 1 | use anyhow::{Context, Result, bail}; | 55 | use anyhow::{Context, Result, bail}; |
| 2 | use chrono::DateTime; | 56 | use chrono::DateTime; |
| 3 | use mailparse::{MailHeaderMap, parse_headers}; | 57 | use mailparse::{MailHeaderMap, parse_headers}; |
| @@ -34,6 +88,14 @@ pub fn parse_mbox_patch(content: &str) -> Result<PatchMetadata> { | |||
| 34 | }) | 88 | }) |
| 35 | } | 89 | } |
| 36 | 90 | ||
| 91 | /// Extract the SHA1 from the mbox `From <sha1> <date>` envelope line. | ||
| 92 | /// | ||
| 93 | /// **This value should not be assumed correct for the reconstructed commit.** | ||
| 94 | /// If the original commit was GPG-signed, or the patch was generated by a | ||
| 95 | /// different tool (e.g. `git format-patch` vs libgit2), the commit recreated | ||
| 96 | /// by applying this patch via `commit_create_buffer` will have a different OID. | ||
| 97 | /// Use the `commit` nostr event tag as the authoritative commit identity when | ||
| 98 | /// present. | ||
| 37 | fn extract_commit_id_from_mbox(content: &str) -> Result<String> { | 99 | fn extract_commit_id_from_mbox(content: &str) -> Result<String> { |
| 38 | if !content.starts_with("From ") { | 100 | if !content.starts_with("From ") { |
| 39 | bail!("patch does not start with 'From ' - not a valid mbox format"); | 101 | bail!("patch does not start with 'From ' - not a valid mbox format"); |