upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summary | refs | log | tree | commit | diff
path: root/src/lib
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/git/mod.rs 72
-rw-r--r-- src/lib/mbox_parser.rs 62
2 files changed, 113 insertions, 21 deletions
diff --git a/src/lib/git/mod.rs b/src/lib/git/mod.rs
index 57e8403..9f86b5f 100644
--- a/src/lib/git/mod.rs
+++ b/src/lib/git/mod.rs
@@ -576,15 +576,23 @@ impl RepoActions for Repo {
576 let mut patches_to_apply: Vec<nostr::Event> = patch_and_ancestors 576 let mut patches_to_apply: Vec<nostr::Event> = patch_and_ancestors
577 .into_iter() 577 .into_iter()
578 .filter(|e| { 578 .filter(|e| {
579 let commit_id = get_commit_id_from_patch(e).unwrap(); 579 // When the commit tag is absent, the commit id from the mbox envelope
580 if let Ok(branch_tip) = branch_tip_result { 580 // may not match the reconstructed commit's OID (e.g. GPG-signed commits).
581 !branch_tip.to_string().eq(&commit_id) 581 // In that case we conservatively include the patch for application —
582 && !self 582 // create_commit_from_patch handles idempotency via content-addressed storage.
583 .ancestor_of(&branch_tip, &str_to_sha1(&commit_id).unwrap()) 583 let Ok(commit_id) = get_commit_id_from_patch(e) else {
584 .unwrap() 584 return true;
585 } else { 585 };
586 true 586 let Ok(branch_tip) = branch_tip_result else {
587 } 587 return true;
588 };
589 let Ok(commit_sha1) = str_to_sha1(&commit_id) else {
590 // Commit id is not a valid SHA1 (e.g. placeholder from mbox envelope).
591 // Include conservatively.
592 return true;
593 };
594 !branch_tip.to_string().eq(&commit_id)
595 && !self.ancestor_of(&branch_tip, &commit_sha1).unwrap_or(false)
588 }) 596 })
589 .collect(); 597 .collect();
590 598
@@ -612,12 +620,26 @@ impl RepoActions for Repo {
612 patches_to_apply.reverse(); 620 patches_to_apply.reverse();
613 621
614 for patch in &patches_to_apply { 622 for patch in &patches_to_apply {
615 let commit_id = get_commit_id_from_patch(patch)?; 623 // The commit id from the tag (or mbox envelope) is the authoritative id
616 // only create new commits - otherwise make them the tip 624 // when the optional `commit` nostr tag is present. When it is absent the
617 if !self.does_commit_exist(&commit_id)? { 625 // mbox envelope SHA1 is used as a best-effort value — it will often differ
618 self.create_commit_from_patch(patch, None)?; 626 // from the reconstructed commit's actual OID (e.g. GPG-signed commits).
619 } 627 // We therefore always use the OID returned by create_commit_from_patch as
620 self.create_branch_at_commit(branch_name, &commit_id)?; 628 // the branch tip, falling back to the tag commit id only when the commit
629 // already exists in the repo (meaning it was previously applied correctly).
630 let tag_commit_id = get_commit_id_from_patch(patch).ok();
631 let applied_oid = if let Some(ref id) = tag_commit_id {
632 if self.does_commit_exist(id)? {
633 // Commit already exists (e.g. previously fetched), use it directly.
634 id.clone()
635 } else {
636 self.create_commit_from_patch(patch, None)?.to_string()
637 }
638 } else {
639 // No commit id available at all — apply and use the resulting OID.
640 self.create_commit_from_patch(patch, None)?.to_string()
641 };
642 self.create_branch_at_commit(branch_name, &applied_oid)?;
621 self.checkout(branch_name)?; 643 self.checkout(branch_name)?;
622 } 644 }
623 Ok(patches_to_apply) 645 Ok(patches_to_apply)
@@ -641,8 +663,10 @@ impl RepoActions for Repo {
641 } else { 663 } else {
642 let metadata = crate::mbox_parser::parse_mbox_patch(&patch.content) 664 let metadata = crate::mbox_parser::parse_mbox_patch(&patch.content)
643 .context("failed to parse patch for timestamp")?; 665 .context("failed to parse patch for timestamp")?;
644 let timestamp = metadata.committer_timestamp.unwrap_or(metadata.author_timestamp); 666 let timestamp = metadata
645 667 .committer_timestamp
668 .unwrap_or(metadata.author_timestamp);
669
646 let best_guess = self 670 let best_guess = self
647 .find_best_guess_parent_commit(timestamp) 671 .find_best_guess_parent_commit(timestamp)
648 .context("failed to find best guess parent commit")?; 672 .context("failed to find best guess parent commit")?;
@@ -684,8 +708,10 @@ impl RepoActions for Repo {
684 None 708 None
685 }; 709 };
686 710
687 let author_data = extract_signature_data_with_fallback(&patch.tags, "author", &patch.content)?; 711 let author_data =
688 let committer_data = extract_signature_data_with_fallback(&patch.tags, "committer", &patch.content)?; 712 extract_signature_data_with_fallback(&patch.tags, "author", &patch.content)?;
713 let committer_data =
714 extract_signature_data_with_fallback(&patch.tags, "committer", &patch.content)?;
689 let author_sig = author_data.to_signature()?; 715 let author_sig = author_data.to_signature()?;
690 let committer_sig = committer_data.to_signature()?; 716 let committer_sig = committer_data.to_signature()?;
691 717
@@ -983,7 +1009,9 @@ fn extract_signature_data_from_tags(tags: &Tags, tag_name: &str) -> Result<Signa
983 name: v[1].clone(), 1009 name: v[1].clone(),
984 email: v[2].clone(), 1010 email: v[2].clone(),
985 timestamp: v[3].parse().context("tag time is incorrectly formatted")?, 1011 timestamp: v[3].parse().context("tag time is incorrectly formatted")?,
986 offset_minutes: v[4].parse().context("tag time offset is incorrectly formatted")?, 1012 offset_minutes: v[4]
1013 .parse()
1014 .context("tag time offset is incorrectly formatted")?,
987 }) 1015 })
988} 1016}
989 1017
@@ -1007,7 +1035,9 @@ fn extract_signature_data_with_fallback(
1007 offset_minutes: metadata.author_offset_minutes, 1035 offset_minutes: metadata.author_offset_minutes,
1008 }) 1036 })
1009 } else if tag_name == "committer" { 1037 } else if tag_name == "committer" {
1010 let timestamp = metadata.committer_timestamp.unwrap_or(metadata.author_timestamp); 1038 let timestamp = metadata
1039 .committer_timestamp
1040 .unwrap_or(metadata.author_timestamp);
1011 Ok(SignatureData { 1041 Ok(SignatureData {
1012 name: metadata.author_name, 1042 name: metadata.author_name,
1013 email: metadata.author_email, 1043 email: metadata.author_email,
diff --git a/src/lib/mbox_parser.rs b/src/lib/mbox_parser.rs
index fd2f8ed..48190ba 100644
--- a/src/lib/mbox_parser.rs
+++ b/src/lib/mbox_parser.rs
@@ -1,3 +1,57 @@
1//! Parser for mbox-format git patch content.
2//!
3//! This module is a **fallback path** used only when nostr patch events are
4//! missing optional tags (`author`, `committer`, `description`,
5//! `parent-commit`). When those tags are present they always take precedence —
6//! see [`crate::git::RepoActions::apply_patch_chain`].
7//!
8//! ## Why hand-rolled rather than a library?
9//!
10//! Neither libgit2 (via the `git2` crate) nor gitoxide (`gix`) exposes a
11//! mailinfo-style parser. libgit2's email API is output-only
12//! (`git_email_create_from_commit`); there is no `git_mailinfo` equivalent. The
13//! gitoxide monorepo has no `gix-patch` crate, not even as a placeholder. No
14//! production-quality standalone Rust mbox/git-patch parser crate exists.
15//!
16//! The genuinely hard parts of RFC 2822 parsing (header folding, RFC 2047 MIME
17//! encoded-words for non-ASCII author names and subjects) are delegated to the
18//! `mailparse` crate. The git-specific overlay (mbox envelope line, `[PATCH]`
19//! prefix stripping, commit-message body extraction up to the `---` diffstat
20//! separator) is implemented here, matching the behaviour of `git am`'s
21//! `patchbreak()` function in `mailinfo.c`.
22//!
23//! ## If edge cases are reported
24//!
25//! If real-world patches produce incorrect metadata through this parser, the
26//! escape hatch is to shell out to `git mailinfo` directly:
27//! ```text
28//! git mailinfo /tmp/msg /tmp/patch < input.patch
29//! ```
30//! This prints `Author:`, `Email:`, `Subject:`, `Date:` to stdout and writes
31//! the commit body to `/tmp/msg`. Since ngit already requires `git` in PATH (it
32//! is a git plugin), this is always available. It is not the primary approach
33//! because it requires two temp files and a process spawn per patch, which is
34//! an acceptable cost but unnecessary given that most patches in the ngit `pr/`
35//! flow will have the optional nostr tags and never reach this code.
36//!
37//! ## Known limitation: `---` in commit message body
38//!
39//! The `---` line that separates the commit message from the diffstat is
40//! ambiguous when the commit message itself contains `---` (e.g. Markdown
41//! horizontal rules). This parser stops at the first `---`-only line, matching
42//! git am's own behaviour — `git am` has the same limitation and documents it.
43//! This is not a bug we can fix without lookahead into the diff structure.
44//!
45//! ## Commit ID from mbox envelope
46//!
47//! The SHA1 in the mbox `From <sha1> <date>` envelope line is extracted but
48//! **must not be assumed correct**. libgit2 generates this ID from the commit
49//! object, but if the original commit was GPG-signed, or if the patch was
50//! generated by a different tool, the reconstructed commit (applied via
51//! `apply_to_tree` + `commit_create_buffer`) will have a different OID.
52//! The `commit` nostr tag is the authoritative source for commit identity when
53//! present.
54
1use anyhow::{Context, Result, bail}; 55use anyhow::{Context, Result, bail};
2use chrono::DateTime; 56use chrono::DateTime;
3use mailparse::{MailHeaderMap, parse_headers}; 57use mailparse::{MailHeaderMap, parse_headers};
@@ -34,6 +88,14 @@ pub fn parse_mbox_patch(content: &str) -> Result<PatchMetadata> {
34 }) 88 })
35} 89}
36 90
91/// Extract the SHA1 from the mbox `From <sha1> <date>` envelope line.
92///
93/// **This value should not be assumed correct for the reconstructed commit.**
94/// If the original commit was GPG-signed, or the patch was generated by a
95/// different tool (e.g. `git format-patch` vs libgit2), the commit recreated
96/// by applying this patch via `commit_create_buffer` will have a different OID.
97/// Use the `commit` nostr event tag as the authoritative commit identity when
98/// present.
37fn extract_commit_id_from_mbox(content: &str) -> Result<String> { 99fn extract_commit_id_from_mbox(content: &str) -> Result<String> {
38 if !content.starts_with("From ") { 100 if !content.starts_with("From ") {
39 bail!("patch does not start with 'From ' - not a valid mbox format"); 101 bail!("patch does not start with 'From ' - not a valid mbox format");