From ac4f4fad93e43e8f27bc4b42ace3f8d1bc327a7d Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 08:35:51 +0000 Subject: docs: add initial migration how-to with starter content --- docs/how-to/migrate-ngit-relay-to-ngit-grasp.md | 46 +++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 docs/how-to/migrate-ngit-relay-to-ngit-grasp.md diff --git a/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md b/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md new file mode 100644 index 0000000..e17ba0a --- /dev/null +++ b/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md @@ -0,0 +1,46 @@ +# Migrate ngit-relay to ngit-grasp on NixOS VPS + +**Goal:** Replace an ngit-relay instance on a VPS running NixOS with ngit-grasp. + +**Specifics:** VPS running NixOS. + +## Approach + +1. Deploy ngit-grasp with 'domain' of `.internal` and an `archiveService` of `<production-domain>` running on a different port. This will gather all the events and git data from the production service and from the relays/git servers/grasp servers for repositories that list the service in their announcement event. Syncing all the git data may take an hour. + +2. Analyze the data to see which repositories have not been moved with complete data. Understand why and, for each, decide whether action is needed to move it. + +3. Set the 'domain' to the production URL, turn off archive mode, and point your reverse proxy at the new port. + +## Challenges + +- **ngit-relay accepts any commits/annotated tags** that were at that point in time referenced in the latest state event. **ngit-grasp requires all the git data** to reproduce the latest state. So if the git data is incomplete, it won't accept the repository. + +- **ngit-relay doesn't clear out refs/nostr/** where it doesn't have a PR event. Fortunately the 'PR' (as opposed to patches) functionality is not widely used so we just need to check a few repositories (shakespeare, ngit and gitworkshop). 
+ +## Analysis Categories + +### No action required: + +- **Git Data Complete - Moved** (state event exists in archive and git data reflects it) +- **Invalid Repositories Announcement** (Won't Parse) +- **Deletion Request** (kind 5) tagging announcement event in archive +- **Announcement Not on Production But In Archive** that lists service + +### Action/decision required: + +- **Invalid State Event** (Won't Parse) +- **Incomplete Git Data** (at source and destination) And No State Event at Destination +- **No Announcement In Archive** (and no related delete event) +- **Complete Git Data at source, Announcement but no State Event in Archive** and empty bare git repo +- **State event but incomplete git data in Archive** + +## Analysis Approach + +This analysis and categorization should be scripted to facilitate easy review and decision making. + +There are already some scripts that we need to build on in the old issue worktree to help facilitate this. + +## Gotchas + +Always use `nak req` with `--paginate` flag so we don't miss any events. If we receive increments of 250 eg 500 then it's a red flag that we are not paginating and there are probably more events. 
-- cgit v1.2.3 From a9ea3d2bf6c96bb7c23344d203e971f9c41269c0 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 10:38:51 +0000 Subject: docs: migration update --- docs/how-to/migrate-ngit-relay-to-ngit-grasp.md | 180 +++++++++++++++++++++--- 1 file changed, 164 insertions(+), 16 deletions(-) diff --git a/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md b/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md index e17ba0a..d01bbf2 100644 --- a/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md +++ b/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md @@ -22,25 +22,173 @@ ### No action required: -- **Git Data Complete - Moved** (state event exists in archive and git data reflects it) -- **Invalid Repositories Announcement** (Won't Parse) -- **Deletion Request** (kind 5) tagging announcement event in archive -- **Announcement Not on Production But In Archive** that lists service +| Category | How to Detect | Source | +|----------|---------------|--------| +| **Git Data Complete - Moved** | prod cat1 AND archive cat1 (same repo) | Git sync check | +| **Invalid Announcement** (Won't Parse) | Log: `[PARSE_FAIL] kind=30617` | Archive logs | +| **Deletion Request** | kind 5 event tagging announcement | Event fetch | +| **Announcement Not on Prod But In Archive** | In archive announcements, not in prod | Event comparison | ### Action/decision required: -- **Invalid State Event** (Won't Parse) -- **Incomplete Git Data** (at source and destination) And No State Event at Destination -- **No Announcement In Archive** (and no related delete event) -- **Complete Git Data at source, Announcement but no State Event in Archive** and empty bare git repo -- **State event but incomplete git data in Archive** - -## Analysis Approach - -This analysis and categorization should be scripted to facilitate easy review and decision making. - -There are already some scripts that we need to build on in the old issue worktree to help facilitate this. 
+| Category | How to Detect | Source | +|----------|---------------|--------| +| **Invalid State Event** (Won't Parse) | Log: `[PARSE_FAIL] kind=30618` | Archive logs | +| **Purgatory Expired** (sync should have worked) | Log: `[PURGATORY_EXPIRED]` | Archive logs | +| **Incomplete Git Data** (both relays) | prod cat2/3/4 AND archive cat2/3/4 | Git sync check | +| **No Announcement In Archive** | In prod, not in archive, no deletion | Event comparison | +| **State but incomplete git in Archive** | archive cat3 or cat4 | Git sync check | + +### Manual investigation required: + +- Repos that don't fit above categories +- Repos with unexpected state (e.g., complete in prod, missing in archive, no log entries) + +## Analysis Script Architecture + +The analysis is split into modular phases for fast iteration. Phases 1, 3 and 5 can run locally; Phases 2 and 4 require VPS access. + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 1: Fetch Events (~30s, local) │ +│ scripts/migration/01-fetch-events.sh │ +├─────────────────────────────────────────────────────────────────┤ +│ Fetches from relay: │ +│ - kind 30618 (state events) │ +│ - kind 30617 (announcements) │ +│ - kind 5 (deletion requests) │ +│ │ +│ Run twice: once for prod (relay.ngit.dev), once for archive │ +│ Output: <output-dir>/{state,announcements,deletions}.json │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 2: Git Sync Check (~20 mins, VPS required) │ +│ scripts/migration/10-check-git-sync.sh │ +├─────────────────────────────────────────────────────────────────┤ +│ For each state event, compares refs to actual git data on disk. │ +│ │ +│ Run twice: │ +│ - prod: GIT_BASE=/persistent/relay-ngit-dev-ngit-relay/... 
│ +│ - archive: GIT_BASE=/persistent/grasp/sync-archive/git │ +│ │ +│ Output: git-sync-status.tsv │ +│ repo|npub|state_refs|git_refs|matches|status │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 3: Categorize & Compare (fast, local) │ +│ scripts/migration/20-categorize.sh │ +│ scripts/migration/21-compare-relays.sh │ +├─────────────────────────────────────────────────────────────────┤ +│ 20-categorize.sh applies 4-category logic: │ +│ - cat1: complete match (all refs match) │ +│ - cat2: empty/blank (no git data) │ +│ - cat3: partial match (some refs match) │ +│ - cat4: no match (git exists but refs don't match) │ +│ │ +│ 21-compare-relays.sh finds gaps: │ +│ - in prod but not archive │ +│ - in archive but not prod │ +│ - different status between relays │ +│ │ +│ Output: category-{1,2,3,4}.txt, relay-gaps.txt │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 4: Log-Based Categories (VPS required) │ +│ scripts/migration/30-extract-parse-failures.sh │ +│ scripts/migration/31-extract-purgatory-expiry.sh │ +├─────────────────────────────────────────────────────────────────┤ +│ Extracts structured log entries from journalctl: │ +│ - Parse failures: [PARSE_FAIL] kind=X event_id=Y reason=Z │ +│ - Purgatory expiry: [PURGATORY_EXPIRED] repo=X npub=Y │ +│ │ +│ NOTE: Requires logging improvements in ngit-grasp to emit │ +│ these structured log entries. 
See issue: TBD │ +│ │ +│ Output: parse-failures.txt, purgatory-expired.txt │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 5: Final Classification (fast, local) │ +│ scripts/migration/40-classify-actions.sh │ +├─────────────────────────────────────────────────────────────────┤ +│ Combines all data sources to produce final classification: │ +│ │ +│ Inputs: │ +│ - category files (prod and archive) │ +│ - relay-gaps.txt │ +│ - parse-failures.txt │ +│ - purgatory-expired.txt │ +│ - deletions.json │ +│ │ +│ Output: │ +│ - no-action-required.txt (repo|reason) │ +│ - action-required.txt (repo|reason|suggested_action) │ +│ - manual-investigation.txt (repo|notes) │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Directory Structure + +``` +work/migration-analysis-YYYYMMDD-HHMM/ +├── prod/ +│ ├── raw/ +│ │ ├── state-events.json +│ │ ├── announcements.json +│ │ └── deletions.json +│ ├── git-sync-status.tsv +│ └── category-{1,2,3,4}.txt +├── archive/ +│ ├── raw/ +│ │ ├── state-events.json +│ │ ├── announcements.json +│ │ └── deletions.json +│ ├── git-sync-status.tsv +│ └── category-{1,2,3,4}.txt +├── logs/ +│ ├── parse-failures.txt +│ └── purgatory-expired.txt +├── comparison/ +│ └── relay-gaps.txt +└── results/ + ├── no-action-required.txt + ├── action-required.txt + └── manual-investigation.txt +``` + +## Prerequisites + +- `nak` - Nostr Army Knife for fetching events +- `jq` - JSON processing +- SSH access to VPS for Phase 2 and 4 +- Logging improvements in ngit-grasp for Phase 4 (see Dependencies) + +## Dependencies + +Phase 4 requires structured logging in ngit-grasp. 
Create a separate issue to add: + +```rust +// On parse failure: +tracing::warn!( + target: "migration", + "[PARSE_FAIL] kind={} event_id={} reason=\"{}\"", + event.kind, event.id, reason +); + +// On purgatory expiry: +tracing::warn!( + target: "migration", + "[PURGATORY_EXPIRED] repo={} npub={}", + identifier, npub +); +``` ## Gotchas -Always use `nak req` with `--paginate` flag so we don't miss any events. If we receive increments of 250 eg 500 then it's a red flag that we are not paginating and there are probably more events. +- Always use `nak req` with the `--paginate` flag so we don't miss any events. If the number of events received is an exact multiple of 250 (e.g., exactly 500), it's a red flag that we are not paginating and there are probably more events. +- Phase 1 and 2 should run back-to-back for an accurate snapshot. +- The git sync check (Phase 2) takes ~20 minutes per relay - this is the slow part. +- Existing analysis data from Jan 22 can be used for developing Phase 3/5 logic before re-running Phase 2. 
-- cgit v1.2.3 From 7536160c0ab1b64090ba9b5ab8ea6aef4747bb48 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 10:56:12 +0000 Subject: Add Phase 1 migration script to fetch events from relay - Fetches kind 30618 (state), 30617 (announcement), 5 (deletion) events - Uses nak req --paginate for complete event retrieval - Outputs JSONL format for downstream processing - Includes error handling and timing information --- docs/how-to/migration-scripts/01-fetch-events.sh | 206 +++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100755 docs/how-to/migration-scripts/01-fetch-events.sh diff --git a/docs/how-to/migration-scripts/01-fetch-events.sh b/docs/how-to/migration-scripts/01-fetch-events.sh new file mode 100755 index 0000000..6870659 --- /dev/null +++ b/docs/how-to/migration-scripts/01-fetch-events.sh @@ -0,0 +1,206 @@ +#!/usr/bin/env bash +# +# 01-fetch-events.sh - Fetch nostr events from a relay for migration analysis +# +# PHASE 1 of the ngit-relay to ngit-grasp migration analysis pipeline. +# Fetches kind 30618 (state), 30617 (announcement), and 5 (deletion) events. +# +# USAGE: +# ./01-fetch-events.sh <relay-url> <output-dir> +# +# EXAMPLES: +# # Fetch from production relay +# ./01-fetch-events.sh wss://relay.ngit.dev output/prod +# +# # Fetch from archive relay +# ./01-fetch-events.sh wss://archive.relay.ngit.dev output/archive +# +# # Full migration analysis setup +# mkdir -p work/migration-analysis-$(date +%Y%m%d-%H%M) +# ./01-fetch-events.sh wss://relay.ngit.dev work/migration-analysis-*/prod +# ./01-fetch-events.sh wss://archive.relay.ngit.dev work/migration-analysis-*/archive +# +# OUTPUT: +# <output-dir>/raw/state-events.json - kind 30618 events (one per line, JSONL) +# <output-dir>/raw/announcements.json - kind 30617 events (one per line, JSONL) +# <output-dir>/raw/deletions.json - kind 5 events (one per line, JSONL) +# +# OUTPUT FORMAT: +# Each file contains one JSON event per line (JSONL format). +# Events are the raw nostr event objects as returned by the relay. 
+#
+# PREREQUISITES:
+#   - nak (Nostr Army Knife) - https://github.com/fiatjaf/nak
+#   - jq (for counting/validation)
+#
+# RUNTIME: ~30 seconds per relay (depends on network and event count)
+#
+# NOTES:
+#   - Uses --paginate to ensure all events are fetched (not just first page)
+#   - If event counts are exact multiples of 250, pagination may have failed
+#   - Run Phase 1 and Phase 2 back-to-back for accurate snapshot
+#
+# SEE ALSO:
+#   docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide
+#
+
+set -euo pipefail
+
+# Colors for log output (disabled if stderr is not a terminal).
+# Every log_* helper below writes to stderr, so fd 2 is the stream to test;
+# testing fd 1 would colour a redirected log file or drop colours whenever
+# only stdout is redirected.
+if [[ -t 2 ]]; then
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'
+    NC='\033[0m' # No Color
+else
+    RED=''
+    GREEN=''
+    YELLOW=''
+    BLUE=''
+    NC=''
+fi
+
+# Log helpers: print a tagged message to stderr.
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $*" >&2
+}
+
+log_success() {
+    echo -e "${GREEN}[OK]${NC} $*" >&2
+}
+
+log_warn() {
+    echo -e "${YELLOW}[WARN]${NC} $*" >&2
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $*" >&2
+}
+
+# Print the command synopsis and exit with status 1.
+usage() {
+    echo "Usage: $0 <relay-url> <output-dir>"
+    echo ""
+    echo "Arguments:"
+    echo " relay-url WebSocket URL of the relay (e.g., wss://relay.ngit.dev)"
+    echo " output-dir Directory to store fetched events (e.g., output/prod)"
+    echo ""
+    echo "Examples:"
+    echo " $0 wss://relay.ngit.dev output/prod"
+    echo " $0 wss://archive.relay.ngit.dev output/archive"
+    exit 1
+}
+
+# Check prerequisites
+# Reports every missing tool before exiting, rather than stopping at the first.
+check_prerequisites() {
+    local missing=0
+
+    if ! command -v nak &> /dev/null; then
+        log_error "nak not found. Install from: https://github.com/fiatjaf/nak"
+        missing=1
+    fi
+
+    if ! command -v jq &> /dev/null; then
+        log_error "jq not found. Install with your package manager."
+        missing=1
+    fi
+
+    if [[ $missing -eq 1 ]]; then
+        exit 1
+    fi
+}
+
+# Fetch events of a specific kind
+# Args: $1=relay, $2=kind, $3=output_file, $4=description
+# Outputs: the event count on stdout; progress and warnings on stderr
+# Returns: 1 if nak fails, 0 otherwise
+fetch_kind() {
+    local relay="$1"
+    local kind="$2"
+    local output_file="$3"
+    local description="$4"
+
+    log_info "Fetching $description (kind $kind) from $relay..."
+
+    local start_time
+    start_time=$(date +%s)
+
+    # Keep nak quiet on success, but hold on to its stderr so that a failure
+    # can say why it happened instead of discarding the diagnostics.
+    local err_file
+    err_file=$(mktemp) || return 1
+
+    # Use --paginate to ensure we get all events, not just first page
+    # nak outputs one event per line (JSONL format)
+    if ! nak req -k "$kind" --paginate "$relay" > "$output_file" 2> "$err_file"; then
+        log_error "Failed to fetch $description from $relay"
+        cat "$err_file" >&2
+        rm -f "$err_file"
+        return 1
+    fi
+    rm -f "$err_file"
+
+    local end_time
+    end_time=$(date +%s)
+    local duration=$((end_time - start_time))
+
+    # Count events
+    local count
+    count=$(wc -l < "$output_file" | tr -d ' ')
+
+    # Warn if count is suspicious (exact multiple of 250 suggests pagination issue)
+    if [[ $count -gt 0 ]] && [[ $((count % 250)) -eq 0 ]]; then
+        log_warn "$description count ($count) is exact multiple of 250 - pagination may have failed!"
+    fi
+
+    log_success "Fetched $count $description in ${duration}s -> $output_file"
+
+    echo "$count"
+}
+
+# Main
+main() {
+    if [[ $# -ne 2 ]]; then
+        usage
+    fi
+
+    local relay="$1"
+    local output_dir="$2"
+
+    # Validate relay URL
+    if [[ ! "$relay" =~ ^wss?:// ]]; then
+        log_error "Invalid relay URL: $relay (must start with ws:// or wss://)"
+        exit 1
+    fi
+
+    check_prerequisites
+
+    log_info "Starting event fetch from $relay"
+    log_info "Output directory: $output_dir"
+
+    # Create output directory structure
+    local raw_dir="$output_dir/raw"
+    mkdir -p "$raw_dir"
+
+    local total_start
+    total_start=$(date +%s)
+
+    # Fetch each event type
+    # Each call's stdout (the count) is captured; its log lines go to stderr.
+    local state_count announcement_count deletion_count
+
+    state_count=$(fetch_kind "$relay" 30618 "$raw_dir/state-events.json" "state events")
+    announcement_count=$(fetch_kind "$relay" 30617 "$raw_dir/announcements.json" "announcements")
+    deletion_count=$(fetch_kind "$relay" 5 "$raw_dir/deletions.json" "deletion requests")
+
+    local total_end
+    total_end=$(date +%s)
+    local total_duration=$((total_end - total_start))
+
+    # Summary
+    echo ""
+    log_info "=== Fetch Summary ==="
+    log_info "Relay: $relay"
+    log_info "Output: $output_dir"
+    log_info "State events (30618): $state_count"
+    log_info "Announcements (30617): $announcement_count"
+    log_info "Deletions (5): $deletion_count"
+    log_info "Total time: ${total_duration}s"
+    echo ""
+
+    # Output file listing for easy copy/paste
+    log_info "Output files:"
+    echo " $raw_dir/state-events.json"
+    echo " $raw_dir/announcements.json"
+    echo " $raw_dir/deletions.json"
+}
+
+main "$@"
-- cgit v1.2.3 From a5504395c946bdf28b5ad0e0148ff371ca33d4d3 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 11:06:12 +0000 Subject: Add Phase 3 migration scripts for categorization and comparison - 20-categorize.sh: Categorizes git sync status into 4 categories - 21-compare-relays.sh: Compares prod vs archive to find gaps - Updated how-to doc with detailed Phase 3 outputs and directory structure - Tested with Jan 22 data: 231 complete in both, 276 complete in prod but missing from archive --- docs/how-to/migrate-ngit-relay-to-ngit-grasp.md | 61 +++-- docs/how-to/migration-scripts/20-categorize.sh | 212 +++++++++++++++ 
docs/how-to/migration-scripts/21-compare-relays.sh | 294 +++++++++++++++++++++ 3 files changed, 543 insertions(+), 24 deletions(-) create mode 100755 docs/how-to/migration-scripts/20-categorize.sh create mode 100755 docs/how-to/migration-scripts/21-compare-relays.sh diff --git a/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md b/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md index d01bbf2..4c3a4ba 100644 --- a/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md +++ b/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md @@ -51,7 +51,7 @@ The analysis is split into modular phases for fast iteration. Phases 1-3 and 5 c ``` ┌─────────────────────────────────────────────────────────────────┐ │ PHASE 1: Fetch Events (~30s, local) │ -│ scripts/migration/01-fetch-events.sh │ +│ migration-scripts/01-fetch-events.sh │ ├─────────────────────────────────────────────────────────────────┤ │ Fetches from relay: │ │ - kind 30618 (state events) │ @@ -64,7 +64,7 @@ The analysis is split into modular phases for fast iteration. Phases 1-3 and 5 c ↓ ┌─────────────────────────────────────────────────────────────────┐ │ PHASE 2: Git Sync Check (~20 mins, VPS required) │ -│ scripts/migration/10-check-git-sync.sh │ +│ migration-scripts/10-check-git-sync.sh │ ├─────────────────────────────────────────────────────────────────┤ │ For each state event, compares refs to actual git data on disk. │ │ │ @@ -78,8 +78,8 @@ The analysis is split into modular phases for fast iteration. 
Phases 1-3 and 5 c ↓ ┌─────────────────────────────────────────────────────────────────┐ │ PHASE 3: Categorize & Compare (fast, local) │ -│ scripts/migration/20-categorize.sh │ -│ scripts/migration/21-compare-relays.sh │ +│ migration-scripts/20-categorize.sh │ +│ migration-scripts/21-compare-relays.sh │ ├─────────────────────────────────────────────────────────────────┤ │ 20-categorize.sh applies 4-category logic: │ │ - cat1: complete match (all refs match) │ @@ -87,18 +87,20 @@ The analysis is split into modular phases for fast iteration. Phases 1-3 and 5 c │ - cat3: partial match (some refs match) │ │ - cat4: no match (git exists but refs don't match) │ │ │ -│ 21-compare-relays.sh finds gaps: │ -│ - in prod but not archive │ -│ - in archive but not prod │ -│ - different status between relays │ +│ 21-compare-relays.sh compares prod vs archive: │ +│ - complete-in-both.txt (no action needed) │ +│ - complete-prod-missing-archive.txt (needs investigation) │ +│ - complete-prod-incomplete-archive.txt (sync in progress?) │ +│ - incomplete-in-both.txt (git data incomplete) │ +│ - in-archive-not-prod.txt (deleted or new) │ │ │ -│ Output: category-{1,2,3,4}.txt, relay-gaps.txt │ +│ Output: category-{1,2,3,4}.txt, comparison/*.txt, summary.txt │ └─────────────────────────────────────────────────────────────────┘ ↓ ┌─────────────────────────────────────────────────────────────────┐ │ PHASE 4: Log-Based Categories (VPS required) │ -│ scripts/migration/30-extract-parse-failures.sh │ -│ scripts/migration/31-extract-purgatory-expiry.sh │ +│ migration-scripts/30-extract-parse-failures.sh │ +│ migration-scripts/31-extract-purgatory-expiry.sh │ ├─────────────────────────────────────────────────────────────────┤ │ Extracts structured log entries from journalctl: │ │ - Parse failures: [PARSE_FAIL] kind=X event_id=Y reason=Z │ @@ -112,7 +114,7 @@ The analysis is split into modular phases for fast iteration. 
Phases 1-3 and 5 c ↓ ┌─────────────────────────────────────────────────────────────────┐ │ PHASE 5: Final Classification (fast, local) │ -│ scripts/migration/40-classify-actions.sh │ +│ migration-scripts/40-classify-actions.sh │ ├─────────────────────────────────────────────────────────────────┤ │ Combines all data sources to produce final classification: │ │ │ @@ -136,27 +138,38 @@ The analysis is split into modular phases for fast iteration. Phases 1-3 and 5 c work/migration-analysis-YYYYMMDD-HHMM/ ├── prod/ │ ├── raw/ -│ │ ├── state-events.json -│ │ ├── announcements.json -│ │ └── deletions.json -│ ├── git-sync-status.tsv -│ └── category-{1,2,3,4}.txt +│ │ ├── state-events.json # Phase 1 output +│ │ ├── announcements.json # Phase 1 output +│ │ └── deletions.json # Phase 1 output +│ ├── git-sync-status.tsv # Phase 2 output (optional) +│ ├── category1-complete-match.txt # Phase 2/3 output +│ ├── category2-empty-blank.txt # Phase 2/3 output +│ ├── category3-partial-match.txt # Phase 2/3 output +│ └── category4-no-match.txt # Phase 2/3 output ├── archive/ │ ├── raw/ │ │ ├── state-events.json │ │ ├── announcements.json │ │ └── deletions.json │ ├── git-sync-status.tsv -│ └── category-{1,2,3,4}.txt +│ ├── category1-complete-match.txt +│ ├── category2-empty-blank.txt +│ ├── category3-partial-match.txt +│ └── category4-no-match.txt ├── logs/ -│ ├── parse-failures.txt -│ └── purgatory-expired.txt +│ ├── parse-failures.txt # Phase 4 output +│ └── purgatory-expired.txt # Phase 4 output ├── comparison/ -│ └── relay-gaps.txt +│ ├── complete-in-both.txt # Phase 3 output (no action) +│ ├── complete-prod-missing-archive.txt # Phase 3 output (investigate) +│ ├── complete-prod-incomplete-archive.txt # Phase 3 output (sync in progress?) 
+│ ├── incomplete-in-both.txt # Phase 3 output (git incomplete) +│ ├── in-archive-not-prod.txt # Phase 3 output (deleted/new) +│ └── summary.txt # Phase 3 output (human-readable) └── results/ - ├── no-action-required.txt - ├── action-required.txt - └── manual-investigation.txt + ├── no-action-required.txt # Phase 5 output + ├── action-required.txt # Phase 5 output + └── manual-investigation.txt # Phase 5 output ``` ## Prerequisites diff --git a/docs/how-to/migration-scripts/20-categorize.sh b/docs/how-to/migration-scripts/20-categorize.sh new file mode 100755 index 0000000..f47eb55 --- /dev/null +++ b/docs/how-to/migration-scripts/20-categorize.sh @@ -0,0 +1,212 @@ +#!/usr/bin/env bash +# +# 20-categorize.sh - Categorize git sync status into 4 categories +# +# PHASE 3a of the ngit-relay to ngit-grasp migration analysis pipeline. +# Takes git-sync-status.tsv from Phase 2 and categorizes into 4 files. +# +# USAGE: +# ./20-categorize.sh <git-sync-status.tsv> <output-dir> +# +# EXAMPLES: +# ./20-categorize.sh output/prod/git-sync-status.tsv output/prod +# ./20-categorize.sh output/archive/git-sync-status.tsv output/archive +# +# INPUT FORMAT (git-sync-status.tsv): +# Tab-separated values with columns: +# repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches[<TAB>reason] +# +# Where reason is optional and can be: no_git_dir, empty_refs, no_state_refs +# +# OUTPUT: +# <output-dir>/category1-complete-match.txt - All refs match perfectly +# <output-dir>/category2-empty-blank.txt - No git data available +# <output-dir>/category3-partial-match.txt - Some refs match +# <output-dir>/category4-no-match.txt - Git exists but refs don't match +# +# OUTPUT FORMAT: +# repo | npub | state_refs=N | git_refs=N | matches=N [| reason=X] +# +# CATEGORIES: +# 1. Complete Match: state_refs == git_refs == matches (all > 0) +# 2. Empty/Blank: git_refs == 0 OR reason in (no_git_dir, empty_refs, no_state_refs) +# 3. Partial Match: matches > 0 AND matches < state_refs +# 4. 
No Match: git_refs > 0 AND matches == 0
+#
+# PREREQUISITES:
+#   - awk (standard Unix tool)
+#
+# RUNTIME: < 1 second (local processing only)
+#
+# SEE ALSO:
+#   docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide
+#   10-check-git-sync.sh - Phase 2 script that produces input for this script
+#
+
+set -euo pipefail
+
+# Colors for log output (disabled if stderr is not a terminal).
+# Every log_* helper below writes to stderr, so fd 2 is the stream to test.
+if [[ -t 2 ]]; then
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'
+    NC='\033[0m'
+else
+    RED=''
+    GREEN=''
+    YELLOW=''
+    BLUE=''
+    NC=''
+fi
+
+# Log helpers: print a tagged message to stderr.
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $*" >&2
+}
+
+log_success() {
+    echo -e "${GREEN}[OK]${NC} $*" >&2
+}
+
+log_warn() {
+    echo -e "${YELLOW}[WARN]${NC} $*" >&2
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $*" >&2
+}
+
+# Print the command synopsis and exit with status 1.
+usage() {
+    echo "Usage: $0 <git-sync-status.tsv> <output-dir>"
+    echo ""
+    echo "Arguments:"
+    echo " git-sync-status.tsv TSV file from Phase 2 (10-check-git-sync.sh)"
+    echo " output-dir Directory to store categorized output"
+    echo ""
+    echo "Examples:"
+    echo " $0 output/prod/git-sync-status.tsv output/prod"
+    echo " $0 output/archive/git-sync-status.tsv output/archive"
+    echo ""
+    echo "Input format (TSV):"
+    echo " repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches[<TAB>reason]"
+    echo ""
+    echo "Output files:"
+    echo " category1-complete-match.txt - All refs match"
+    echo " category2-empty-blank.txt - No git data"
+    echo " category3-partial-match.txt - Some refs match"
+    echo " category4-no-match.txt - Git exists, refs don't match"
+    exit 1
+}
+
+# Main
+# Reads the Phase 2 TSV, appends each row to one of four category files and
+# prints a summary of the per-category counts.
+main() {
+    if [[ $# -ne 2 ]]; then
+        usage
+    fi
+
+    local input_file="$1"
+    local output_dir="$2"
+
+    # Validate input file
+    if [[ ! -f "$input_file" ]]; then
+        log_error "Input file not found: $input_file"
+        exit 1
+    fi
+
+    log_info "Categorizing git sync status"
+    log_info "Input: $input_file"
+    log_info "Output: $output_dir"
+
+    # Create output directory
+    mkdir -p "$output_dir"
+
+    # Output files
+    local cat1="$output_dir/category1-complete-match.txt"
+    local cat2="$output_dir/category2-empty-blank.txt"
+    local cat3="$output_dir/category3-partial-match.txt"
+    local cat4="$output_dir/category4-no-match.txt"
+
+    # Clear previous results
+    > "$cat1"
+    > "$cat2"
+    > "$cat3"
+    > "$cat4"
+
+    # Process input file with awk
+    # Input: repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches[<TAB>reason]
+    #
+    # awk's stderr is deliberately NOT merged into the pipe: the loop below
+    # only acts on the COUNTS: line, so merged diagnostics would be dropped.
+    awk -F'\t' -v cat1="$cat1" -v cat2="$cat2" -v cat3="$cat3" -v cat4="$cat4" '
+        BEGIN {
+            count1 = 0; count2 = 0; count3 = 0; count4 = 0
+        }
+        NR == 1 && /^repo/ { next } # Skip header if present
+        NF >= 5 {
+            repo = $1
+            npub = $2
+            state_refs = int($3)
+            git_refs = int($4)
+            matches = int($5)
+            reason = (NF >= 6) ? $6 : ""
+
+            # Format output line
+            if (reason != "") {
+                line = repo " | " npub " | state_refs=" state_refs " | git_refs=" git_refs " | matches=" matches " | reason=" reason
+            } else {
+                line = repo " | " npub " | state_refs=" state_refs " | git_refs=" git_refs " | matches=" matches
+            }
+
+            # Categorize
+            if (reason == "no_git_dir" || reason == "empty_refs" || reason == "no_state_refs" || git_refs == 0) {
+                # Category 2: Empty/Blank
+                print line >> cat2
+                count2++
+            } else if (state_refs > 0 && state_refs == git_refs && matches == state_refs) {
+                # Category 1: Complete Match
+                print line >> cat1
+                count1++
+            } else if (matches > 0 && matches < state_refs) {
+                # Category 3: Partial Match
+                print line >> cat3
+                count3++
+            } else if (git_refs > 0 && matches == 0) {
+                # Category 4: No Match
+                print line >> cat4
+                count4++
+            } else if (matches > 0) {
+                # Edge case: matches > 0 but does not fit other categories
+                # This can happen when git_refs > state_refs but all state refs match
+                # Treat as partial match
+                print line >> cat3
+                count3++
+            } else {
+                # Fallback: treat as category 2 (empty/blank)
+                print line >> cat2
+                count2++
+            }
+        }
+        END {
+            total = count1 + count2 + count3 + count4
+            print "COUNTS:" count1 ":" count2 ":" count3 ":" count4 ":" total
+        }
+    ' "$input_file" | while IFS= read -r line; do
+        if [[ "$line" =~ ^COUNTS: ]]; then
+            # Parse counts from awk output
+            IFS=':' read -r _ c1 c2 c3 c4 total <<< "$line"
+
+            echo ""
+            log_info "=== Categorization Summary ==="
+            log_info "Total entries: $total"
+            log_success "Category 1 (Complete Match): $c1"
+            log_warn "Category 2 (Empty/Blank): $c2"
+            log_warn "Category 3 (Partial Match): $c3"
+            log_error "Category 4 (No Match): $c4"
+            echo ""
+            log_info "Output files:"
+            echo " $cat1"
+            echo " $cat2"
+            echo " $cat3"
+            echo " $cat4"
+        fi
+    done
+}
+
+main "$@"
diff --git a/docs/how-to/migration-scripts/21-compare-relays.sh b/docs/how-to/migration-scripts/21-compare-relays.sh
new file mode 100755
index 0000000..6b40dc8
--- /dev/null
+++ b/docs/how-to/migration-scripts/21-compare-relays.sh
@@ -0,0 +1,294 @@
+#!/usr/bin/env bash
+#
+# 21-compare-relays.sh - Compare prod vs archive category files to find gaps
+#
+# PHASE 3b of the ngit-relay to ngit-grasp migration analysis pipeline.
# Compares categorized output from prod and archive to identify:
#   - Repos complete in prod but missing/incomplete in archive
#   - Repos in archive but not in prod
#   - Status differences between relays
#
# USAGE:
#   ./21-compare-relays.sh <prod-dir> <archive-dir> <output-dir>
#
# EXAMPLES:
#   ./21-compare-relays.sh output/prod output/archive output/comparison
#
# INPUT:
#   Both prod-dir and archive-dir must contain:
#     - category1-complete-match.txt
#     - category2-empty-blank.txt
#     - category3-partial-match.txt
#     - category4-no-match.txt
#
# OUTPUT:
#   <output-dir>/complete-in-both.txt                 - Repos complete in both relays (no action)
#   <output-dir>/complete-prod-missing-archive.txt    - Complete in prod, not in archive cat1
#   <output-dir>/complete-prod-incomplete-archive.txt - Complete in prod, incomplete in archive
#   <output-dir>/incomplete-in-both.txt               - Incomplete in both relays
#   <output-dir>/in-archive-not-prod.txt              - In archive but not in prod
#   <output-dir>/summary.txt                          - Human-readable summary
#
# OUTPUT FORMAT:
#   Each file contains lines in the format:
#     repo | npub | prod_status | archive_status
#
# PREREQUISITES:
#   - awk, sort (standard Unix tools)
#
# RUNTIME: < 1 second (local processing only)
#
# SEE ALSO:
#   docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide
#   20-categorize.sh - Phase 3a script that produces input for this script
#

set -euo pipefail

# Colors for log output. Every log_* helper writes to stderr, so colors are
# enabled only when stderr (fd 2) is a terminal; otherwise escape codes would
# end up in redirected log files.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi

# Logging helpers. The color variables are expanded with %b (they hold
# backslash escapes); the message itself is printed verbatim with %s so that
# backslashes in paths or repo names are never interpreted.
log_info() {
    printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$*" >&2
}

log_success() {
    printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$*" >&2
}

log_warn() {
    printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*" >&2
}

log_error() {
    printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2
}

# Print usage to stdout and exit with status 1.
usage() {
    echo "Usage: $0 <prod-dir> <archive-dir> <output-dir>"
    echo ""
    echo "Arguments:"
    echo "  prod-dir      Directory containing prod category files"
    echo "  archive-dir   Directory containing archive category files"
    echo "  output-dir    Directory to store comparison results"
    echo ""
    echo "Examples:"
    echo "  $0 output/prod output/archive output/comparison"
    echo ""
    echo "Required input files in each directory:"
    echo "  category1-complete-match.txt"
    echo "  category2-empty-blank.txt"
    echo "  category3-partial-match.txt"
    echo "  category4-no-match.txt"
    exit 1
}

# Extract repo|npub key from category line
# Input (stdin): "repo | npub | state_refs=N | ..."
# Output (stdout): "repo|npub"
extract_key() {
    awk -F' \\| ' '{print $1 "|" $2}'
}

# Build lookup table from category files
# Args: $1=directory, $2=output_file
# Output file lines: repo|npub|catN|<original category line>, sorted by key.
# Category files that do not exist are skipped; blank lines are ignored.
build_lookup() {
    local dir="$1"
    local output="$2"
    local n file

    for n in 1 2 3 4; do
        # Glob directly (no ls, no unquoted expansion). An unmatched pattern
        # stays literal and is rejected by the -f test.
        for file in "$dir"/category"${n}"-*.txt; do
            [[ -f "$file" ]] || continue
            # One awk per file instead of one per line.
            awk -F' \\| ' -v tag="cat${n}" \
                'NF { print $1 "|" $2 "|" tag "|" $0 }' "$file"
        done
    done | LC_ALL=C sort -t'|' -k1,2 > "$output"
}

# Main
# Args: $1=prod-dir, $2=archive-dir, $3=output-dir
# Writes the five comparison files plus summary.txt into output-dir and prints
# a summary. Exits 1 on bad arguments or missing inputs.
main() {
    if [[ $# -ne 3 ]]; then
        usage
    fi

    local prod_dir="$1"
    local archive_dir="$2"
    local output_dir="$3"
    local dir name

    # Validate input directories
    for dir in "$prod_dir" "$archive_dir"; do
        if [[ ! -d "$dir" ]]; then
            log_error "Directory not found: $dir"
            exit 1
        fi
        if [[ ! -f "$dir/category1-complete-match.txt" ]]; then
            log_error "Missing category1-complete-match.txt in $dir"
            exit 1
        fi
        # The other category files are optional for the comparison itself,
        # but their absence usually means an earlier phase did not run.
        for name in category2-empty-blank category3-partial-match category4-no-match; do
            if [[ ! -f "$dir/$name.txt" ]]; then
                log_warn "Missing $name.txt in $dir (treated as empty)"
            fi
        done
    done

    log_info "Comparing relay categories"
    log_info "Prod: $prod_dir"
    log_info "Archive: $archive_dir"
    log_info "Output: $output_dir"

    # Create output directory
    mkdir -p "$output_dir"

    # Create temp dir for the lookup tables. tmp_dir is local, so its value is
    # baked (shell-quoted) into the trap string now rather than at exit time.
    local tmp_dir quoted_tmp
    tmp_dir=$(mktemp -d)
    printf -v quoted_tmp '%q' "$tmp_dir"
    # shellcheck disable=SC2064
    trap "rm -rf -- $quoted_tmp" EXIT

    log_info "Building lookup tables..."

    # Lookup tables: key|category|full_line
    build_lookup "$prod_dir" "$tmp_dir/prod_lookup.txt"
    build_lookup "$archive_dir" "$tmp_dir/archive_lookup.txt"

    log_info "Comparing categories..."

    local f_both="$output_dir/complete-in-both.txt"
    local f_missing="$output_dir/complete-prod-missing-archive.txt"
    local f_incomplete="$output_dir/complete-prod-incomplete-archive.txt"
    local f_both_incomplete="$output_dir/incomplete-in-both.txt"
    local f_archive_only="$output_dir/in-archive-not-prod.txt"

    # Initialize output files so they exist even when empty
    : > "$f_both"
    : > "$f_missing"
    : > "$f_incomplete"
    : > "$f_both_incomplete"
    : > "$f_archive_only"

    # Single pass with exact (string, not regex) key matching. Paths are
    # handed over through the environment so awk never escape-processes them.
    #   - archive lookup is loaded first; the first entry per key wins
    #   - prod cat1 entries are classified immediately
    #   - prod cat2-4 entries are held back and written grouped by category
    #   - archive keys never seen in prod are written last, in sorted order
    # A repo that is incomplete in prod but complete in archive is unusual
    # but not an error, and is deliberately not reported.
    CMP_ARCHIVE_LOOKUP="$tmp_dir/archive_lookup.txt" \
    CMP_BOTH="$f_both" \
    CMP_MISSING="$f_missing" \
    CMP_INCOMPLETE="$f_incomplete" \
    CMP_BOTH_INCOMPLETE="$f_both_incomplete" \
    CMP_ARCHIVE_ONLY="$f_archive_only" \
    awk -F'|' '
        BEGIN {
            archive_lookup    = ENVIRON["CMP_ARCHIVE_LOOKUP"]
            f_both            = ENVIRON["CMP_BOTH"]
            f_missing         = ENVIRON["CMP_MISSING"]
            f_incomplete      = ENVIRON["CMP_INCOMPLETE"]
            f_both_incomplete = ENVIRON["CMP_BOTH_INCOMPLETE"]
            f_archive_only    = ENVIRON["CMP_ARCHIVE_ONLY"]

            while ((getline line < archive_lookup) > 0) {
                split(line, part, "|")
                key = part[1] "|" part[2]
                if (!(key in archive_cat)) {
                    archive_cat[key] = part[3]
                    archive_keys[++archive_n] = key
                }
            }
            close(archive_lookup)
        }
        {
            key = $1 "|" $2
            in_prod[key] = 1
            lead = $1 " | " $2 " | prod="

            if ($3 == "cat1") {
                if (!(key in archive_cat)) {
                    print lead "complete | archive=missing" >> f_missing
                } else if (archive_cat[key] == "cat1") {
                    print lead "complete | archive=complete" >> f_both
                } else {
                    print lead "complete | archive=" archive_cat[key] >> f_incomplete
                }
            } else if ($3 == "cat2" || $3 == "cat3" || $3 == "cat4") {
                if (!(key in archive_cat)) {
                    held[$3] = held[$3] lead $3 " | archive=missing\n"
                } else if (archive_cat[key] != "cat1") {
                    held[$3] = held[$3] lead $3 " | archive=" archive_cat[key] "\n"
                }
            }
        }
        END {
            printf "%s%s%s", held["cat2"], held["cat3"], held["cat4"] >> f_both_incomplete
            for (i = 1; i <= archive_n; i++) {
                key = archive_keys[i]
                if (!(key in in_prod)) {
                    split(key, part, "|")
                    print part[1] " | " part[2] " | prod=missing | archive=" archive_cat[key] >> f_archive_only
                }
            }
        }
    ' "$tmp_dir/prod_lookup.txt"

    # Count results
    local count_both count_missing count_incomplete count_both_incomplete count_archive_only
    count_both=$(wc -l < "$f_both" | tr -d ' ')
    count_missing=$(wc -l < "$f_missing" | tr -d ' ')
    count_incomplete=$(wc -l < "$f_incomplete" | tr -d ' ')
    count_both_incomplete=$(wc -l < "$f_both_incomplete" | tr -d ' ')
    count_archive_only=$(wc -l < "$f_archive_only" | tr -d ' ')

    # Generate summary
    cat > "$output_dir/summary.txt" << EOF
# Relay Comparison Summary
Generated: $(date -Iseconds)

## Input
- Prod: $prod_dir
- Archive: $archive_dir

## Results

### No Action Required
- Complete in both relays: $count_both

### Action/Decision Required
- Complete in prod, MISSING from archive: $count_missing
- Complete in prod, INCOMPLETE in archive: $count_incomplete
- Incomplete in BOTH relays: $count_both_incomplete

### For Reference
- In archive but not in prod: $count_archive_only

## Files
- complete-in-both.txt: Repos successfully migrated (no action)
- complete-prod-missing-archive.txt: Need investigation - why not in archive?
- complete-prod-incomplete-archive.txt: Archive sync may still be in progress
- incomplete-in-both.txt: Git data incomplete on both relays
- in-archive-not-prod.txt: May be deleted from prod or new to archive

## Next Steps
1. Review complete-prod-missing-archive.txt - these repos need attention
2. Check if archive sync is still running for incomplete entries
3. Cross-reference with deletion events (kind 5) from Phase 1
4. Use Phase 4 logs to understand parse failures and purgatory expiry
EOF

    # Display summary
    echo ""
    log_info "=== Comparison Summary ==="
    log_success "Complete in both: $count_both (no action needed)"
    log_error "Complete in prod, MISSING from archive: $count_missing"
    log_warn "Complete in prod, incomplete in archive: $count_incomplete"
    log_warn "Incomplete in both: $count_both_incomplete"
    log_info "In archive only: $count_archive_only"
    echo ""
    log_info "Output files:"
    echo "  $f_both"
    echo "  $f_missing"
    echo "  $f_incomplete"
    echo "  $f_both_incomplete"
    echo "  $f_archive_only"
    echo "  $output_dir/summary.txt"
}
docs/how-to/migration-scripts/10-check-git-sync.sh diff --git a/docs/how-to/migration-scripts/10-check-git-sync.sh b/docs/how-to/migration-scripts/10-check-git-sync.sh new file mode 100755 index 0000000..493d50a --- /dev/null +++ b/docs/how-to/migration-scripts/10-check-git-sync.sh @@ -0,0 +1,557 @@ +#!/usr/bin/env bash +# +# 10-check-git-sync.sh - Compare state events to actual git data on disk +# +# PHASE 2 of the ngit-relay to ngit-grasp migration analysis pipeline. +# Compares kind 30618 state events against actual git refs on disk. +# +# USAGE: +# ./10-check-git-sync.sh [--categorize] +# +# EXAMPLES: +# # Check prod relay against prod git data +# ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/ngit-relay/git output/prod +# +# # Check archive relay against archive git data +# ./10-check-git-sync.sh output/archive/raw/state-events.json /var/lib/ngit-relay-archive/git output/archive +# +# # Check and categorize in one step (convenience mode) +# ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/ngit-relay/git output/prod --categorize +# +# INPUT: +# state-events.json - JSONL file from Phase 1 (01-fetch-events.sh) +# One kind 30618 event per line +# git-base-dir - Base directory containing git repos +# Structure: //.git/ +# +# OUTPUT: +# /git-sync-status.tsv - Tab-separated values: +# reponpubstate_refsgit_refsmatchesreason +# +# With --categorize flag, also outputs: +# /category1-complete-match.txt +# /category2-empty-blank.txt +# /category3-partial-match.txt +# /category4-no-match.txt +# +# CATEGORIES: +# 1. Complete Match - All refs in state event match git data perfectly +# 2. Empty/Blank - No git data available (directory missing or empty) +# 3. Partial Match - Some refs match, some don't +# 4. 
#      No Match - Git data exists but commit hashes don't match
#
# PREREQUISITES:
#   - nak (for npub encoding) - https://github.com/fiatjaf/nak
#   - jq (for JSON parsing)
#   - Read access to git directories (may need sudo)
#
# RUNTIME: ~20 minutes on VPS (git operations are slow)
#
# NOTES:
#   - Must run on VPS with access to git directories
#   - Progress indicator updates every 10 events
#   - Handles packed refs (git show-ref) and loose refs
#
# SEE ALSO:
#   docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide
#   01-fetch-events.sh - Phase 1 script that produces input for this script
#   20-categorize.sh - Phase 3a script that consumes output from this script
#

set -euo pipefail

# Colors for log output. Every log_* helper writes to stderr, so colors are
# enabled only when stderr (fd 2) is a terminal; otherwise escape codes would
# end up in redirected log files.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi

# Logging helpers. The color variables are expanded with %b (they hold
# backslash escapes); the message itself is printed verbatim with %s so that
# backslashes in paths or repo names are never interpreted.
log_info() {
    printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$*" >&2
}

log_success() {
    printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$*" >&2
}

log_warn() {
    printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*" >&2
}

log_error() {
    printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2
}

# Progress update: carriage return first and no trailing newline, so each
# call overwrites the previous progress line on a terminal.
log_progress() {
    printf '\r%b[PROGRESS]%b %s' "$BLUE" "$NC" "$*" >&2
}

# Print usage to stdout and exit with status 1.
usage() {
    echo "Usage: $0 <state-events.json> <git-base-dir> <output-dir> [--categorize]"
    echo ""
    echo "Arguments:"
    echo "  state-events.json  JSONL file from Phase 1 (kind 30618 events)"
    echo "  git-base-dir       Base directory for git repos (e.g., /var/lib/ngit-relay/git)"
    echo "  output-dir         Directory to store output files"
    echo "  --categorize       Optional: also output category files (like Phase 3)"
    echo ""
    echo "Examples:"
    echo "  $0 output/prod/raw/state-events.json /var/lib/ngit-relay/git output/prod"
    echo "  $0 output/archive/raw/state-events.json /var/lib/ngit-relay-archive/git output/archive"
    echo ""
    echo "Output:"
    echo "  git-sync-status.tsv - TSV with: repo, npub, state_refs, git_refs, matches, reason"
    exit 1
}
# Check prerequisites
# Args: optional - the script's own arguments, echoed in the nix-shell hint
# Exits 1 if nak or jq is not on PATH.
check_prerequisites() {
    local missing=0

    if ! command -v nak &> /dev/null; then
        log_error "nak not found. Install from: https://github.com/fiatjaf/nak"
        log_error "Or run: nix-shell -p nak jq --run \"$0 $*\""
        missing=1
    fi

    if ! command -v jq &> /dev/null; then
        log_error "jq not found. Install with your package manager."
        missing=1
    fi

    if [[ $missing -eq 1 ]]; then
        exit 1
    fi
}

# Convert hex pubkey to npub
# Args: $1=hex_pubkey
# Returns: npub string or empty on error
hex_to_npub() {
    local hex="$1"
    nak encode npub "$hex" 2>/dev/null || echo ""
}

# Count refs in state event (only refs/heads/)
# Args: $1=event_json
# Returns: count ("0" if jq fails)
count_state_refs() {
    local event="$1"
    jq '[.tags[] | select(.[0] | startswith("refs/heads/"))] | length' <<< "$event" 2>/dev/null || echo "0"
}

# Get git refs from disk
# Args: $1=git_dir
# Returns: count of refs/heads/ refs, always exactly one line of output
count_git_refs() {
    local git_dir="$1"
    local heads

    if [[ ! -d "$git_dir" ]]; then
        echo "0"
        return
    fi

    # git show-ref handles both packed and loose refs. It exits non-zero when
    # there are no heads or the directory is not a readable repository, so the
    # output is captured first and only counted on success. Counting inside
    # the `if` pipeline would print a count and then fall through to print the
    # fallback count as well.
    if heads=$(git --git-dir="$git_dir" show-ref --heads 2>/dev/null) && [[ -n "$heads" ]]; then
        printf '%s\n' "$heads" | wc -l | tr -d ' '
        return
    fi

    # Fallback: count loose refs
    if [[ -d "$git_dir/refs/heads" ]]; then
        # `|| true`: a partially unreadable tree still yields a count instead
        # of failing the caller under `set -e -o pipefail`.
        find "$git_dir/refs/heads" -type f 2>/dev/null | wc -l | tr -d ' ' || true
    else
        echo "0"
    fi
}

# Get ref hash from git directory
# Args: $1=git_dir, $2=ref_path (e.g., refs/heads/main)
# Returns: commit hash or empty
get_git_ref_hash() {
    local git_dir="$1"
    local ref_path="$2"

    # --verify requires the exact ref path; plain show-ref pattern-matches on
    # trailing path components and could return a different ref's hash.
    local hash
    hash=$(git --git-dir="$git_dir" show-ref --verify --hash "$ref_path" 2>/dev/null || true)

    if [[ -n "$hash" ]]; then
        echo "$hash"
        return
    fi

    # Fallback: read loose ref file
    local ref_file="$git_dir/$ref_path"
    if [[ -f "$ref_file" ]]; then
        tr -d '\n' < "$ref_file" 2>/dev/null || true
    else
        echo ""
    fi
}

# Compare state event refs to git refs
# Args: $1=event_json, $2=git_dir
# Returns: count of refs/heads/ tags whose hash equals the hash on disk
count_matching_refs() {
    local event="$1"
    local git_dir="$2"
    local matching=0
    local ref_path expected_hash actual_hash

    # One jq call emits "ref<TAB>hash" per refs/heads/ tag (instead of two jq
    # calls per ref). A tag with no hash yields an empty second field.
    while IFS=$'\t' read -r ref_path expected_hash; do
        [[ -z "$ref_path" ]] && continue
        [[ -z "$expected_hash" || "$expected_hash" == "null" ]] && continue

        actual_hash=$(get_git_ref_hash "$git_dir" "$ref_path")

        if [[ "$expected_hash" == "$actual_hash" ]]; then
            matching=$((matching + 1))
        fi
    done < <(jq -r '.tags[] | select(.[0] | startswith("refs/heads/")) | [.[0], (.[1] // "")] | @tsv' <<< "$event" 2>/dev/null)

    echo "$matching"
}

# Categorize a single entry
# Args: $1=state_refs, $2=git_refs, $3=matches, $4=reason
# Returns: category number (1-4)
#   2 = a reason is set or there are no git refs
#   1 = every state ref matches and the ref counts are equal
#   4 = git refs exist but nothing matches
#   3 = some refs match
categorize_entry() {
    local state_refs="$1"
    local git_refs="$2"
    local matches="$3"
    local reason="$4"

    if [[ -n "$reason" ]] || [[ "$git_refs" -eq 0 ]]; then
        echo "2"
    elif [[ "$state_refs" -gt 0 && "$state_refs" -eq "$git_refs" && "$matches" -eq "$state_refs" ]]; then
        echo "1"
    elif [[ "$git_refs" -gt 0 && "$matches" -eq 0 ]]; then
        echo "4"
    elif [[ "$matches" -gt 0 ]]; then
        echo "3"
    else
        # Fallback to category 2
        echo "2"
    fi
}

# Format entry for category file
# Args: $1=repo, $2=npub, $3=state_refs, $4=git_refs, $5=matches, $6=reason
# Output: "repo | npub | state_refs=N | git_refs=N | matches=N[ | reason=R]"
format_category_line() {
    local repo="$1"
    local npub="$2"
    local state_refs="$3"
    local git_refs="$4"
    local matches="$5"
    local reason="$6"

    local line="$repo | $npub | state_refs=$state_refs | git_refs=$git_refs | matches=$matches"
    if [[ -n "$reason" ]]; then
        line="$line | reason=$reason"
    fi
    echo "$line"
}

# Process a single state event
# Args: $1=event_json, $2=git_base
# Outputs: TSV line to stdout (repo, npub, state_refs, git_refs, matches, reason)
# Returns: 1 if the event has no d tag, no pubkey, or the npub cannot be encoded
process_event() {
    local event="$1"
    local git_base="$2"

    # Extract repository identifier (d tag). `// empty` drops JSON null so a
    # missing value is not turned into the literal string "null".
    local identifier
    identifier=$(jq -r '.tags[] | select(.[0] == "d") | .[1] // empty' <<< "$event" 2>/dev/null | head -1 || echo "")

    if [[ -z "$identifier" ]]; then
        return 1
    fi

    # Extract maintainer pubkey (hex)
    local hex_pubkey
    hex_pubkey=$(jq -r '.pubkey // empty' <<< "$event" 2>/dev/null || echo "")

    if [[ -z "$hex_pubkey" ]]; then
        return 1
    fi

    # Convert to npub
    local npub
    npub=$(hex_to_npub "$hex_pubkey")

    if [[ -z "$npub" ]]; then
        return 1
    fi

    # Count state refs
    local state_refs
    state_refs=$(count_state_refs "$event")

    # Find git directory
    local git_dir="$git_base/${npub}/${identifier}.git"

    # Check git directory status
    local git_refs=0
    local matches=0
    local reason=""

    if [[ ! -d "$git_dir" ]]; then
        reason="no_git_dir"
    elif [[ ! -d "$git_dir/refs/heads" ]] && [[ ! -f "$git_dir/packed-refs" ]]; then
        reason="empty_refs"
    else
        git_refs=$(count_git_refs "$git_dir")

        if [[ "$git_refs" -eq 0 ]]; then
            reason="empty_refs"
        elif [[ "$state_refs" -eq 0 ]]; then
            reason="no_state_refs"
        else
            matches=$(count_matching_refs "$event" "$git_dir")
        fi
    fi

    # Output TSV line: repo, npub, state_refs, git_refs, matches, reason
    printf '%s\t%s\t%s\t%s\t%s\t%s\n' "$identifier" "$npub" "$state_refs" "$git_refs" "$matches" "$reason"
}

# Main
# Args: <state-events.json> <git-base-dir> <output-dir> [--categorize]
# Writes <output-dir>/git-sync-status.tsv and, with --categorize, the four
# category files. Exits 1 on bad arguments, missing inputs or missing tools.
main() {
    local do_categorize=0
    local args=()
    local arg

    # Parse arguments
    for arg in "$@"; do
        if [[ "$arg" == "--categorize" ]]; then
            do_categorize=1
        else
            args+=("$arg")
        fi
    done

    if [[ ${#args[@]} -ne 3 ]]; then
        usage
    fi

    local state_events_file="${args[0]}"
    local git_base="${args[1]}"
    local output_dir="${args[2]}"

    # Validate inputs
    if [[ ! -f "$state_events_file" ]]; then
        log_error "State events file not found: $state_events_file"
        exit 1
    fi

    if [[ ! -d "$git_base" ]]; then
        log_error "Git base directory not found: $git_base"
        log_error "This script must run on the VPS with access to git directories."
        exit 1
    fi

    # Check read permissions
    if ! ls "$git_base" >/dev/null 2>&1; then
        log_error "Cannot read git base directory (permission denied): $git_base"
        log_error "Try running with sudo or grant read permissions."
        exit 1
    fi

    # Pass the original arguments so the nix-shell hint can repeat them.
    check_prerequisites "$@"

    log_info "=== Git State Synchronization Check ==="
    log_info "State events: $state_events_file"
    log_info "Git base: $git_base"
    log_info "Output: $output_dir"
    if [[ $do_categorize -eq 1 ]]; then
        log_info "Mode: TSV + categorization"
    else
        log_info "Mode: TSV only (use 20-categorize.sh for categories)"
    fi
    log_info "Started: $(date)"
    echo ""

    # Create output directory
    mkdir -p "$output_dir"

    # Output files
    local tsv_file="$output_dir/git-sync-status.tsv"

    # Initialize TSV with header
    printf 'repo\tnpub\tstate_refs\tgit_refs\tmatches\treason\n' > "$tsv_file"

    # Initialize category files if categorizing
    local cat1="" cat2="" cat3="" cat4=""
    if [[ $do_categorize -eq 1 ]]; then
        cat1="$output_dir/category1-complete-match.txt"
        cat2="$output_dir/category2-empty-blank.txt"
        cat3="$output_dir/category3-partial-match.txt"
        cat4="$output_dir/category4-no-match.txt"
        : > "$cat1"
        : > "$cat2"
        : > "$cat3"
        : > "$cat4"
    fi

    # Count total events
    local total_events
    total_events=$(wc -l < "$state_events_file" | tr -d ' ')
    log_info "Processing $total_events state events..."
    echo ""

    # Process each event
    local count=0
    local processed=0
    local skipped=0
    local count_cat1=0 count_cat2=0 count_cat3=0 count_cat4=0
    local start_time
    start_time=$(date +%s)

    local event result category cat_line
    local repo npub state_refs git_refs matches reason
    local elapsed rate eta

    # `|| [[ -n "$event" ]]` keeps a final line that has no trailing newline.
    while IFS= read -r event || [[ -n "$event" ]]; do
        count=$((count + 1))

        # Skip empty lines
        [[ -z "$event" ]] && continue

        # Process event
        if result=$(process_event "$event" "$git_base"); then
            processed=$((processed + 1))

            # Append to TSV (header was written above)
            printf '%s\n' "$result" >> "$tsv_file"

            # Categorize if requested
            if [[ $do_categorize -eq 1 ]]; then
                # Only the last field (reason) can be empty, so tab-splitting
                # with read does not shift any columns.
                IFS=$'\t' read -r repo npub state_refs git_refs matches reason <<< "$result"

                category=$(categorize_entry "$state_refs" "$git_refs" "$matches" "$reason")
                cat_line=$(format_category_line "$repo" "$npub" "$state_refs" "$git_refs" "$matches" "$reason")

                case "$category" in
                    1) echo "$cat_line" >> "$cat1"; count_cat1=$((count_cat1 + 1)) ;;
                    2) echo "$cat_line" >> "$cat2"; count_cat2=$((count_cat2 + 1)) ;;
                    3) echo "$cat_line" >> "$cat3"; count_cat3=$((count_cat3 + 1)) ;;
                    4) echo "$cat_line" >> "$cat4"; count_cat4=$((count_cat4 + 1)) ;;
                esac
            fi
        else
            skipped=$((skipped + 1))
        fi

        # Progress indicator every 10 events
        if [[ $((count % 10)) -eq 0 ]]; then
            elapsed=$(($(date +%s) - start_time))
            rate=0
            if [[ $elapsed -gt 0 ]]; then
                rate=$((count / elapsed))
            fi
            eta="?"
            if [[ $rate -gt 0 ]]; then
                eta=$(( (total_events - count) / rate ))
            fi
            log_progress "Processed $count/$total_events events (~${rate}/s, ETA: ${eta}s)..."
        fi
    done < "$state_events_file"

    # Clear progress line
    echo "" >&2

    local end_time
    end_time=$(date +%s)
    local duration=$((end_time - start_time))

    # Summary
    echo ""
    log_info "=== Analysis Complete ==="
    log_info "Finished: $(date)"
    log_info "Duration: ${duration}s"
    log_info "Processed: $processed events"
    if [[ $skipped -gt 0 ]]; then
        log_warn "Skipped: $skipped events (missing identifier or pubkey)"
    fi
    echo ""

    if [[ $do_categorize -eq 1 ]]; then
        # Calculate percentages
        local total=$((count_cat1 + count_cat2 + count_cat3 + count_cat4))
        local pct1=0 pct2=0 pct3=0 pct4=0
        if [[ $total -gt 0 ]]; then
            pct1=$(awk -v n="$count_cat1" -v t="$total" 'BEGIN {printf "%.1f", (n/t)*100}')
            pct2=$(awk -v n="$count_cat2" -v t="$total" 'BEGIN {printf "%.1f", (n/t)*100}')
            pct3=$(awk -v n="$count_cat3" -v t="$total" 'BEGIN {printf "%.1f", (n/t)*100}')
            pct4=$(awk -v n="$count_cat4" -v t="$total" 'BEGIN {printf "%.1f", (n/t)*100}')
        fi

        log_info "=== Category Summary ==="
        log_success "Category 1 (Complete Match): $count_cat1 ($pct1%)"
        log_warn "Category 2 (Empty/Blank): $count_cat2 ($pct2%)"
        log_warn "Category 3 (Partial Match): $count_cat3 ($pct3%)"
        log_error "Category 4 (No Match): $count_cat4 ($pct4%)"
        echo ""

        # Validation warning
        if [[ $count_cat2 -eq $total ]] && [[ $total -gt 0 ]]; then
            log_error "WARNING: 100% of repos categorized as Empty/Blank"
            log_error "This usually indicates a permission or path issue."
            echo ""
            log_info "Troubleshooting:"
            echo "  1. Verify git data exists: sudo ls -la $git_base | head -10"
            echo "  2. Check sample repo: sudo find $git_base -name '*.git' -type d | head -1"
            echo "  3. Re-run with sudo if not already using it"
            echo ""
        fi
    fi

    log_info "Output files:"
    echo "  $tsv_file"
    if [[ $do_categorize -eq 1 ]]; then
        echo "  $cat1"
        echo "  $cat2"
        echo "  $cat3"
        echo "  $cat4"
    else
        echo ""
        log_info "Next step: Run 20-categorize.sh to categorize results"
        echo "  ./20-categorize.sh $tsv_file $output_dir"
    fi
}
+# +# USAGE: +# ./30-extract-parse-failures.sh [options] +# +# EXAMPLES: +# # Extract from ngit-grasp service (last 30 days, default) +# ./30-extract-parse-failures.sh ngit-grasp.service output/logs +# +# # Extract with custom time range +# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-01" +# +# # Extract from specific time window +# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22" +# +# OPTIONS: +# --since Start date for log extraction (default: 30 days ago) +# --until End date for log extraction (default: now) +# --dry-run Show what would be extracted without writing files +# +# OUTPUT: +# /parse-failures.txt +# +# OUTPUT FORMAT (TSV): +# reponpubkindevent_idreason +# +# EXPECTED LOG FORMAT: +# The script looks for structured log entries in this format: +# +# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... +# +# Required fields: kind, event_id, reason +# Optional fields: repo, npub (may not be available if parsing failed early) +# +# DEPENDENCY: +# This script requires logging improvements in ngit-grasp to emit structured +# [PARSE_FAIL] log entries. Until those are implemented, this script will +# find no matching entries (which is handled gracefully). 
#
#   See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)
#
#   Expected Rust logging code:
#     tracing::warn!(
#         target: "migration",
#         "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}",
#         event.kind, event.id, reason, identifier, npub
#     );
#
# PREREQUISITES:
#   - journalctl (systemd)
#   - grep, awk (standard Unix tools)
#   - Access to systemd journal (may require sudo or journal group membership)
#
# RUNTIME: Depends on log volume, typically < 30 seconds
#
# SEE ALSO:
#   docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide
#   31-extract-purgatory-expiry.sh - Companion script for purgatory expiry logs
#

set -euo pipefail

# Colors for log output. Every log_* helper writes to stderr, so colors are
# enabled only when stderr (fd 2) is a terminal; otherwise escape codes would
# end up in redirected log files.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi

# Logging helpers. The color variables are expanded with %b (they hold
# backslash escapes); the message itself is printed verbatim with %s so that
# backslashes in log text are never interpreted.
log_info() {
    printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$*" >&2
}

log_success() {
    printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$*" >&2
}

log_warn() {
    printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*" >&2
}

log_error() {
    printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2
}

# Print usage to stdout and exit with status 1.
usage() {
    echo "Usage: $0 <service-name> <output-dir> [options]"
    echo ""
    echo "Arguments:"
    echo "  service-name  Systemd service name (e.g., ngit-grasp.service)"
    echo "  output-dir    Directory to store extracted log data"
    echo ""
    echo "Options:"
    echo "  --since <date>  Start date (default: 30 days ago)"
    echo "  --until <date>  End date (default: now)"
    echo "  --dry-run       Show what would be extracted without writing"
    echo ""
    echo "Examples:"
    echo "  $0 ngit-grasp.service output/logs"
    echo "  $0 ngit-grasp.service output/logs --since '2026-01-01'"
    echo "  $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'"
    echo ""
    echo "Expected log format:"
    echo "  [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..."
    exit 1
}

# Parse a single log line and extract fields
# Args: $1 = log line containing [PARSE_FAIL]
# Output: TSV line: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason
#         Nothing is printed unless kind, event_id and reason are all present.
# Uses bash regex matching only (no grep -P, no subprocess per field).
parse_log_line() {
    local line="$1"
    local kind="" event_id="" reason="" repo="" npub=""

    # Patterns live in variables so the regex is not affected by quoting.
    local re_kind='kind=([0-9]+)'
    local re_event='event_id=([a-f0-9]+)'
    local re_reason='reason="([^"]*)'
    local re_repo='repo=([^ ]+)'
    local re_npub='npub=([^ ]+)'

    if [[ "$line" =~ $re_kind ]]; then
        kind="${BASH_REMATCH[1]}"
    fi
    if [[ "$line" =~ $re_event ]]; then
        event_id="${BASH_REMATCH[1]}"
    fi
    if [[ "$line" =~ $re_reason ]]; then
        reason="${BASH_REMATCH[1]}"
    fi

    # repo/npub are optional and follow the quoted reason. The reason text is
    # removed first so a "repo=" or "npub=" inside it is not picked up.
    local rest="${line/"reason=\"${reason}\""/}"
    if [[ "$rest" =~ $re_repo ]]; then
        repo="${BASH_REMATCH[1]}"
    fi
    if [[ "$rest" =~ $re_npub ]]; then
        npub="${BASH_REMATCH[1]}"
    fi

    # Only output if we have the required fields
    if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then
        printf '%s\t%s\t%s\t%s\t%s\n' "$repo" "$npub" "$kind" "$event_id" "$reason"
    fi
}

# Write the 4-line comment header of parse-failures.txt.
# Args: $1=service, $2=since_date, $3=until_date, $4=output_file
_write_parse_failures_header() {
    {
        echo "# Parse failures extracted from $1"
        echo "# Time range: ${2:-beginning} to ${3:-now}"
        echo "# Extracted: $(date -Iseconds)"
        echo "# Format: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason"
    } > "$4"
}

# Main
# Args: <service-name> <output-dir> [--since <date>] [--until <date>] [--dry-run]
# Writes <output-dir>/parse-failures.txt (header only when no entries exist).
main() {
    if [[ $# -lt 2 ]]; then
        usage
    fi

    local service="$1"
    local output_dir="$2"
    shift 2

    # Default time range: last 30 days (GNU date first, then BSD date)
    local since_date
    since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "")
    local until_date=""
    local dry_run=false

    # Parse options
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --since | --until)
                # Explicit check: under `set -u` a missing value would abort
                # with an unhelpful "unbound variable" error.
                if [[ $# -lt 2 ]]; then
                    log_error "Option $1 requires a value"
                    usage
                fi
                if [[ "$1" == "--since" ]]; then
                    since_date="$2"
                else
                    until_date="$2"
                fi
                shift 2
                ;;
            --dry-run)
                dry_run=true
                shift
                ;;
            *)
                log_error "Unknown option: $1"
                usage
                ;;
        esac
    done

    # Normalize service name
    if [[ ! "$service" =~ \.service$ ]]; then
        service="${service}.service"
    fi

    log_info "Extracting parse failures from systemd logs"
    log_info "Service: $service"
    log_info "Output: $output_dir"
    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"

    # Check if journalctl is available
    if ! command -v journalctl &> /dev/null; then
        log_error "journalctl not found. This script requires systemd."
        exit 1
    fi

    # Build the journalctl command as an array: user-supplied dates are passed
    # as single arguments and never re-parsed by the shell (no eval).
    local journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)

    if [[ -n "$since_date" ]]; then
        journal_cmd+=(--since "$since_date")
    fi

    if [[ -n "$until_date" ]]; then
        journal_cmd+=(--until "$until_date")
    fi

    local journal_display
    printf -v journal_display '%q ' "${journal_cmd[@]}"
    log_info "Running: ${journal_display}| grep '\\[PARSE_FAIL\\]'"

    if [[ "$dry_run" == true ]]; then
        log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt"

        # Show how many entries would be extracted
        log_info "Checking for matching log entries..."
        local sample_count
        # grep -c prints the count even when it exits 1 on zero matches.
        sample_count=$("${journal_cmd[@]}" 2>/dev/null | grep -c -F '[PARSE_FAIL]' || true)
        sample_count="${sample_count:-0}"
        log_info "Found $sample_count matching log entries"

        if [[ "$sample_count" -eq 0 ]]; then
            log_warn "No [PARSE_FAIL] entries found in logs."
            log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
            log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
        fi

        return 0
    fi

    # Create output directory
    mkdir -p "$output_dir"

    local output_file="$output_dir/parse-failures.txt"

    # Extract and parse log entries
    log_info "Extracting log entries..."

    # Get raw log lines containing [PARSE_FAIL]
    local raw_lines
    raw_lines=$("${journal_cmd[@]}" 2>/dev/null | grep -F '[PARSE_FAIL]' || true)

    if [[ -z "$raw_lines" ]]; then
        log_warn "No [PARSE_FAIL] entries found in logs."
        log_warn ""
        log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
        log_warn "The structured log format required by this script:"
        log_warn ""
        log_warn "  [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..."
        log_warn ""
        log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
        log_warn ""

        # Create output file containing only the header and an explanatory note
        _write_parse_failures_header "$service" "$since_date" "$until_date" "$output_file"
        {
            echo "#"
            echo "# NOTE: No [PARSE_FAIL] entries found."
            echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
        } >> "$output_file"

        log_info "Created empty output file: $output_file"
        return 0
    fi

    # Write header (4 lines; the reporting below skips exactly these)
    local header_lines=4
    _write_parse_failures_header "$service" "$since_date" "$until_date" "$output_file"

    # Parse each line
    local count=0
    local line parsed
    while IFS= read -r line; do
        parsed=$(parse_log_line "$line")
        if [[ -n "$parsed" ]]; then
            printf '%s\n' "$parsed" >> "$output_file"
            # Not ((count++)): that returns status 1 when count is 0 and would
            # abort the script under `set -e` after the first entry.
            count=$((count + 1))
        fi
    done <<< "$raw_lines"

    # Summary
    echo ""
    log_info "=== Extraction Summary ==="
    log_info "Service: $service"
    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
    log_success "Extracted $count parse failure entries"
    echo ""
    log_info "Output file: $output_file"

    if [[ $count -gt 0 ]]; then
        echo ""
        log_info "Sample entries (first 5):"
        # awk splits on every tab, so empty repo/npub fields keep their
        # position, and reading the whole file avoids a SIGPIPE under pipefail.
        awk -F'\t' -v skip="$header_lines" \
            'NR > skip && shown < 5 { shown++; printf "  kind=%s repo=%s reason=\"%s\"\n", $3, $1, $5 }' \
            "$output_file"

        # Breakdown by kind
        echo ""
        log_info "Breakdown by event kind:"
        local cnt kind
        awk -F'\t' -v skip="$header_lines" 'NR > skip { print $3 }' "$output_file" \
            | sort | uniq -c | sort -rn \
            | while read -r cnt kind; do
                echo "  kind $kind: $cnt failures"
            done
    fi
}
+#
+# USAGE:
+#   ./31-extract-purgatory-expiry.sh <service-name> <output-dir> [options]
+#
+# EXAMPLES:
+#   # Extract from ngit-grasp service (last 30 days, default)
+#   ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs
+#
+#   # Extract with custom time range
+#   ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-01"
+#
+#   # Extract from specific time window
+#   ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22"
+#
+# OPTIONS:
+#   --since <date>    Start date for log extraction (default: 30 days ago)
+#   --until <date>    End date for log extraction (default: now)
+#   --dry-run         Show what would be extracted without writing files
+#
+# OUTPUT:
+#   <output-dir>/purgatory-expired.txt
+#
+# OUTPUT FORMAT (TSV):
+#   repo<TAB>npub<TAB>timestamp<TAB>reason
+#
+# EXPECTED LOG FORMAT:
+#   The script looks for structured log entries in this format:
+#
+#   2026-01-22T10:30:45Z ngit-grasp[1234]: [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="clone URL unreachable after 7 days"
+#
+#   Required fields: repo, npub
+#   Optional fields: reason (explains why purgatory expired)
+#
+# BACKGROUND:
+#   "Purgatory" is the state where ngit-grasp has received an announcement event
+#   but cannot yet sync the git data (e.g., clone URL unreachable, git server down).
+#   After a configurable timeout (default 7 days), the repository is marked as
+#   expired and removed from purgatory.
+#
+#   Purgatory expiry during migration analysis indicates repositories that:
+#   - Had valid announcements on the production relay
+#   - Could not be synced to the archive relay
+#   - May need manual intervention or investigation
+#
+# DEPENDENCY:
+#   This script requires logging improvements in ngit-grasp to emit structured
+#   [PURGATORY_EXPIRED] log entries. Until those are implemented, this script
+#   will find no matching entries (which is handled gracefully).
+#
+# See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)
+#
+# Expected Rust logging code:
+#   tracing::warn!(
+#       target: "migration",
+#       "[PURGATORY_EXPIRED] repo={} npub={} reason=\"{}\"",
+#       identifier, npub, reason
+#   );
+#
+# PREREQUISITES:
+#   - journalctl (systemd)
+#   - grep with -P (PCRE) support, awk, sed (standard Unix tools)
+#   - Access to systemd journal (may require sudo or journal group membership)
+#
+# RUNTIME: Depends on log volume, typically < 30 seconds
+#
+# SEE ALSO:
+#   docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide
+#   30-extract-parse-failures.sh - Companion script for parse failure logs
+#
+
+set -euo pipefail
+
+# Colors for output (disabled if not a terminal).
+# Every colored message is written to stderr by the log_* helpers below, so it
+# is stderr (fd 2), not stdout, that has to be a terminal.
+if [[ -t 2 ]]; then
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'
+    NC='\033[0m'
+else
+    RED=''
+    GREEN=''
+    YELLOW=''
+    BLUE=''
+    NC=''
+fi
+
+# Logging helpers: prefix the message with a colored level tag and write it to
+# stderr, keeping stdout free for the human-readable summary.
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $*" >&2
+}
+
+log_success() {
+    echo -e "${GREEN}[OK]${NC} $*" >&2
+}
+
+log_warn() {
+    echo -e "${YELLOW}[WARN]${NC} $*" >&2
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $*" >&2
+}
+
+# Print the command-line help to stdout and exit with status 1.
+usage() {
+    echo "Usage: $0 <service-name> <output-dir> [options]"
+    echo ""
+    echo "Arguments:"
+    echo "  service-name    Systemd service name (e.g., ngit-grasp.service)"
+    echo "  output-dir      Directory to store extracted log data"
+    echo ""
+    echo "Options:"
+    echo "  --since <date>  Start date (default: 30 days ago)"
+    echo "  --until <date>  End date (default: now)"
+    echo "  --dry-run       Show what would be extracted without writing"
+    echo ""
+    echo "Examples:"
+    echo "  $0 ngit-grasp.service output/logs"
+    echo "  $0 ngit-grasp.service output/logs --since '2026-01-01'"
+    echo "  $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'"
+    echo ""
+    echo "Expected log format:"
+    echo "  [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason=\"...\""
+    exit 1
+}
+
+# Parse a single log line and extract fields
+# Input:  $1 - log line containing [PURGATORY_EXPIRED]
+# Output: TSV line on stdout: repo<TAB>npub<TAB>timestamp<TAB>reason
+#         Nothing is printed when repo or npub cannot be found.
+parse_log_line() {
+    local line="$1"
+
+    # Extract timestamp from the beginning of the log line
+    # Format: 2026-01-22T10:30:45+0000 or similar ISO format
+    local timestamp repo npub reason
+
+    # Each extraction falls back to an empty string: grep exits non-zero when
+    # the field is absent, which would otherwise trip set -e / pipefail.
+
+    # Extract ISO timestamp from beginning of line (seconds precision, no zone)
+    timestamp=$(echo "$line" | grep -oP '^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}' || echo "")
+
+    # Extract repo=VALUE (unquoted identifier, ends at the first space)
+    repo=$(echo "$line" | grep -oP 'repo=\K[^ ]+' || echo "")
+
+    # Extract npub=VALUE (npub1... format)
+    npub=$(echo "$line" | grep -oP 'npub=\K[^ ]+' || echo "")
+
+    # Extract reason="VALUE" (quoted string, optional)
+    reason=$(echo "$line" | grep -oP 'reason="\K[^"]*' || echo "")
+
+    # Only output if we have the required fields
+    if [[ -n "$repo" && -n "$npub" ]]; then
+        printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$timestamp" "$reason"
+    fi
+}
+
+# Main
+# Arguments: <service-name> <output-dir> [--since <date>] [--until <date>] [--dry-run]
+# Writes <output-dir>/purgatory-expired.txt (4 header lines, then TSV rows) and
+# prints a summary; in --dry-run mode only counts matching journal entries.
+main() {
+    if [[ $# -lt 2 ]]; then
+        usage
+    fi
+
+    local service="$1"
+    local output_dir="$2"
+    shift 2
+
+    # Default time range: last 30 days
+    # (GNU date syntax first, then the BSD -v form; empty = no lower bound)
+    local since_date
+    since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "")
+    local until_date=""
+    local dry_run=false
+
+    # Parse options
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            --since)
+                # A trailing option without a value would otherwise abort
+                # with an "unbound variable" error under set -u.
+                if [[ $# -lt 2 ]]; then
+                    log_error "Option $1 requires a value"
+                    usage
+                fi
+                since_date="$2"
+                shift 2
+                ;;
+            --until)
+                if [[ $# -lt 2 ]]; then
+                    log_error "Option $1 requires a value"
+                    usage
+                fi
+                until_date="$2"
+                shift 2
+                ;;
+            --dry-run)
+                dry_run=true
+                shift
+                ;;
+            *)
+                log_error "Unknown option: $1"
+                usage
+                ;;
+        esac
+    done
+
+    # Validate service name
+    if [[ ! "$service" =~ \.service$ ]]; then
+        service="${service}.service"
+    fi
+
+    log_info "Extracting purgatory expiry events from systemd logs"
+    log_info "Service: $service"
+    log_info "Output: $output_dir"
+    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
+
+    # Check if journalctl is available
+    if ! command -v journalctl &> /dev/null; then
+        log_error "journalctl not found. This script requires systemd."
+        exit 1
+    fi
+
+    # Build journalctl command as an argument array: the service name and the
+    # dates are passed to journalctl verbatim and are never re-parsed by the
+    # shell (do not turn this back into a string that is run through eval).
+    local -a journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)
+
+    if [[ -n "$since_date" ]]; then
+        journal_cmd+=(--since "$since_date")
+    fi
+
+    if [[ -n "$until_date" ]]; then
+        journal_cmd+=(--until "$until_date")
+    fi
+
+    log_info "Running: ${journal_cmd[*]} | grep '\\[PURGATORY_EXPIRED\\]'"
+
+    if [[ "$dry_run" == true ]]; then
+        log_info "[DRY RUN] Would extract to: $output_dir/purgatory-expired.txt"
+
+        # Show sample of what would be extracted
+        log_info "Checking for matching log entries..."
+        local sample_count
+        # grep -c always prints a count (0 when nothing matches), so the
+        # fallback must not print a second number; "|| true" only keeps
+        # set -e / pipefail from aborting on "no match" or a journalctl error.
+        sample_count=$("${journal_cmd[@]}" 2>/dev/null | grep -c '\[PURGATORY_EXPIRED\]' || true)
+        sample_count="${sample_count//[^0-9]/}"  # Strip non-numeric characters
+        sample_count="${sample_count:-0}"
+        log_info "Found $sample_count matching log entries"
+
+        if [[ "$sample_count" -eq 0 ]]; then
+            log_warn "No [PURGATORY_EXPIRED] entries found in logs."
+            log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
+            log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
+        fi
+
+        exit 0
+    fi
+
+    # Create output directory
+    mkdir -p "$output_dir"
+
+    local output_file="$output_dir/purgatory-expired.txt"
+
+    # Extract and parse log entries
+    log_info "Extracting log entries..."
+
+    # Get raw log lines containing [PURGATORY_EXPIRED]
+    # ("|| true": no match, or an unreadable journal, is reported below rather
+    # than aborting the script)
+    local raw_lines
+    raw_lines=$("${journal_cmd[@]}" 2>/dev/null | grep '\[PURGATORY_EXPIRED\]' || true)
+
+    if [[ -z "$raw_lines" ]]; then
+        log_warn "No [PURGATORY_EXPIRED] entries found in logs."
+        log_warn ""
+        log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
+        log_warn "The structured log format required by this script:"
+        log_warn ""
+        log_warn "  [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason=\"...\""
+        log_warn ""
+        log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
+        log_warn ""
+
+        # Create empty output file with header comment
+        {
+            echo "# Purgatory expiry events extracted from $service"
+            echo "# Time range: ${since_date:-beginning} to ${until_date:-now}"
+            echo "# Extracted: $(date -Iseconds)"
+            echo "# Format: repo<TAB>npub<TAB>timestamp<TAB>reason"
+            echo "#"
+            echo "# NOTE: No [PURGATORY_EXPIRED] entries found."
+            echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
+        } > "$output_file"
+
+        log_info "Created empty output file: $output_file"
+        exit 0
+    fi
+
+    # Write header (exactly 4 lines: the summary below reads data from line 5)
+    {
+        echo "# Purgatory expiry events extracted from $service"
+        echo "# Time range: ${since_date:-beginning} to ${until_date:-now}"
+        echo "# Extracted: $(date -Iseconds)"
+        echo "# Format: repo<TAB>npub<TAB>timestamp<TAB>reason"
+    } > "$output_file"
+
+    # Parse each line
+    local count=0
+    while IFS= read -r line; do
+        local parsed
+        parsed=$(parse_log_line "$line")
+        if [[ -n "$parsed" ]]; then
+            echo "$parsed" >> "$output_file"
+            # Not ((count++)): that evaluates to 0 on the first increment,
+            # returns status 1, and set -e would end the script right here.
+            count=$((count + 1))
+        fi
+    done <<< "$raw_lines"
+
+    # Summary
+    echo ""
+    log_info "=== Extraction Summary ==="
+    log_info "Service: $service"
+    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
+    log_success "Extracted $count purgatory expiry entries"
+    echo ""
+    log_info "Output file: $output_file"
+
+    if [[ $count -gt 0 ]]; then
+        echo ""
+        log_info "Sample entries (first 5):"
+        # Lines 5-9 are the first five data rows. sed reads its input to the
+        # end, so no upstream command is killed by SIGPIPE (which pipefail
+        # would turn into a script failure on large files).
+        sed -n '5,9p' "$output_file" | while IFS=$'\t' read -r repo npub timestamp reason; do
+            echo "  repo=$repo npub=${npub:0:20}... timestamp=$timestamp"
+        done
+    fi
+
+    # Show unique repos affected
+    if [[ $count -gt 0 ]]; then
+        echo ""
+        local unique_repos
+        unique_repos=$(tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort -u | wc -l)
+        log_info "Unique repositories affected: $unique_repos"
+
+        echo ""
+        log_info "Repositories with purgatory expiry:"
+        # Top 10 by number of expiry events (sed instead of head for the same
+        # SIGPIPE/pipefail reason as above).
+        tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort | uniq -c | sort -rn | sed -n '1,10p' | while read -r cnt repo; do
+            echo "  $repo: $cnt expiry events"
+        done
+
+        # Same value as unique_repos; no need to scan the file a second time.
+        local total_repos=$unique_repos
+        if [[ $total_repos -gt 10 ]]; then
+            echo "  ... and $((total_repos - 10)) more repositories"
+        fi
+    fi
+}
+
+main "$@"
-- 
cgit v1.2.3


From 73a366cbd7be4edf9c74194cd0891c80a15236a5 Mon Sep 17 00:00:00 2001
From: DanConwayDev
Date: Fri, 23 Jan 2026 11:41:10 +0000
Subject: Add structured logging for migration analysis

- Add [PARSE_FAIL] logging when event parsing fails
- Add [PURGATORY_EXPIRED] logging when repos expire from purgatory
- Logs include: kind, event_id, repo, npub, reason
- Supports Phase 4 migration scripts (30-extract-*.sh)
- All 382 tests pass
---
 src/nostr/builder.rs | 135 +++++++++++++++++++++++++++++++++++++++++++++------
 src/purgatory/mod.rs | 103 ++++++++++++++++++++++++++++++++-------
 2 files changed, 206 insertions(+), 32 deletions(-)

diff --git a/src/nostr/builder.rs b/src/nostr/builder.rs
index 34014db..629c111 100644
--- a/src/nostr/builder.rs
+++ b/src/nostr/builder.rs
@@ -98,6 +98,62 @@ impl Nip34WritePolicy {
         self.ctx.set_local_relay(relay);
     }
 
+    /// Extract repository identifier from event's 'd' tag.
+    ///
+    /// Used for structured logging when parsing fails - we try to extract
+    /// the identifier even if full parsing failed.
+    fn extract_identifier_from_event(event: &Event) -> String {
+        use nostr_relay_builder::prelude::TagKind;
+        event
+            .tags
+            .iter()
+            .find(|t| t.kind() == TagKind::d())
+            .and_then(|t| t.content())
+            .map(|s| s.to_string())
+            .unwrap_or_else(|| "unknown".to_string())
+    }
+
+    /// Extract ALL repository identifiers from PR event's 'a' tags.
+    ///
+    /// PR events can reference multiple repositories via multiple 'a' tags
+    /// (e.g., when there are multiple maintainers). Each tag has format
+    /// `30617:<pubkey>:<identifier>`.
+    ///
+    /// Returns a vector of unique identifiers, or `["unknown"]` if none found.
+    fn extract_repos_from_pr_event(event: &Event) -> Vec<String> {
+        let repos: Vec<String> = event
+            .tags
+            .iter()
+            .filter_map(|tag| {
+                let tag_vec = tag.clone().to_vec();
+                if tag_vec.len() >= 2 && tag_vec[0] == "a" && tag_vec[1].starts_with("30617:") {
+                    // Format: 30617:<pubkey>:<identifier> (NOTE(review): split(':') cuts an identifier that itself contains ':')
+                    let parts: Vec<&str> = tag_vec[1].split(':').collect();
+                    if parts.len() >= 3 {
+                        Some(parts[2].to_string())
+                    } else {
+                        None
+                    }
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        // Deduplicate while preserving order
+        let mut seen = std::collections::HashSet::new();
+        let unique_repos: Vec<String> = repos
+            .into_iter()
+            .filter(|r| seen.insert(r.clone()))
+            .collect();
+
+        if unique_repos.is_empty() {
+            vec!["unknown".to_string()]
+        } else {
+            unique_repos
+        }
+    }
+
     /// Handle repository announcement event
     async fn handle_announcement(&self, event: &Event) -> WritePolicyResult {
         let event_id_str = event.id.to_bech32().unwrap_or_else(|_| event.id.to_hex());
@@ -129,10 +185,18 @@ impl Nip34WritePolicy {
                 WritePolicyResult::Accept
             }
             Err(e) => {
+                let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex());
+                let event_id_short = &event.id.to_hex()[..12];
+                // Try to extract repo identifier from 'd' tag even if parsing failed
+                let repo = Self::extract_identifier_from_event(event);
+                // Structured log for migration scripts
                 tracing::warn!(
-                    "Failed to parse repository announcement {}: {}",
-                    event_id_str,
-                    e
+                    
"[PARSE_FAIL] kind={} event_id={}... reason=\"{}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + e, + repo, + npub ); WritePolicyResult::reject(format!("Failed to parse announcement: {}", e)) } @@ -157,10 +221,18 @@ impl Nip34WritePolicy { WritePolicyResult::Accept } Err(e) => { + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Try to extract repo identifier from 'd' tag even if parsing failed + let repo = Self::extract_identifier_from_event(event); + // Structured log for migration scripts tracing::warn!( - "Failed to parse maintainer announcement {}: {}", - event_id_str, - e + "[PARSE_FAIL] kind={} event_id={}... reason=\"{}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + e, + repo, + npub ); WritePolicyResult::reject(format!("Failed to parse announcement: {}", e)) } @@ -183,8 +255,6 @@ impl Nip34WritePolicy { /// * `event` - The state event to validate /// * `is_synced` - True if this event came from proactive sync (vs user-submitted) async fn handle_state(&self, event: &Event, is_synced: bool) -> WritePolicyResult { - let event_id_str = event.id.to_bech32().unwrap_or_else(|_| event.id.to_hex()); - match self.state_policy.validate(event) { StateResult::Accept => { // Process state alignment asynchronously @@ -195,7 +265,19 @@ impl Nip34WritePolicy { { Ok(poilicy_result) => poilicy_result, Err(e) => { - tracing::warn!("Failed to process state event {}: {}", event_id_str, e); + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Try to extract repo identifier from 'd' tag even if parsing failed + let repo = Self::extract_identifier_from_event(event); + // Structured log for migration scripts + tracing::warn!( + "[PARSE_FAIL] kind={} event_id={}... 
reason=\"{}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + e, + repo, + npub + ); // reject if processing failed WritePolicyResult::Reject { status: false, @@ -205,7 +287,19 @@ impl Nip34WritePolicy { } } StateResult::Reject(reason) => { - tracing::warn!("Rejected repository state {}: {}", event_id_str, reason); + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Try to extract repo identifier from 'd' tag even if parsing failed + let repo = Self::extract_identifier_from_event(event); + // Structured log for migration scripts + tracing::warn!( + "[PARSE_FAIL] kind={} event_id={}... reason=\"{}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + reason, + repo, + npub + ); WritePolicyResult::reject(reason) } } @@ -323,11 +417,22 @@ impl Nip34WritePolicy { } Err(e) => { // Error checking git data - reject event - tracing::warn!( - "Failed to check git data for PR event {}: {}", - event_id_str, - e - ); + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Extract ALL repo identifiers from 'a' tags for PR events + // (PR events can reference multiple repos when there are multiple maintainers) + let repos = Self::extract_repos_from_pr_event(event); + // Structured log for migration scripts - log once per repo + for repo in &repos { + tracing::warn!( + "[PARSE_FAIL] kind={} event_id={}... 
reason=\"git data check failed: {}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + e, + repo, + npub + ); + } WritePolicyResult::reject(format!("Failed to check git data: {}", e)) } } diff --git a/src/purgatory/mod.rs b/src/purgatory/mod.rs index 47798a6..8b75351 100644 --- a/src/purgatory/mod.rs +++ b/src/purgatory/mod.rs @@ -21,6 +21,7 @@ pub use types::{PrPurgatoryEntry, RefPair, RefUpdate, StatePurgatoryEntry}; use dashmap::DashMap; use nostr_sdk::prelude::*; +use nostr_sdk::ToBech32; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::collections::HashSet; @@ -608,6 +609,9 @@ impl Purgatory { /// prevent infinite re-sync loops. Events that expire without finding git data /// will be filtered out during future negentropy/REQ sync operations. /// + /// Emits structured `[PURGATORY_EXPIRED]` log entries for each expired event + /// to support migration scripts and operational monitoring. + /// /// # Returns /// Tuple of (num_state_removed, num_pr_removed) pub fn cleanup(&self) -> (usize, usize) { @@ -615,18 +619,24 @@ impl Purgatory { let mut state_removed = 0; // Remove expired state events and mark them as expired - self.state_events.retain(|_, entries| { + self.state_events.retain(|identifier, entries| { let original_len = entries.len(); - // Collect event IDs before removing - let expired_ids: Vec = entries - .iter() - .filter(|entry| entry.expires_at <= now) - .map(|entry| entry.event.id) - .collect(); - // Mark as expired to prevent re-sync - for event_id in expired_ids { - self.mark_expired(event_id); + // Log and collect expired entries before removing + for entry in entries.iter().filter(|e| e.expires_at <= now) { + let npub = entry.author.to_bech32().unwrap_or_else(|_| entry.author.to_hex()); + let event_id_short = &entry.event.id.to_hex()[..12]; + + // Structured log for migration scripts + tracing::warn!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... 
kind={} reason=\"git data not received within 30 minutes\"", + identifier, + npub, + event_id_short, + entry.event.kind.as_u16() + ); + + self.mark_expired(entry.event.id); } // Remove expired entries @@ -636,21 +646,80 @@ impl Purgatory { }); // Remove expired PR events and mark them as expired - let expired_prs: Vec<(String, Option)> = self + let expired_prs: Vec<_> = self .pr_events .iter() .filter(|entry| entry.value().expires_at <= now) .map(|entry| { - let event_id = entry.value().event.as_ref().map(|e| e.id); - (entry.key().clone(), event_id) + let pr_entry = entry.value(); + let event_id_str = entry.key().clone(); + let event_opt = pr_entry.event.clone(); + let commit = pr_entry.commit.clone(); + (event_id_str, event_opt, commit) }) .collect(); let pr_removed = expired_prs.len(); - for (event_id_str, event_id_opt) in expired_prs { - // Mark actual PR events as expired (not placeholders) - if let Some(event_id) = event_id_opt { - self.mark_expired(event_id); + for (event_id_str, event_opt, commit) in expired_prs { + // Log structured entry for PR events (not placeholders) + if let Some(ref event) = event_opt { + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + + // Extract ALL repo identifiers from 'a' tags + // (PR events can reference multiple repos when there are multiple maintainers) + let repos: Vec = event + .tags + .iter() + .filter_map(|tag| { + let tag_vec = tag.clone().to_vec(); + if tag_vec.len() >= 2 && tag_vec[0] == "a" && tag_vec[1].starts_with("30617:") { + // Format: 30617:: + let parts: Vec<&str> = tag_vec[1].split(':').collect(); + if parts.len() >= 3 { + Some(parts[2].to_string()) + } else { + None + } + } else { + None + } + }) + .collect(); + + // Deduplicate while preserving order + let mut seen = std::collections::HashSet::new(); + let unique_repos: Vec = repos + .into_iter() + .filter(|r| seen.insert(r.clone())) + .collect(); + + let repos_to_log = if 
unique_repos.is_empty() { + vec!["unknown".to_string()] + } else { + unique_repos + }; + + // Structured log for migration scripts - log once per repo + for repo in &repos_to_log { + tracing::warn!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} commit={} reason=\"git data not received within 30 minutes\"", + repo, + npub, + event_id_short, + event.kind.as_u16(), + &commit[..commit.len().min(12)] + ); + } + + self.mark_expired(event.id); + } else { + // Placeholder (git data arrived first, but PR event never came) + tracing::debug!( + "[PURGATORY_EXPIRED] placeholder event_id={} commit={} reason=\"PR event not received within 30 minutes\"", + &event_id_str[..event_id_str.len().min(12)], + &commit[..commit.len().min(12)] + ); } self.pr_events.remove(&event_id_str); } -- cgit v1.2.3 From 323336c84613b74921ebc75bf46ccd2d8f9cd6cc Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 11:41:12 +0000 Subject: Add Phase 5 migration script for final classification - Combines all data sources from Phases 1-4 - Produces three actionable outputs: no-action, action-required, manual-investigation - Generates comprehensive summary with recommendations - Handles missing Phase 4 logs gracefully - Classification logic for migration decision-making --- .../migration-scripts/40-classify-actions.sh | 770 +++++++++++++++++++++ 1 file changed, 770 insertions(+) create mode 100755 docs/how-to/migration-scripts/40-classify-actions.sh diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh new file mode 100755 index 0000000..9fc718f --- /dev/null +++ b/docs/how-to/migration-scripts/40-classify-actions.sh @@ -0,0 +1,770 @@ +#!/usr/bin/env bash +# +# 40-classify-actions.sh - Final classification of repos for migration action +# +# PHASE 5 of the ngit-relay to ngit-grasp migration analysis pipeline. +# Combines all data sources from previous phases to produce actionable results. 
+#
+# USAGE:
+#   ./40-classify-actions.sh <analysis-dir>
+#
+# EXAMPLES:
+#   ./40-classify-actions.sh work/migration-analysis-20260122-1430
+#
+# INPUT DIRECTORY STRUCTURE:
+#   <analysis-dir>/
+#   ├── prod/
+#   │   ├── raw/
+#   │   │   └── deletions.json                    # Phase 1: kind 5 deletion events
+#   │   ├── category1-complete-match.txt          # Phase 3: complete git sync
+#   │   ├── category2-empty-blank.txt             # Phase 3: no git data
+#   │   ├── category3-partial-match.txt           # Phase 3: partial git sync
+#   │   └── category4-no-match.txt                # Phase 3: git exists, refs don't match
+#   ├── archive/
+#   │   ├── raw/
+#   │   │   └── deletions.json
+#   │   ├── category1-complete-match.txt
+#   │   ├── category2-empty-blank.txt
+#   │   ├── category3-partial-match.txt
+#   │   └── category4-no-match.txt
+#   ├── comparison/
+#   │   ├── complete-in-both.txt                  # Phase 3: no action needed
+#   │   ├── complete-prod-missing-archive.txt     # Phase 3: needs investigation
+#   │   ├── complete-prod-incomplete-archive.txt  # Phase 3: sync in progress?
+#   │   ├── incomplete-in-both.txt                # Phase 3: git incomplete
+#   │   └── in-archive-not-prod.txt               # Phase 3: deleted or new
+#   └── logs/
+#       ├── parse-failures.txt                    # Phase 4: events that failed to parse
+#       └── purgatory-expired.txt                 # Phase 4: repos that expired from purgatory
+#
+# OUTPUT:
+#   <analysis-dir>/results/
+#   ├── no-action-required.txt      # Repos that are fine as-is
+#   ├── action-required.txt         # Repos needing intervention
+#   ├── manual-investigation.txt    # Repos needing human review
+#   └── summary.txt                 # Human-readable summary
+#
+# OUTPUT FORMATS:
+#   no-action-required.txt:
+#     repo | npub | reason
+#
+#   action-required.txt:
+#     repo | npub | reason | suggested_action
+#
+#   manual-investigation.txt:
+#     repo | npub | reason | context
+#
+# CLASSIFICATION LOGIC:
+#
+#   NO ACTION REQUIRED:
+#     - Complete in both prod and archive (successfully migrated)
+#     - Empty/blank in both (user never pushed any data)
+#     - Deleted by user (kind 5 deletion event exists)
+#     - In purgatory expiry logs (system already handled it)
+#
+#   ACTION REQUIRED:
+#     - Complete in prod, missing from
archive → Re-sync needed
+#     - Complete in prod, incomplete in archive → Wait for sync or re-trigger
+#     - Partial match in prod → Investigate why refs don't match
+#     - No match (category 4) → Investigate git data corruption
+#     - Parse failures → Fix event format or re-announce
+#
+#   MANUAL INVESTIGATION:
+#     - Conflicting states (e.g., complete in prod but parse failure logged)
+#     - In archive but not prod (deleted? or new announcement?)
+#     - Multiple issues for same repo
+#     - Unexpected state combinations
+#
+# PREREQUISITES:
+#   - jq (for parsing JSON)
+#   - awk, sort, comm (standard Unix tools)
+#
+# RUNTIME: < 5 seconds (local processing only)
+#
+# SEE ALSO:
+#   docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide
+#   01-fetch-events.sh - Phase 1 (fetch events)
+#   10-check-git-sync.sh - Phase 2 (git sync check)
+#   20-categorize.sh, 21-compare-relays.sh - Phase 3 (categorize and compare)
+#   30-extract-parse-failures.sh, 31-extract-purgatory-expiry.sh - Phase 4 (logs)
+#
+
+set -euo pipefail
+
+# Colors for output (disabled if not a terminal)
+# NOTE(review): this tests stdout (fd 1) while the log_* helpers write to
+# stderr; confirm which stream the colors are meant to follow.
+if [[ -t 1 ]]; then
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'
+    BOLD='\033[1m'
+    NC='\033[0m'
+else
+    RED=''
+    GREEN=''
+    YELLOW=''
+    BLUE=''
+    BOLD=''
+    NC=''
+fi
+
+# Logging helpers: prefix the message with a colored level tag and write it to
+# stderr.
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $*" >&2
+}
+
+log_success() {
+    echo -e "${GREEN}[OK]${NC} $*" >&2
+}
+
+log_warn() {
+    echo -e "${YELLOW}[WARN]${NC} $*" >&2
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $*" >&2
+}
+
+# Print the command-line help to stdout and exit with status 1.
+usage() {
+    echo "Usage: $0 <analysis-dir>"
+    echo ""
+    echo "Arguments:"
+    echo "  analysis-dir    Directory containing Phase 1-4 output"
+    echo ""
+    echo "Examples:"
+    echo "  $0 work/migration-analysis-20260122-1430"
+    echo ""
+    echo "Required input structure:"
+    echo "  <analysis-dir>/prod/category*.txt"
+    echo "  <analysis-dir>/archive/category*.txt"
+    echo "  <analysis-dir>/comparison/*.txt"
+    echo "  <analysis-dir>/logs/*.txt (optional)"
+    echo "  <analysis-dir>/prod/raw/deletions.json"
+    echo ""
+    echo "Output:"
+    echo "  <analysis-dir>/results/no-action-required.txt"
+    echo "  <analysis-dir>/results/action-required.txt"
+    echo "  <analysis-dir>/results/manual-investigation.txt"
+    echo "  <analysis-dir>/results/summary.txt"
+    exit 1
+}
+
+# Extract repo|npub key from category line (stdin filter)
+# Input: "repo | npub | state_refs=N | ..."
+# Output: "repo|npub"
+extract_key() {
+    awk -F' \\| ' '{print $1 "|" $2}'
+}
+
+# Extract repo from category line (stdin filter)
+# Input: "repo | npub | ..."
+# Output: "repo"
+extract_repo() {
+    awk -F' \\| ' '{print $1}'
+}
+
+# Extract npub from category line (stdin filter)
+# Input: "repo | npub | ..."
+# Output: "npub"
+extract_npub() {
+    awk -F' \\| ' '{print $2}'
+}
+
+# Check if a file exists and has content (ignoring comment lines)
+# Arguments: $1 - file path
+# Returns:   0 if at least one line is neither empty nor starts with '#'
+file_has_content() {
+    local file="$1"
+    if [[ ! -f "$file" ]]; then
+        return 1
+    fi
+    # Check for non-comment, non-empty lines.
+    # One grep on purpose: in a "grep -v | grep -q" pipeline the reader exits
+    # at the first hit, the writer can be killed by SIGPIPE on a large file,
+    # and under pipefail that reports a populated file as having no content.
+    grep -q -v -e '^#' -e '^$' -- "$file" 2>/dev/null
+}
+
+# Count non-comment lines in a file
+# Arguments: $1 - file path
+# Output:    number of non-empty, non-comment lines (0 if the file is missing)
+count_lines() {
+    local file="$1"
+    if [[ ! -f "$file" ]]; then
+        echo "0"
+        return
+    fi
+    local count
+    count=$(grep -v '^#' "$file" 2>/dev/null | grep -c '.' 2>/dev/null) || count=0
+    # Ensure we return a clean integer
+    echo "${count:-0}"
+}
+
+# Parse deletions.json to extract deleted repo identifiers
+# Kind 5 events have "e" tags pointing to the deleted event
+# We need to cross-reference with announcements to get repo/npub
+# For now, we extract the pubkey and any "a" tags (addressable event references)
+#
+# Arguments: $1 - deletions file, $2 - output file
+# Output:    "identifier|pubkey" lines, sorted and de-duplicated, in $2
+#            ($2 is created empty when $1 is missing or cannot be parsed)
+# NOTE(review): the second column is the deletion event's .pubkey exactly as
+# stored in the JSON; the category files call their second column "npub".
+# Confirm both use the same encoding before matching keys against each other.
+parse_deletions() {
+    local deletions_file="$1"
+    local output_file="$2"
+
+    if [[ ! -f "$deletions_file" ]]; then
+        touch "$output_file"
+        return
+    fi
+
+    # Extract deletion targets from kind 5 events
+    # Kind 5 events can reference:
+    #   - "e" tag: specific event ID
+    #   - "a" tag: addressable event (kind:pubkey:identifier)
+    # For 30617 announcements, "a" tag format is: 30617:<pubkey>:<identifier>
+    #
+    # Robustness details of the filter:
+    #   - accepts one event per line as well as a single JSON array of events
+    #   - skips "a" tags without a string value instead of aborting the run
+    #   - re-joins everything after the second ':' so identifiers that contain
+    #     ':' are kept whole
+    jq -r '
+        (if type == "array" then .[] else . end) |
+        select(.kind == 5) |
+        .pubkey as $pubkey |
+        .tags[] |
+        select(.[0] == "a" and (.[1] | type) == "string") |
+        .[1] |
+        split(":") |
+        select(.[0] == "30617" and length >= 3) |
+        "\(.[2:] | join(":"))|\($pubkey)"
+    ' "$deletions_file" 2>/dev/null | sort -u > "$output_file" || touch "$output_file"
+}
+
+# Build a lookup set from a file (repo|npub format)
+# Arguments: $1 - category file ("repo | npub | ..." lines, '#' comments)
+# Returns keys one per line (nothing when the file does not exist)
+build_key_set() {
+    local file="$1"
+    if [[ ! -f "$file" ]]; then
+        return 0
+    fi
+    # Use || true to prevent pipefail from exiting on empty grep
+    { grep -v '^#' "$file" 2>/dev/null || true; } | extract_key | sort -u
+}
+
+# Main classification logic
+main() {
+    if [[ $# -ne 1 ]]; then
+        usage
+    fi
+
+    local analysis_dir="$1"
+
+    # Validate input directory
+    if [[ ! -d "$analysis_dir" ]]; then
+        log_error "Analysis directory not found: $analysis_dir"
+        exit 1
+    fi
+
+    # Check for required subdirectories
+    local prod_dir="$analysis_dir/prod"
+    local archive_dir="$analysis_dir/archive"
+    local comparison_dir="$analysis_dir/comparison"
+    local logs_dir="$analysis_dir/logs"
+    local results_dir="$analysis_dir/results"
+
+    # logs/ is optional (Phase 4) and results/ is created later, so only the
+    # Phase 1-3 directories are required here.
+    local dir
+    for dir in "$prod_dir" "$archive_dir" "$comparison_dir"; do
+        if [[ ! -d "$dir" ]]; then
+            log_error "Required directory not found: $dir"
+            log_error "Run Phases 1-3 first to generate input data."
+            exit 1
+        fi
+    done
+
+    # Check for required category files
+    if [[ ! -f "$prod_dir/category1-complete-match.txt" ]]; then
+        log_error "Missing category files in $prod_dir"
+        log_error "Run Phase 3 (20-categorize.sh) first."
+ exit 1 + fi + + log_info "Starting final classification" + log_info "Analysis directory: $analysis_dir" + + # Create output directory + mkdir -p "$results_dir" + + # Create temp directory for intermediate files + local tmp_dir + tmp_dir=$(mktemp -d) + # shellcheck disable=SC2064 + trap "rm -rf '$tmp_dir'" EXIT + + # Initialize output files + local no_action="$results_dir/no-action-required.txt" + local action_req="$results_dir/action-required.txt" + local manual_inv="$results_dir/manual-investigation.txt" + local summary="$results_dir/summary.txt" + + # Write headers + { + echo "# No Action Required - Repos that are fine as-is" + echo "# Generated: $(date -Iseconds)" + echo "# Format: repo | npub | reason" + echo "#" + } > "$no_action" + + { + echo "# Action Required - Repos needing intervention" + echo "# Generated: $(date -Iseconds)" + echo "# Format: repo | npub | reason | suggested_action" + echo "#" + } > "$action_req" + + { + echo "# Manual Investigation Required - Repos needing human review" + echo "# Generated: $(date -Iseconds)" + echo "# Format: repo | npub | reason | context" + echo "#" + } > "$manual_inv" + + # ========================================================================= + # STEP 1: Parse deletion events + # ========================================================================= + log_info "Parsing deletion events..." 
+ + parse_deletions "$prod_dir/raw/deletions.json" "$tmp_dir/prod_deletions.txt" + parse_deletions "$archive_dir/raw/deletions.json" "$tmp_dir/archive_deletions.txt" + + # Combine deletions (union of both) + cat "$tmp_dir/prod_deletions.txt" "$tmp_dir/archive_deletions.txt" 2>/dev/null | sort -u > "$tmp_dir/all_deletions.txt" + + local deletion_count + deletion_count=$(wc -l < "$tmp_dir/all_deletions.txt" | tr -d ' ') + log_info "Found $deletion_count deletion requests" + + # ========================================================================= + # STEP 2: Parse log-based categories (Phase 4) + # ========================================================================= + log_info "Parsing log-based categories..." + + # Parse failures: reponpubkindevent_idreason + if [[ -f "$logs_dir/parse-failures.txt" ]] && file_has_content "$logs_dir/parse-failures.txt"; then + grep -v '^#' "$logs_dir/parse-failures.txt" | awk -F'\t' '{print $1 "|" $2}' | sort -u > "$tmp_dir/parse_failures.txt" + log_info "Found $(wc -l < "$tmp_dir/parse_failures.txt" | tr -d ' ') parse failure entries" + else + touch "$tmp_dir/parse_failures.txt" + log_info "No parse failures found (logs may be empty or not yet generated)" + fi + + # Purgatory expired: reponpubtimestampreason + if [[ -f "$logs_dir/purgatory-expired.txt" ]] && file_has_content "$logs_dir/purgatory-expired.txt"; then + grep -v '^#' "$logs_dir/purgatory-expired.txt" | awk -F'\t' '{print $1 "|" $2}' | sort -u > "$tmp_dir/purgatory_expired.txt" + log_info "Found $(wc -l < "$tmp_dir/purgatory_expired.txt" | tr -d ' ') purgatory expiry entries" + else + touch "$tmp_dir/purgatory_expired.txt" + log_info "No purgatory expiry entries found (logs may be empty or not yet generated)" + fi + + # ========================================================================= + # STEP 3: Build lookup tables from category files + # ========================================================================= + log_info "Building lookup tables..." 
+ + # Build key sets for each category (prod) + build_key_set "$prod_dir/category1-complete-match.txt" > "$tmp_dir/prod_cat1.txt" + build_key_set "$prod_dir/category2-empty-blank.txt" > "$tmp_dir/prod_cat2.txt" + build_key_set "$prod_dir/category3-partial-match.txt" > "$tmp_dir/prod_cat3.txt" + build_key_set "$prod_dir/category4-no-match.txt" > "$tmp_dir/prod_cat4.txt" + + # Build key sets for each category (archive) + build_key_set "$archive_dir/category1-complete-match.txt" > "$tmp_dir/archive_cat1.txt" + build_key_set "$archive_dir/category2-empty-blank.txt" > "$tmp_dir/archive_cat2.txt" + build_key_set "$archive_dir/category3-partial-match.txt" > "$tmp_dir/archive_cat3.txt" + build_key_set "$archive_dir/category4-no-match.txt" > "$tmp_dir/archive_cat4.txt" + + # All repos in prod + cat "$tmp_dir"/prod_cat*.txt 2>/dev/null | sort -u > "$tmp_dir/all_prod.txt" || true + + # All repos in archive + cat "$tmp_dir"/archive_cat*.txt 2>/dev/null | sort -u > "$tmp_dir/all_archive.txt" || true + + # ========================================================================= + # STEP 4: Process comparison files and apply classification + # ========================================================================= + log_info "Applying classification logic..." 
+ + # Track processed repos to detect duplicates/conflicts + > "$tmp_dir/processed.txt" + + # Counters + local count_no_action=0 + local count_action=0 + local count_manual=0 + + # --- NO ACTION: Complete in both --- + if [[ -f "$comparison_dir/complete-in-both.txt" ]]; then + while IFS= read -r line; do + [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue + + repo=$(echo "$line" | extract_repo) + npub=$(echo "$line" | extract_npub) + key="${repo}|${npub}" + + # Check if deleted (still no action, but different reason) + if grep -qF "$key" "$tmp_dir/all_deletions.txt" 2>/dev/null; then + echo "$repo | $npub | deleted by user (also complete in both)" >> "$no_action" + else + echo "$repo | $npub | complete in both prod and archive" >> "$no_action" + fi + echo "$key" >> "$tmp_dir/processed.txt" + ((count_no_action++)) || true + done < "$comparison_dir/complete-in-both.txt" + fi + + # --- NO ACTION: Deleted by user (not already processed) --- + while IFS='|' read -r repo npub; do + [[ -z "$repo" ]] && continue + key="${repo}|${npub}" + + # Skip if already processed + if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then + continue + fi + + # Convert pubkey to npub if needed (deletions use hex pubkey) + # For now, just use the pubkey as-is since we're matching by repo + echo "$repo | $npub | deleted by user" >> "$no_action" + echo "$key" >> "$tmp_dir/processed.txt" + ((count_no_action++)) || true + done < "$tmp_dir/all_deletions.txt" + + # --- NO ACTION: Empty/blank in both --- + # Find repos that are category 2 in both prod and archive + comm -12 "$tmp_dir/prod_cat2.txt" "$tmp_dir/archive_cat2.txt" 2>/dev/null | while IFS='|' read -r repo npub; do + [[ -z "$repo" ]] && continue + key="${repo}|${npub}" + + if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then + continue + fi + + echo "$repo | $npub | empty/blank in both (user never pushed)" >> "$no_action" + echo "$key" >> "$tmp_dir/processed.txt" + done + + # --- NO ACTION: Purgatory expired (system 
handled it) --- + while IFS='|' read -r repo npub; do + [[ -z "$repo" ]] && continue + key="${repo}|${npub}" + + if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then + continue + fi + + echo "$repo | $npub | purgatory expired (system already handled)" >> "$no_action" + echo "$key" >> "$tmp_dir/processed.txt" + ((count_no_action++)) || true + done < "$tmp_dir/purgatory_expired.txt" + + # --- ACTION REQUIRED: Complete in prod, missing from archive --- + if [[ -f "$comparison_dir/complete-prod-missing-archive.txt" ]]; then + while IFS= read -r line; do + [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue + + repo=$(echo "$line" | extract_repo) + npub=$(echo "$line" | extract_npub) + key="${repo}|${npub}" + + if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then + continue + fi + + # Check for parse failure + if grep -qF "$key" "$tmp_dir/parse_failures.txt" 2>/dev/null; then + echo "$repo | $npub | complete in prod, missing from archive, parse failure logged | investigate parse failure, may need re-announcement" >> "$manual_inv" + echo "$key" >> "$tmp_dir/processed.txt" + ((count_manual++)) || true + else + echo "$repo | $npub | complete in prod, missing from archive | trigger re-sync or investigate why not archived" >> "$action_req" + echo "$key" >> "$tmp_dir/processed.txt" + ((count_action++)) || true + fi + done < "$comparison_dir/complete-prod-missing-archive.txt" + fi + + # --- ACTION REQUIRED: Complete in prod, incomplete in archive --- + if [[ -f "$comparison_dir/complete-prod-incomplete-archive.txt" ]]; then + while IFS= read -r line; do + [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue + + repo=$(echo "$line" | extract_repo) + npub=$(echo "$line" | extract_npub) + key="${repo}|${npub}" + + if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then + continue + fi + + # Extract archive status from line + archive_status=$(echo "$line" | grep -oP 'archive=\K[^ ]+' || echo "unknown") + + echo "$repo | $npub | complete in prod, $archive_status in 
archive | wait for sync to complete or trigger re-sync" >> "$action_req" + echo "$key" >> "$tmp_dir/processed.txt" + ((count_action++)) || true + done < "$comparison_dir/complete-prod-incomplete-archive.txt" + fi + + # --- ACTION REQUIRED: Incomplete in both --- + if [[ -f "$comparison_dir/incomplete-in-both.txt" ]]; then + while IFS= read -r line; do + [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue + + repo=$(echo "$line" | extract_repo) + npub=$(echo "$line" | extract_npub) + key="${repo}|${npub}" + + if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then + continue + fi + + # Extract statuses + prod_status=$(echo "$line" | grep -oP 'prod=\K[^ ]+' | tr -d '|' || echo "unknown") + archive_status=$(echo "$line" | grep -oP 'archive=\K[^ ]+' || echo "unknown") + + echo "$repo | $npub | incomplete in both (prod=$prod_status, archive=$archive_status) | investigate git data source, may need user to re-push" >> "$action_req" + echo "$key" >> "$tmp_dir/processed.txt" + ((count_action++)) || true + done < "$comparison_dir/incomplete-in-both.txt" + fi + + # --- MANUAL INVESTIGATION: In archive but not prod --- + if [[ -f "$comparison_dir/in-archive-not-prod.txt" ]]; then + while IFS= read -r line; do + [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue + + repo=$(echo "$line" | extract_repo) + npub=$(echo "$line" | extract_npub) + key="${repo}|${npub}" + + if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then + continue + fi + + archive_status=$(echo "$line" | grep -oP 'archive=\K[^ ]+' || echo "unknown") + + # Check if it was deleted + if grep -qF "$key" "$tmp_dir/all_deletions.txt" 2>/dev/null; then + echo "$repo | $npub | in archive not prod, deletion exists | verify deletion was intentional" >> "$manual_inv" + else + echo "$repo | $npub | in archive ($archive_status) but not in prod | may be new announcement or deleted from prod" >> "$manual_inv" + fi + echo "$key" >> "$tmp_dir/processed.txt" + ((count_manual++)) || true + done < 
"$comparison_dir/in-archive-not-prod.txt" + fi + + # --- ACTION REQUIRED: Parse failures not yet processed --- + while IFS='|' read -r repo npub; do + [[ -z "$repo" ]] && continue + key="${repo}|${npub}" + + if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then + continue + fi + + echo "$repo | $npub | parse failure logged | fix event format or request user to re-announce" >> "$action_req" + echo "$key" >> "$tmp_dir/processed.txt" + ((count_action++)) || true + done < "$tmp_dir/parse_failures.txt" + + # --- MANUAL INVESTIGATION: Prod category 3/4 not yet processed --- + for cat_file in "$tmp_dir/prod_cat3.txt" "$tmp_dir/prod_cat4.txt"; do + [[ ! -f "$cat_file" ]] && continue + cat_name=$(basename "$cat_file" .txt | sed 's/prod_//') + while IFS='|' read -r repo npub; do + [[ -z "$repo" ]] && continue + key="${repo}|${npub}" + + if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then + continue + fi + + if [[ "$cat_name" == "cat3" ]]; then + echo "$repo | $npub | partial match in prod, not in comparison results | investigate git ref mismatch" >> "$manual_inv" + else + echo "$repo | $npub | no match in prod (git exists but refs don't match) | investigate git data corruption" >> "$manual_inv" + fi + echo "$key" >> "$tmp_dir/processed.txt" + ((count_manual++)) || true + done < "$cat_file" + done + + # ========================================================================= + # STEP 5: Count final results + # ========================================================================= + count_no_action=$(count_lines "$no_action") + count_action=$(count_lines "$action_req") + count_manual=$(count_lines "$manual_inv") + + # Ensure counts are valid integers + count_no_action=${count_no_action:-0} + count_action=${count_action:-0} + count_manual=${count_manual:-0} + + local total=$((count_no_action + count_action + count_manual)) + + # Handle division by zero + if [[ $total -eq 0 ]]; then + total=1 # Avoid division by zero in percentage calculations + log_warn 
"No repos were classified. Check input files." + fi + + # ========================================================================= + # STEP 6: Generate summary + # ========================================================================= + log_info "Generating summary..." + + cat > "$summary" << EOF +# Migration Classification Summary +Generated: $(date -Iseconds) +Analysis Directory: $analysis_dir + +## Overview + +| Category | Count | Percentage | +|----------|-------|------------| +| No Action Required | $count_no_action | $(awk "BEGIN {printf \"%.1f\", ($count_no_action/$total)*100}")% | +| Action Required | $count_action | $(awk "BEGIN {printf \"%.1f\", ($count_action/$total)*100}")% | +| Manual Investigation | $count_manual | $(awk "BEGIN {printf \"%.1f\", ($count_manual/$total)*100}")% | +| **Total** | **$total** | **100%** | + +## No Action Required ($count_no_action repos) + +These repositories are ready for migration or don't need migration: + +EOF + + # Breakdown of no-action reasons + echo "| Reason | Count |" >> "$summary" + echo "|--------|-------|" >> "$summary" + grep -v '^#' "$no_action" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn > "$tmp_dir/no_action_breakdown.txt" || true + while read -r cnt reason; do + echo "| $reason | $cnt |" >> "$summary" + done < "$tmp_dir/no_action_breakdown.txt" + + cat >> "$summary" << EOF + +## Action Required ($count_action repos) + +These repositories need intervention before migration: + +EOF + + # Breakdown of action reasons + echo "| Reason | Count | Suggested Action |" >> "$summary" + echo "|--------|-------|------------------|" >> "$summary" + grep -v '^#' "$action_req" 2>/dev/null | awk -F' \\| ' '{print $3 "|" $4}' | sort | uniq -c | sort -rn > "$tmp_dir/action_breakdown.txt" || true + while read -r cnt reason_action; do + reason=$(echo "$reason_action" | cut -d'|' -f1) + action=$(echo "$reason_action" | cut -d'|' -f2) + echo "| $reason | $cnt | $action |" >> "$summary" + done < 
"$tmp_dir/action_breakdown.txt" + + cat >> "$summary" << EOF + +## Manual Investigation ($count_manual repos) + +These repositories have conflicting or unexpected states requiring human review: + +EOF + + # Breakdown of manual investigation reasons + echo "| Reason | Count |" >> "$summary" + echo "|--------|-------|" >> "$summary" + grep -v '^#' "$manual_inv" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn > "$tmp_dir/manual_breakdown.txt" || true + while read -r cnt reason; do + echo "| $reason | $cnt |" >> "$summary" + done < "$tmp_dir/manual_breakdown.txt" + + # Pre-compute counts from temp files before they might be cleaned up + local prod_del_count archive_del_count + local prod_cat1_count prod_cat2_count prod_cat3_count prod_cat4_count + local archive_cat1_count archive_cat2_count archive_cat3_count archive_cat4_count + local parse_fail_count purgatory_count + + prod_del_count=$(wc -l < "$tmp_dir/prod_deletions.txt" 2>/dev/null | tr -d ' ') || prod_del_count=0 + archive_del_count=$(wc -l < "$tmp_dir/archive_deletions.txt" 2>/dev/null | tr -d ' ') || archive_del_count=0 + prod_cat1_count=$(wc -l < "$tmp_dir/prod_cat1.txt" 2>/dev/null | tr -d ' ') || prod_cat1_count=0 + prod_cat2_count=$(wc -l < "$tmp_dir/prod_cat2.txt" 2>/dev/null | tr -d ' ') || prod_cat2_count=0 + prod_cat3_count=$(wc -l < "$tmp_dir/prod_cat3.txt" 2>/dev/null | tr -d ' ') || prod_cat3_count=0 + prod_cat4_count=$(wc -l < "$tmp_dir/prod_cat4.txt" 2>/dev/null | tr -d ' ') || prod_cat4_count=0 + archive_cat1_count=$(wc -l < "$tmp_dir/archive_cat1.txt" 2>/dev/null | tr -d ' ') || archive_cat1_count=0 + archive_cat2_count=$(wc -l < "$tmp_dir/archive_cat2.txt" 2>/dev/null | tr -d ' ') || archive_cat2_count=0 + archive_cat3_count=$(wc -l < "$tmp_dir/archive_cat3.txt" 2>/dev/null | tr -d ' ') || archive_cat3_count=0 + archive_cat4_count=$(wc -l < "$tmp_dir/archive_cat4.txt" 2>/dev/null | tr -d ' ') || archive_cat4_count=0 + parse_fail_count=$(wc -l < "$tmp_dir/parse_failures.txt" 
2>/dev/null | tr -d ' ') || parse_fail_count=0 + purgatory_count=$(wc -l < "$tmp_dir/purgatory_expired.txt" 2>/dev/null | tr -d ' ') || purgatory_count=0 + + cat >> "$summary" << EOF + +## Input Data Summary + +### Phase 1 (Events) +- Prod deletions: $prod_del_count +- Archive deletions: $archive_del_count + +### Phase 3 (Categories) +**Prod:** +- Category 1 (complete): $prod_cat1_count +- Category 2 (empty): $prod_cat2_count +- Category 3 (partial): $prod_cat3_count +- Category 4 (no match): $prod_cat4_count + +**Archive:** +- Category 1 (complete): $archive_cat1_count +- Category 2 (empty): $archive_cat2_count +- Category 3 (partial): $archive_cat3_count +- Category 4 (no match): $archive_cat4_count + +### Phase 4 (Logs) +- Parse failures: $parse_fail_count +- Purgatory expired: $purgatory_count + +## Recommended Next Steps + +1. **Review action-required.txt** - Address these repos before migration +2. **Review manual-investigation.txt** - Investigate unusual states +3. **Verify no-action-required.txt** - Spot-check a few repos to confirm +4. 
**Plan migration window** - Schedule cutover when action items are resolved + +## Output Files + +- \`results/no-action-required.txt\` - $count_no_action repos ready for migration +- \`results/action-required.txt\` - $count_action repos needing intervention +- \`results/manual-investigation.txt\` - $count_manual repos needing human review +- \`results/summary.txt\` - This summary file +EOF + + # ========================================================================= + # STEP 7: Display results + # ========================================================================= + echo "" + log_info "=== Classification Complete ===" + echo "" + log_success "No Action Required: $count_no_action repos" + log_warn "Action Required: $count_action repos" + log_error "Manual Investigation: $count_manual repos" + echo "" + log_info "Total: $total repos classified" + echo "" + log_info "Output files:" + echo " $no_action" + echo " $action_req" + echo " $manual_inv" + echo " $summary" + echo "" + + # Show top action items + if [[ $count_action -gt 0 ]]; then + log_info "Top action items:" + grep -v '^#' "$action_req" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn | head -5 | while read -r cnt reason; do + echo " - $reason: $cnt repos" + done + echo "" + fi + + # Show top investigation items + if [[ $count_manual -gt 0 ]]; then + log_info "Top investigation items:" + grep -v '^#' "$manual_inv" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn | head -5 | while read -r cnt reason; do + echo " - $reason: $cnt repos" + done + echo "" + fi + + log_info "See $summary for full details and recommended next steps." 
+} + +main "$@" -- cgit v1.2.3 From 4998ae6e575015f7d1a1cc75f2edd32369967bb2 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 11:44:14 +0000 Subject: Add orchestration script for migration analysis pipeline Adds run-migration-analysis.sh that orchestrates all 5 phases of the migration analysis with: - Parameterized inputs for relay URLs, git paths, and service name - Phase control (skip, only, from-phase options) - Dry-run mode to preview execution - Progress indicators and timing information - Error handling with continue-on-error option - Auto-detection of available features (git paths, journalctl) - Summary display with results overview --- .../migration-scripts/run-migration-analysis.sh | 716 +++++++++++++++++++++ 1 file changed, 716 insertions(+) create mode 100755 docs/how-to/migration-scripts/run-migration-analysis.sh diff --git a/docs/how-to/migration-scripts/run-migration-analysis.sh b/docs/how-to/migration-scripts/run-migration-analysis.sh new file mode 100755 index 0000000..2ba25ab --- /dev/null +++ b/docs/how-to/migration-scripts/run-migration-analysis.sh @@ -0,0 +1,716 @@ +#!/usr/bin/env bash +# +# run-migration-analysis.sh - Orchestrate the complete ngit-relay to ngit-grasp migration analysis +# +# This script runs all 5 phases of the migration analysis pipeline in sequence, +# with proper error handling, progress reporting, and timing information. 
+# +# QUICK START: +# # Basic usage (local analysis only - Phases 1, 3, 5) +# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev +# +# # Full analysis including git sync check (requires VPS access) +# ./run-migration-analysis.sh \ +# --prod-relay wss://relay.ngit.dev \ +# --archive-relay wss://archive.relay.ngit.dev \ +# --prod-git /var/lib/ngit-relay/git \ +# --archive-git /var/lib/ngit-relay-archive/git +# +# USAGE: +# ./run-migration-analysis.sh [options] +# +# REQUIRED OPTIONS: +# --prod-relay Production relay WebSocket URL (e.g., wss://relay.ngit.dev) +# --archive-relay Archive relay WebSocket URL (e.g., wss://archive.relay.ngit.dev) +# +# OPTIONAL OPTIONS: +# --prod-git Git base directory for prod (enables Phase 2) +# --archive-git Git base directory for archive (enables Phase 2) +# --service Systemd service name for log extraction (enables Phase 4) +# --output Output directory (default: work/migration-analysis-YYYYMMDD-HHMM) +# +# PHASE CONTROL: +# --skip-phase-1 Skip event fetching (use existing data) +# --skip-phase-2 Skip git sync check (use existing data) +# --skip-phase-3 Skip categorization (use existing data) +# --skip-phase-4 Skip log extraction (use existing data) +# --skip-phase-5 Skip final classification +# --only-phase-N Run only phase N (1-5) +# --from-phase-N Start from phase N (skip earlier phases) +# +# OTHER OPTIONS: +# --dry-run Show what would be executed without running +# --continue-on-error Continue to next phase even if current phase fails +# --help Show this help message +# +# PHASES: +# Phase 1: Fetch events from both relays (~30s each, local) +# Phase 2: Check git sync status (~20 min each, requires VPS) +# Phase 3: Categorize and compare results (fast, local) +# Phase 4: Extract logs from systemd (requires VPS) +# Phase 5: Final classification (fast, local) +# +# EXAMPLES: +# # Dry run to see what would happen +# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev 
# Print a prominent banner around a title on stdout.
# Arguments: $* - title text, printed verbatim
# Globals:   BOLD, CYAN, NC (read) - colour escape strings, empty when stdout
#            is not a terminal
# The colour variables hold literal "\033[..." text, so they are expanded with
# %b; the title goes through %s so backslashes in caller-supplied text (paths,
# URLs) are never interpreted as escape sequences.
log_header() {
    local rule="════════════════════════════════════════════════════════════════"

    printf '\n'
    printf '%b%s%b\n' "${BOLD}${CYAN}" "$rule" "${NC}"
    printf '%b %s%b\n' "${BOLD}${CYAN}" "$*" "${NC}"
    printf '%b%s%b\n' "${BOLD}${CYAN}" "$rule" "${NC}"
    printf '\n'
}
# Parse command line arguments into the script's global settings.
#
# Arguments: "$@" - the script's command line
# Globals written: PROD_RELAY, ARCHIVE_RELAY, PROD_GIT, ARCHIVE_GIT,
#                  SERVICE_NAME, OUTPUT_DIR, SKIP_PHASE_1..5, ONLY_PHASE,
#                  FROM_PHASE, DRY_RUN, CONTINUE_ON_ERROR
# Exits:     1 on an unknown option or when a value-taking option is the last
#            word on the command line; --help/-h delegates to usage.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --prod-relay|--archive-relay|--prod-git|--archive-git|--service|--output)
                # Without this check "$2" is unset and `shift 2` fails, which
                # surfaces as an "unbound variable" crash under `set -eu`.
                if [[ $# -lt 2 ]]; then
                    log_error "Option $1 requires a value"
                    echo "Use --help for usage information."
                    exit 1
                fi
                case "$1" in
                    --prod-relay) PROD_RELAY="$2" ;;
                    --archive-relay) ARCHIVE_RELAY="$2" ;;
                    --prod-git) PROD_GIT="$2" ;;
                    --archive-git) ARCHIVE_GIT="$2" ;;
                    --service) SERVICE_NAME="$2" ;;
                    --output) OUTPUT_DIR="$2" ;;
                esac
                shift 2
                ;;
            --skip-phase-[1-5])
                # The phase digit is constrained by the pattern above, so the
                # variable name built here is always one of SKIP_PHASE_1..5.
                printf -v "SKIP_PHASE_${1#--skip-phase-}" '%s' true
                shift
                ;;
            --only-phase-[1-5])
                ONLY_PHASE="${1#--only-phase-}"
                shift
                ;;
            --from-phase-[1-5])
                FROM_PHASE="${1#--from-phase-}"
                shift
                ;;
            --dry-run)
                DRY_RUN=true
                shift
                ;;
            --continue-on-error)
                CONTINUE_ON_ERROR=true
                shift
                ;;
            --help|-h)
                usage
                ;;
            *)
                log_error "Unknown option: $1"
                echo "Use --help for usage information."
                exit 1
                ;;
        esac
    done
}
"$PROD_RELAY" =~ ^wss?:// ]]; then + log_error "Invalid prod relay URL: $PROD_RELAY (must start with ws:// or wss://)" + errors=1 + fi + + if [[ -n "$ARCHIVE_RELAY" && ! "$ARCHIVE_RELAY" =~ ^wss?:// ]]; then + log_error "Invalid archive relay URL: $ARCHIVE_RELAY (must start with ws:// or wss://)" + errors=1 + fi + + # Validate git paths if provided + if [[ -n "$PROD_GIT" && ! -d "$PROD_GIT" ]]; then + log_warn "Prod git directory not found: $PROD_GIT" + log_warn "Phase 2 will fail unless running on VPS with access to this path." + fi + + if [[ -n "$ARCHIVE_GIT" && ! -d "$ARCHIVE_GIT" ]]; then + log_warn "Archive git directory not found: $ARCHIVE_GIT" + log_warn "Phase 2 will fail unless running on VPS with access to this path." + fi + + if [[ $errors -eq 1 ]]; then + echo "" + echo "Use --help for usage information." + exit 1 + fi +} + +# Check prerequisites +check_prerequisites() { + local missing=0 + + log_info "Checking prerequisites..." + + # Required tools + for tool in nak jq awk sort; do + if command -v "$tool" &> /dev/null; then + log_step "$tool: found" + else + log_error "$tool: NOT FOUND" + missing=1 + fi + done + + # Optional tools + if command -v journalctl &> /dev/null; then + log_step "journalctl: found (Phase 4 available)" + else + log_step "journalctl: not found (Phase 4 will be skipped)" + SKIP_PHASE_4=true + fi + + if [[ $missing -eq 1 ]]; then + log_error "Missing required tools. Install them and try again." + exit 1 + fi + + # Check scripts exist + for script in 01-fetch-events.sh 10-check-git-sync.sh 20-categorize.sh 21-compare-relays.sh 30-extract-parse-failures.sh 31-extract-purgatory-expiry.sh 40-classify-actions.sh; do + if [[ ! 
# Determine which phases to run.
#
# Folds --only-phase-N / --from-phase-N into the per-phase skip flags, then
# auto-skips the phases whose inputs were not supplied.
# Globals read:    ONLY_PHASE, FROM_PHASE, PROD_GIT, ARCHIVE_GIT, SERVICE_NAME
# Globals written: SKIP_PHASE_1 .. SKIP_PHASE_5 (only ever set to "true")
determine_phases() {
    local i

    # Handle --only-phase-N: skip every phase except N
    if [[ -n "$ONLY_PHASE" ]]; then
        for i in 1 2 3 4 5; do
            if [[ "$i" != "$ONLY_PHASE" ]]; then
                # printf -v assigns to the named variable without eval
                printf -v "SKIP_PHASE_$i" '%s' true
            fi
        done
    fi

    # Handle --from-phase-N: skip every phase before N
    if [[ -n "$FROM_PHASE" ]]; then
        for i in 1 2 3 4 5; do
            if [[ "$i" -lt "$FROM_PHASE" ]]; then
                printf -v "SKIP_PHASE_$i" '%s' true
            fi
        done
    fi

    # Auto-skip Phase 2 only when neither git path was provided
    if [[ -z "$PROD_GIT" && -z "$ARCHIVE_GIT" ]]; then
        if [[ "$SKIP_PHASE_2" != "true" ]]; then
            log_warn "No git paths provided. Phase 2 (git sync check) will be skipped."
            log_warn "Use --prod-git and --archive-git to enable Phase 2."
            SKIP_PHASE_2=true
        fi
    fi

    # Auto-skip Phase 4 if service not provided
    if [[ -z "$SERVICE_NAME" ]]; then
        if [[ "$SKIP_PHASE_4" != "true" ]]; then
            log_warn "No service name provided. Phase 4 (log extraction) will be skipped."
            log_warn "Use --service to enable Phase 4."
            SKIP_PHASE_4=true
        fi
    fi
}
# Phase 3: Categorize and compare
#
# Queues 20-categorize.sh for each relay that has a git-sync-status.tsv but no
# category files yet, then queues 21-compare-relays.sh once both relays have
# (or are about to have) category files. When the comparison cannot run,
# placeholder comparison files are written so later phases find their inputs.
#
# Globals read: OUTPUT_DIR, SCRIPT_DIR, DRY_RUN
# Returns:      0 when there is nothing to do, otherwise run_phase's status
run_phase_3() {
    local cmds=()
    local f

    local prod_categories="$OUTPUT_DIR/prod/category1-complete-match.txt"
    local archive_categories="$OUTPUT_DIR/archive/category1-complete-match.txt"

    # "ready" means the category files exist now or the categorization that
    # produces them has been queued ahead of the comparison. Testing only for
    # existing files would skip the comparison on the very run that creates
    # them, because the queued commands have not executed yet at this point.
    local prod_ready=false
    local archive_ready=false

    if [[ -f "$prod_categories" ]]; then
        prod_ready=true
    elif [[ -f "$OUTPUT_DIR/prod/git-sync-status.tsv" ]]; then
        cmds+=("'$SCRIPT_DIR/20-categorize.sh' '$OUTPUT_DIR/prod/git-sync-status.tsv' '$OUTPUT_DIR/prod'")
        prod_ready=true
    fi

    if [[ -f "$archive_categories" ]]; then
        archive_ready=true
    elif [[ -f "$OUTPUT_DIR/archive/git-sync-status.tsv" ]]; then
        cmds+=("'$SCRIPT_DIR/20-categorize.sh' '$OUTPUT_DIR/archive/git-sync-status.tsv' '$OUTPUT_DIR/archive'")
        archive_ready=true
    fi

    # Run comparison if both relays have category files
    if [[ "$prod_ready" == "true" && "$archive_ready" == "true" ]]; then
        cmds+=("'$SCRIPT_DIR/21-compare-relays.sh' '$OUTPUT_DIR/prod' '$OUTPUT_DIR/archive' '$OUTPUT_DIR/comparison'")
    else
        log_warn "Missing category files for comparison."
        log_warn "Phase 2 must complete successfully before Phase 3 can compare relays."

        # Create placeholder comparison files if they don't exist
        if [[ "$DRY_RUN" != "true" ]]; then
            mkdir -p "$OUTPUT_DIR/comparison"
            for f in complete-in-both.txt complete-prod-missing-archive.txt complete-prod-incomplete-archive.txt incomplete-in-both.txt in-archive-not-prod.txt; do
                if [[ ! -f "$OUTPUT_DIR/comparison/$f" ]]; then
                    echo "# Placeholder - Phase 2 data not available" > "$OUTPUT_DIR/comparison/$f"
                fi
            done
            echo "# Comparison not available - Phase 2 data missing" > "$OUTPUT_DIR/comparison/summary.txt"
        fi
    fi

    if [[ ${#cmds[@]} -eq 0 ]]; then
        log_warn "No categorization or comparison needed (already done or missing input)"
        return 0
    fi

    run_phase 3 "Categorize & Compare (fast)" "${cmds[@]}"
}
-f "$OUTPUT_DIR/logs/$f" ]]; then + echo "# No data - Phase 4 not run" > "$OUTPUT_DIR/logs/$f" + fi + done + fi + + if [[ "$can_run" == "false" ]]; then + log_error "Cannot run Phase 5 - missing required input directories" + return 1 + fi + + run_phase 5 "Final Classification (fast)" "'$SCRIPT_DIR/40-classify-actions.sh' '$OUTPUT_DIR'" +} + +# Display summary +display_summary() { + log_header "Migration Analysis Complete" + + echo "Output Directory: $OUTPUT_DIR" + echo "" + + # Phase timing summary + echo "Phase Timing:" + local total_time=0 + for phase in 1 2 3 4 5; do + local skip_var="SKIP_PHASE_$phase" + if [[ "${!skip_var}" == "true" ]]; then + echo " Phase $phase: SKIPPED" + elif [[ -n "${PHASE_TIMES[$phase]:-}" ]]; then + local t="${PHASE_TIMES[$phase]}" + echo " Phase $phase: ${t}s" + total_time=$((total_time + t)) + else + echo " Phase $phase: N/A" + fi + done + echo " ─────────────" + echo " Total: ${total_time}s" + echo "" + + # Results summary + if [[ -f "$OUTPUT_DIR/results/summary.txt" ]]; then + echo "Results Summary:" + echo "" + # Extract key metrics from summary + if grep -q "No Action Required" "$OUTPUT_DIR/results/summary.txt"; then + grep -A1 "No Action Required" "$OUTPUT_DIR/results/summary.txt" | head -2 + fi + if grep -q "Action Required" "$OUTPUT_DIR/results/summary.txt"; then + grep -A1 "Action Required" "$OUTPUT_DIR/results/summary.txt" | head -2 + fi + if grep -q "Manual Investigation" "$OUTPUT_DIR/results/summary.txt"; then + grep -A1 "Manual Investigation" "$OUTPUT_DIR/results/summary.txt" | head -2 + fi + echo "" + fi + + # Output files + echo "Output Files:" + echo " $OUTPUT_DIR/results/no-action-required.txt" + echo " $OUTPUT_DIR/results/action-required.txt" + echo " $OUTPUT_DIR/results/manual-investigation.txt" + echo " $OUTPUT_DIR/results/summary.txt" + echo "" + + # Next steps + echo "Next Steps:" + echo " 1. Review results/summary.txt for overview" + echo " 2. Address items in results/action-required.txt" + echo " 3. 
Investigate items in results/manual-investigation.txt" + echo " 4. Plan migration window when action items are resolved" + echo "" +} + +# Main +main() { + parse_args "$@" + + log_header "ngit-relay to ngit-grasp Migration Analysis" + + validate_args + check_prerequisites + determine_phases + setup_output_dir + + # Show configuration + log_info "Configuration:" + log_step "Prod relay: $PROD_RELAY" + log_step "Archive relay: $ARCHIVE_RELAY" + [[ -n "$PROD_GIT" ]] && log_step "Prod git: $PROD_GIT" + [[ -n "$ARCHIVE_GIT" ]] && log_step "Archive git: $ARCHIVE_GIT" + [[ -n "$SERVICE_NAME" ]] && log_step "Service: $SERVICE_NAME" + log_step "Output: $OUTPUT_DIR" + echo "" + + # Show phase plan + log_info "Phase Plan:" + for phase in 1 2 3 4 5; do + local skip_var="SKIP_PHASE_$phase" + if [[ "${!skip_var}" == "true" ]]; then + log_step "Phase $phase: SKIP" + else + log_step "Phase $phase: RUN" + fi + done + echo "" + + if [[ "$DRY_RUN" == "true" ]]; then + log_warn "DRY RUN MODE - No changes will be made" + echo "" + fi + + # Run phases + local overall_exit=0 + + run_phase_1 || overall_exit=1 + run_phase_2 || overall_exit=1 + run_phase_3 || overall_exit=1 + run_phase_4 || overall_exit=1 + run_phase_5 || overall_exit=1 + + # Display summary + if [[ "$DRY_RUN" != "true" ]]; then + display_summary + fi + + if [[ $overall_exit -ne 0 ]]; then + log_warn "Some phases failed. Review output for details." 
+ fi + + exit $overall_exit +} + +main "$@" -- cgit v1.2.3 From f84c7d04ff5d3f9c6c56d78bc00c01814e7348e4 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 11:45:33 +0000 Subject: Restructure migration guide for practical usage Transforms the guide from a technical reference into a practical step-by-step guide with: - Quick Start section at the top with copy-paste commands - Prerequisites section with verification steps - Migration Overview explaining the 3-stage process - Running the Analysis section with all options documented - Understanding Results section explaining output files - Troubleshooting section for common issues - Architecture section (moved from top) for those wanting details - Next Steps section for post-analysis workflow The guide now follows a practical flow: get started fast, understand results, then dive into architecture details if needed. --- docs/how-to/migrate-ngit-relay-to-ngit-grasp.md | 484 ++++++++++++++++-------- 1 file changed, 336 insertions(+), 148 deletions(-) diff --git a/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md b/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md index 4c3a4ba..975eb4c 100644 --- a/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md +++ b/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md @@ -1,207 +1,395 @@ -# Migrate ngit-relay to ngit-grasp on NixOS VPS +# Migrate ngit-relay to ngit-grasp -**Goal:** Replace an ngit-relay instance on a VPS running NixOS with ngit-grasp. +This guide walks you through migrating a production ngit-relay instance to ngit-grasp. The process involves analyzing your existing data to identify repositories that need attention before switching over. -**Specifics:** VPS running NixOS. +## Quick Start -## Approach +Run the migration analysis with a single command: -1. Deploy ngit-grasp with 'domain' of `.internal` and an `archiveService` of `` running on a different port. 
This will gather all the events and git data from the production service and relays/git servers/grasp servers that for repositories that list the service in their announcement event. To sync all git data may take an hour. +```bash +# Basic analysis (fetches events, compares relays) +./docs/how-to/migration-scripts/run-migration-analysis.sh \ + --prod-relay wss://relay.ngit.dev \ + --archive-relay wss://archive.relay.ngit.dev -2. Analyze the data to see which repositories have not been moved with complete data. Understand why and for each decide if action is needed / not needed to move it. +# Full analysis (includes git sync check - run on VPS) +./docs/how-to/migration-scripts/run-migration-analysis.sh \ + --prod-relay wss://relay.ngit.dev \ + --archive-relay wss://archive.relay.ngit.dev \ + --prod-git /var/lib/ngit-relay/git \ + --archive-git /var/lib/ngit-relay-archive/git \ + --service ngit-grasp.service +``` + +The script produces three output files: +- `results/no-action-required.txt` - Repos ready for migration +- `results/action-required.txt` - Repos needing intervention +- `results/manual-investigation.txt` - Repos needing human review + +See [Running the Analysis](#running-the-analysis) for detailed options. + +## Prerequisites + +### Required Tools + +- **nak** - Nostr Army Knife for fetching events ([install](https://github.com/fiatjaf/nak)) +- **jq** - JSON processing (install via package manager) + +### For Full Analysis (VPS) + +- SSH access to the VPS running ngit-relay +- Read access to git data directories +- Access to systemd journal (for log extraction) + +### Verify Installation + +```bash +# Check required tools +nak --version +jq --version + +# Check optional tools (for VPS phases) +journalctl --version +``` + +## Migration Overview + +The migration process has three stages: + +### Stage 1: Deploy Archive Instance + +Deploy ngit-grasp alongside your production ngit-relay: + +1. 
Configure ngit-grasp with: + - `domain` set to `.internal` (temporary) + - `archiveService` set to your production domain + - Running on a different port + +2. Let it sync for ~1 hour to gather all events and git data + +### Stage 2: Analyze Data + +Run the migration analysis to identify: +- Repositories successfully migrated (no action needed) +- Repositories with incomplete data (need investigation) +- Repositories with parse failures (may need re-announcement) + +### Stage 3: Switch Over + +Once all issues are resolved: +1. Set `domain` to your production URL +2. Disable archive mode +3. Update your reverse proxy to point to ngit-grasp + +## Running the Analysis + +### Basic Usage + +```bash +# Preview what will happen (dry run) +./run-migration-analysis.sh \ + --prod-relay wss://relay.ngit.dev \ + --archive-relay wss://archive.relay.ngit.dev \ + --dry-run + +# Run the analysis +./run-migration-analysis.sh \ + --prod-relay wss://relay.ngit.dev \ + --archive-relay wss://archive.relay.ngit.dev +``` + +### Full Analysis on VPS + +```bash +./run-migration-analysis.sh \ + --prod-relay wss://relay.ngit.dev \ + --archive-relay wss://archive.relay.ngit.dev \ + --prod-git /var/lib/ngit-relay/git \ + --archive-git /var/lib/ngit-relay-archive/git \ + --service ngit-grasp.service +``` + +### Phase Control + +Skip or run specific phases: + +```bash +# Skip Phase 2 (use cached git sync data) +./run-migration-analysis.sh ... --skip-phase-2 + +# Run only Phase 1 (fetch events) +./run-migration-analysis.sh ... --only-phase-1 + +# Resume from Phase 3 (using existing data) +./run-migration-analysis.sh ... 
--from-phase-3 --output work/migration-analysis-20260122-1430 +``` + +### All Options + +| Option | Description | +|--------|-------------| +| `--prod-relay <url>` | Production relay WebSocket URL (required) | +| `--archive-relay <url>` | Archive relay WebSocket URL (required) | +| `--prod-git <path>` | Git base directory for prod (enables Phase 2) | +| `--archive-git <path>` | Git base directory for archive (enables Phase 2) | +| `--service <name>` | Systemd service name (enables Phase 4) | +| `--output <dir>` | Output directory (default: auto-generated) | +| `--skip-phase-N` | Skip phase N (1-5) | +| `--only-phase-N` | Run only phase N | +| `--from-phase-N` | Start from phase N | +| `--dry-run` | Show what would be executed | +| `--continue-on-error` | Continue even if a phase fails | + +## Understanding Results + +### Summary File + +The `results/summary.txt` file provides an overview: + +``` +## Overview + +| Category | Count | Percentage | +|----------|-------|------------| +| No Action Required | 450 | 85.7% | +| Action Required | 52 | 9.9% | +| Manual Investigation | 23 | 4.4% | +``` + +### No Action Required -3. Set the 'domain' to production URL, turn off archive mode, and point your reverse proxy at the new port. +Repositories in `no-action-required.txt` are ready for migration: -## Challenges +``` +myrepo | npub1abc... | complete in both prod and archive +oldrepo | npub1def... | deleted by user +testrepo | npub1ghi... | empty/blank in both (user never pushed) +``` + +**Common reasons:** +- `complete in both prod and archive` - Successfully migrated +- `deleted by user` - User requested deletion (kind 5 event) +- `empty/blank in both` - No git data was ever pushed +- `purgatory expired` - System already handled the timeout + +### Action Required + +Repositories in `action-required.txt` need intervention: + +``` +myrepo | npub1abc... | complete in prod, missing from archive | trigger re-sync or investigate +otherrepo | npub1def... 
| incomplete in both (prod=cat3, archive=cat2) | investigate git data source +``` + +**Common actions:** +- **Re-sync needed**: Trigger the archive to re-fetch from the source +- **Wait for sync**: Archive sync may still be in progress +- **Investigate git source**: Original git data may be incomplete +- **Fix parse failure**: Event format issue, may need re-announcement -- **ngit-relay accepts any commits/annotated tags** that were at that point of time referenced in the latest state event. **ngit-grasp requires all the git data** to reproduce the latest state. So if the git data is incomplete, it won't accept the repository. +### Manual Investigation -- **ngit-relay doesn't clear out refs/nostr/** where it doesn't have a PR event. Fortunately the 'PR' (as opposed to patches) functionality is not widely used so we just need to check a few repositories (shakespeare, ngit and gitworkshop). +Repositories in `manual-investigation.txt` have unusual states: + +``` +weirdrepo | npub1abc... | in archive (cat1) but not in prod | may be new announcement or deleted from prod +conflictrepo | npub1def... | complete in prod, missing from archive, parse failure logged | investigate parse failure +``` -## Analysis Categories +These require human judgment to determine the correct action. 
-### No action required: +## Troubleshooting -| Category | How to Detect | Source | -|----------|---------------|--------| -| **Git Data Complete - Moved** | prod cat1 AND archive cat1 (same repo) | Git sync check | -| **Invalid Announcement** (Won't Parse) | Log: `[PARSE_FAIL] kind=30617` | Archive logs | -| **Deletion Request** | kind 5 event tagging announcement | Event fetch | -| **Announcement Not on Prod But In Archive** | In archive announcements, not in prod | Event comparison | +### "nak not found" -### Action/decision required: +Install nak from https://github.com/fiatjaf/nak: -| Category | How to Detect | Source | -|----------|---------------|--------| -| **Invalid State Event** (Won't Parse) | Log: `[PARSE_FAIL] kind=30618` | Archive logs | -| **Purgatory Expired** (sync should have worked) | Log: `[PURGATORY_EXPIRED]` | Archive logs | -| **Incomplete Git Data** (both relays) | prod cat2/3/4 AND archive cat2/3/4 | Git sync check | -| **No Announcement In Archive** | In prod, not in archive, no deletion | Event comparison | -| **State but incomplete git in Archive** | archive cat3 or cat4 | Git sync check | +```bash +# Using Go +go install github.com/fiatjaf/nak@latest -### Manual investigation required: +# Or download binary from releases +``` -- Repos that don't fit above categories -- Repos with unexpected state (e.g., complete in prod, missing in archive, no log entries) +### "Permission denied" on git directories -## Analysis Script Architecture +Run with sudo or ensure your user has read access: -The analysis is split into modular phases for fast iteration. Phases 1-3 and 5 can run locally; Phase 2 and 4 require VPS access. +```bash +# Check permissions +ls -la /var/lib/ngit-relay/git + +# Run with sudo if needed +sudo ./run-migration-analysis.sh ... +``` + +### Phase 2 takes too long + +The git sync check processes each repository individually (~20 minutes per relay). To speed up iteration: + +1. Run Phase 2 once and save the output +2. 
Use `--skip-phase-2` for subsequent runs +3. Use `--from-phase-3` to re-run classification with existing data + +### No parse failures found + +This is expected if: +- ngit-grasp logging improvements aren't deployed yet +- No events actually failed to parse + +The analysis will continue without log data. + +### Event counts are multiples of 250 + +This suggests pagination may have failed. The scripts use `--paginate` by default, but if you see exactly 250, 500, 750 events, verify the relay is responding correctly. + +## Architecture + +### Analysis Phases + +The analysis is split into 5 modular phases: + +| Phase | Name | Time | Location | Description | +|-------|------|------|----------|-------------| +| 1 | Fetch Events | ~30s each | Local | Fetch events from both relays | +| 2 | Git Sync Check | ~20 min each | VPS | Compare state events to git data | +| 3 | Categorize & Compare | <1s | Local | Categorize and compare results | +| 4 | Extract Logs | <30s | VPS | Extract parse failures and purgatory expiry | +| 5 | Final Classification | <5s | Local | Combine all data into actionable results | + +### Phase Flow Diagram ``` ┌─────────────────────────────────────────────────────────────────┐ │ PHASE 1: Fetch Events (~30s, local) │ -│ migration-scripts/01-fetch-events.sh │ -├─────────────────────────────────────────────────────────────────┤ -│ Fetches from relay: │ -│ - kind 30618 (state events) │ -│ - kind 30617 (announcements) │ -│ - kind 5 (deletion requests) │ -│ │ -│ Run twice: once for prod (relay.ngit.dev), once for archive │ -│ Output: /{state,announcements,deletions}.json │ +│ Fetches kind 30618 (state), 30617 (announcements), 5 (deletion) │ +│ Run twice: once for prod, once for archive │ └─────────────────────────────────────────────────────────────────┘ ↓ ┌─────────────────────────────────────────────────────────────────┐ │ PHASE 2: Git Sync Check (~20 mins, VPS required) │ -│ migration-scripts/10-check-git-sync.sh │ 
-├─────────────────────────────────────────────────────────────────┤ -│ For each state event, compares refs to actual git data on disk. │ -│ │ -│ Run twice: │ -│ - prod: GIT_BASE=/persistent/relay-ngit-dev-ngit-relay/... │ -│ - archive: GIT_BASE=/persistent/grasp/sync-archive/git │ -│ │ -│ Output: git-sync-status.tsv │ -│ repo|npub|state_refs|git_refs|matches|status │ +│ Compares state event refs to actual git data on disk │ +│ Categorizes into: complete, empty, partial, no-match │ └─────────────────────────────────────────────────────────────────┘ ↓ ┌─────────────────────────────────────────────────────────────────┐ │ PHASE 3: Categorize & Compare (fast, local) │ -│ migration-scripts/20-categorize.sh │ -│ migration-scripts/21-compare-relays.sh │ -├─────────────────────────────────────────────────────────────────┤ -│ 20-categorize.sh applies 4-category logic: │ -│ - cat1: complete match (all refs match) │ -│ - cat2: empty/blank (no git data) │ -│ - cat3: partial match (some refs match) │ -│ - cat4: no match (git exists but refs don't match) │ -│ │ -│ 21-compare-relays.sh compares prod vs archive: │ -│ - complete-in-both.txt (no action needed) │ -│ - complete-prod-missing-archive.txt (needs investigation) │ -│ - complete-prod-incomplete-archive.txt (sync in progress?) 
│ -│ - incomplete-in-both.txt (git data incomplete) │ -│ - in-archive-not-prod.txt (deleted or new) │ -│ │ -│ Output: category-{1,2,3,4}.txt, comparison/*.txt, summary.txt │ +│ Compares prod vs archive categories │ +│ Identifies gaps and sync issues │ └─────────────────────────────────────────────────────────────────┘ ↓ ┌─────────────────────────────────────────────────────────────────┐ │ PHASE 4: Log-Based Categories (VPS required) │ -│ migration-scripts/30-extract-parse-failures.sh │ -│ migration-scripts/31-extract-purgatory-expiry.sh │ -├─────────────────────────────────────────────────────────────────┤ -│ Extracts structured log entries from journalctl: │ -│ - Parse failures: [PARSE_FAIL] kind=X event_id=Y reason=Z │ -│ - Purgatory expiry: [PURGATORY_EXPIRED] repo=X npub=Y │ -│ │ -│ NOTE: Requires logging improvements in ngit-grasp to emit │ -│ these structured log entries. See issue: TBD │ -│ │ -│ Output: parse-failures.txt, purgatory-expired.txt │ +│ Extracts [PARSE_FAIL] and [PURGATORY_EXPIRED] from logs │ +│ Provides context for why repos failed to sync │ └─────────────────────────────────────────────────────────────────┘ ↓ ┌─────────────────────────────────────────────────────────────────┐ │ PHASE 5: Final Classification (fast, local) │ -│ migration-scripts/40-classify-actions.sh │ -├─────────────────────────────────────────────────────────────────┤ -│ Combines all data sources to produce final classification: │ -│ │ -│ Inputs: │ -│ - category files (prod and archive) │ -│ - relay-gaps.txt │ -│ - parse-failures.txt │ -│ - purgatory-expired.txt │ -│ - deletions.json │ -│ │ -│ Output: │ -│ - no-action-required.txt (repo|reason) │ -│ - action-required.txt (repo|reason|suggested_action) │ -│ - manual-investigation.txt (repo|notes) │ +│ Combines all data sources │ +│ Outputs: no-action, action-required, manual-investigation │ └─────────────────────────────────────────────────────────────────┘ ``` -## Directory Structure +### Git Sync Categories + +Phase 2 
categorizes repositories into 4 categories: + +| Category | Description | Meaning | +|----------|-------------|---------| +| 1 | Complete Match | All refs in state event match git data | +| 2 | Empty/Blank | No git data available | +| 3 | Partial Match | Some refs match, some don't | +| 4 | No Match | Git data exists but refs don't match | + +### Output Directory Structure ``` work/migration-analysis-YYYYMMDD-HHMM/ ├── prod/ │ ├── raw/ -│ │ ├── state-events.json # Phase 1 output -│ │ ├── announcements.json # Phase 1 output -│ │ └── deletions.json # Phase 1 output -│ ├── git-sync-status.tsv # Phase 2 output (optional) -│ ├── category1-complete-match.txt # Phase 2/3 output -│ ├── category2-empty-blank.txt # Phase 2/3 output -│ ├── category3-partial-match.txt # Phase 2/3 output -│ └── category4-no-match.txt # Phase 2/3 output +│ │ ├── state-events.json # Phase 1 +│ │ ├── announcements.json # Phase 1 +│ │ └── deletions.json # Phase 1 +│ ├── git-sync-status.tsv # Phase 2 +│ └── category*.txt # Phase 2/3 ├── archive/ -│ ├── raw/ -│ │ ├── state-events.json -│ │ ├── announcements.json -│ │ └── deletions.json -│ ├── git-sync-status.tsv -│ ├── category1-complete-match.txt -│ ├── category2-empty-blank.txt -│ ├── category3-partial-match.txt -│ └── category4-no-match.txt -├── logs/ -│ ├── parse-failures.txt # Phase 4 output -│ └── purgatory-expired.txt # Phase 4 output +│ └── (same structure as prod) ├── comparison/ -│ ├── complete-in-both.txt # Phase 3 output (no action) -│ ├── complete-prod-missing-archive.txt # Phase 3 output (investigate) -│ ├── complete-prod-incomplete-archive.txt # Phase 3 output (sync in progress?) 
-│ ├── incomplete-in-both.txt # Phase 3 output (git incomplete) -│ ├── in-archive-not-prod.txt # Phase 3 output (deleted/new) -│ └── summary.txt # Phase 3 output (human-readable) +│ ├── complete-in-both.txt # Phase 3 +│ ├── complete-prod-missing-archive.txt +│ ├── complete-prod-incomplete-archive.txt +│ ├── incomplete-in-both.txt +│ ├── in-archive-not-prod.txt +│ └── summary.txt +├── logs/ +│ ├── parse-failures.txt # Phase 4 +│ └── purgatory-expired.txt # Phase 4 └── results/ - ├── no-action-required.txt # Phase 5 output - ├── action-required.txt # Phase 5 output - └── manual-investigation.txt # Phase 5 output + ├── no-action-required.txt # Phase 5 + ├── action-required.txt # Phase 5 + ├── manual-investigation.txt # Phase 5 + └── summary.txt # Phase 5 ``` -## Prerequisites +## Key Differences: ngit-relay vs ngit-grasp + +Understanding these differences helps explain why some repositories need attention: -- `nak` - Nostr Army Knife for fetching events -- `jq` - JSON processing -- SSH access to VPS for Phase 2 and 4 -- Logging improvements in ngit-grasp for Phase 4 (see Dependencies) +| Aspect | ngit-relay | ngit-grasp | +|--------|------------|------------| +| Git data validation | Accepts commits/tags referenced in state event | Requires all git data to reproduce state | +| PR refs cleanup | Doesn't clear `refs/nostr/` | Properly manages PR refs | +| Parse failures | Silently ignores | Logs structured `[PARSE_FAIL]` entries | +| Sync timeout | No timeout | Purgatory expires after configurable period | -## Dependencies +## Next Steps -Phase 4 requires structured logging in ngit-grasp. Create a separate issue to add: +After running the analysis: -```rust -// On parse failure: -tracing::warn!( - target: "migration", - "[PARSE_FAIL] kind={} event_id={} reason=\"{}\"", - event.kind, event.id, reason -); +1. **Review the summary** - Check `results/summary.txt` for the overview +2. **Address action items** - Work through `results/action-required.txt` +3. 
**Investigate edge cases** - Review `results/manual-investigation.txt` +4. **Re-run analysis** - After fixing issues, re-run to verify +5. **Plan cutover** - Schedule the switch when all issues are resolved -// On purgatory expiry: -tracing::warn!( - target: "migration", - "[PURGATORY_EXPIRED] repo={} npub={}", - identifier, npub -); +### When to Re-run + +Re-run the analysis when: +- Archive sync has had time to complete +- You've fixed parse failures or re-announced events +- You want to verify fixes before cutover + +```bash +# Re-run with existing Phase 2 data (faster) +./run-migration-analysis.sh ... --skip-phase-2 --output work/migration-analysis-20260122-1430 ``` -## Gotchas +## Individual Scripts + +For advanced usage, you can run individual phase scripts: + +```bash +# Phase 1: Fetch events +./migration-scripts/01-fetch-events.sh wss://relay.ngit.dev output/prod + +# Phase 2: Git sync check +./migration-scripts/10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/ngit-relay/git output/prod --categorize + +# Phase 3a: Categorize +./migration-scripts/20-categorize.sh output/prod/git-sync-status.tsv output/prod + +# Phase 3b: Compare relays +./migration-scripts/21-compare-relays.sh output/prod output/archive output/comparison + +# Phase 4a: Extract parse failures +./migration-scripts/30-extract-parse-failures.sh ngit-grasp.service output/logs + +# Phase 4b: Extract purgatory expiry +./migration-scripts/31-extract-purgatory-expiry.sh ngit-grasp.service output/logs + +# Phase 5: Final classification +./migration-scripts/40-classify-actions.sh work/migration-analysis-20260122-1430 +``` -- Always use `nak req` with `--paginate` flag so we don't miss any events. If we receive increments of 250 (e.g., exactly 500) then it's a red flag that we are not paginating and there are probably more events. -- Phase 1 and 2 should run back-to-back for an accurate snapshot. -- The git sync check (Phase 2) takes ~20 minutes per relay - this is the slow part. 
-- Existing analysis data from Jan 22 can be used for developing Phase 3/5 logic before re-running Phase 2. +Each script has detailed help available with `--help` or by reading the script header. -- cgit v1.2.3 From 79d2d01923cf66217fd5aa3a0be5ee9ea2000872 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 12:15:12 +0000 Subject: Generalize migration guide for any GRASP implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename guide: migrate-ngit-relay-to-ngit-grasp.md → migrate-to-ngit-grasp.md - Remove ngit-relay and relay.ngit.dev specific references - Use generic terminology: source/target relay, current implementation - Add Compatibility section explaining requirements - Update examples to be implementation-agnostic - Update script comments to reference GRASP relay (not ngit-relay) - Update README.md to link to the new guide Scripts already work with any GRASP implementation via parameters. --- docs/how-to/README.md | 14 +- docs/how-to/migrate-ngit-relay-to-ngit-grasp.md | 395 -------------------- docs/how-to/migrate-to-ngit-grasp.md | 412 +++++++++++++++++++++ docs/how-to/migration-scripts/01-fetch-events.sh | 4 +- docs/how-to/migration-scripts/10-check-git-sync.sh | 20 +- docs/how-to/migration-scripts/20-categorize.sh | 4 +- docs/how-to/migration-scripts/21-compare-relays.sh | 4 +- .../migration-scripts/30-extract-parse-failures.sh | 10 +- .../31-extract-purgatory-expiry.sh | 10 +- .../migration-scripts/40-classify-actions.sh | 4 +- .../migration-scripts/run-migration-analysis.sh | 14 +- 11 files changed, 454 insertions(+), 437 deletions(-) delete mode 100644 docs/how-to/migrate-ngit-relay-to-ngit-grasp.md create mode 100644 docs/how-to/migrate-to-ngit-grasp.md diff --git a/docs/how-to/README.md b/docs/how-to/README.md index ba58c08..f755be1 100644 --- a/docs/how-to/README.md +++ b/docs/how-to/README.md @@ -110,15 +110,15 @@ How-to guides are **recipes** that show you how to solve 
specific problems or ac --- -### Migrate from ngit-relay -**Status:** 🔜 Planned +### [Migrate to ngit-grasp](migrate-to-ngit-grasp.md) +**Status:** ✅ Available -**Problem:** Switch from reference implementation +**Problem:** Switch from another GRASP implementation **You'll learn:** -- Export data from ngit-relay -- Import to ngit-grasp -- Update repository URLs -- Verify migration +- Analyze existing relay data +- Identify repositories needing attention +- Run migration analysis scripts +- Plan and execute cutover --- diff --git a/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md b/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md deleted file mode 100644 index 975eb4c..0000000 --- a/docs/how-to/migrate-ngit-relay-to-ngit-grasp.md +++ /dev/null @@ -1,395 +0,0 @@ -# Migrate ngit-relay to ngit-grasp - -This guide walks you through migrating a production ngit-relay instance to ngit-grasp. The process involves analyzing your existing data to identify repositories that need attention before switching over. - -## Quick Start - -Run the migration analysis with a single command: - -```bash -# Basic analysis (fetches events, compares relays) -./docs/how-to/migration-scripts/run-migration-analysis.sh \ - --prod-relay wss://relay.ngit.dev \ - --archive-relay wss://archive.relay.ngit.dev - -# Full analysis (includes git sync check - run on VPS) -./docs/how-to/migration-scripts/run-migration-analysis.sh \ - --prod-relay wss://relay.ngit.dev \ - --archive-relay wss://archive.relay.ngit.dev \ - --prod-git /var/lib/ngit-relay/git \ - --archive-git /var/lib/ngit-relay-archive/git \ - --service ngit-grasp.service -``` - -The script produces three output files: -- `results/no-action-required.txt` - Repos ready for migration -- `results/action-required.txt` - Repos needing intervention -- `results/manual-investigation.txt` - Repos needing human review - -See [Running the Analysis](#running-the-analysis) for detailed options. 
- -## Prerequisites - -### Required Tools - -- **nak** - Nostr Army Knife for fetching events ([install](https://github.com/fiatjaf/nak)) -- **jq** - JSON processing (install via package manager) - -### For Full Analysis (VPS) - -- SSH access to the VPS running ngit-relay -- Read access to git data directories -- Access to systemd journal (for log extraction) - -### Verify Installation - -```bash -# Check required tools -nak --version -jq --version - -# Check optional tools (for VPS phases) -journalctl --version -``` - -## Migration Overview - -The migration process has three stages: - -### Stage 1: Deploy Archive Instance - -Deploy ngit-grasp alongside your production ngit-relay: - -1. Configure ngit-grasp with: - - `domain` set to `.internal` (temporary) - - `archiveService` set to your production domain - - Running on a different port - -2. Let it sync for ~1 hour to gather all events and git data - -### Stage 2: Analyze Data - -Run the migration analysis to identify: -- Repositories successfully migrated (no action needed) -- Repositories with incomplete data (need investigation) -- Repositories with parse failures (may need re-announcement) - -### Stage 3: Switch Over - -Once all issues are resolved: -1. Set `domain` to your production URL -2. Disable archive mode -3. 
Update your reverse proxy to point to ngit-grasp - -## Running the Analysis - -### Basic Usage - -```bash -# Preview what will happen (dry run) -./run-migration-analysis.sh \ - --prod-relay wss://relay.ngit.dev \ - --archive-relay wss://archive.relay.ngit.dev \ - --dry-run - -# Run the analysis -./run-migration-analysis.sh \ - --prod-relay wss://relay.ngit.dev \ - --archive-relay wss://archive.relay.ngit.dev -``` - -### Full Analysis on VPS - -```bash -./run-migration-analysis.sh \ - --prod-relay wss://relay.ngit.dev \ - --archive-relay wss://archive.relay.ngit.dev \ - --prod-git /var/lib/ngit-relay/git \ - --archive-git /var/lib/ngit-relay-archive/git \ - --service ngit-grasp.service -``` - -### Phase Control - -Skip or run specific phases: - -```bash -# Skip Phase 2 (use cached git sync data) -./run-migration-analysis.sh ... --skip-phase-2 - -# Run only Phase 1 (fetch events) -./run-migration-analysis.sh ... --only-phase-1 - -# Resume from Phase 3 (using existing data) -./run-migration-analysis.sh ... 
--from-phase-3 --output work/migration-analysis-20260122-1430 -``` - -### All Options - -| Option | Description | -|--------|-------------| -| `--prod-relay <url>` | Production relay WebSocket URL (required) | -| `--archive-relay <url>` | Archive relay WebSocket URL (required) | -| `--prod-git <path>` | Git base directory for prod (enables Phase 2) | -| `--archive-git <path>` | Git base directory for archive (enables Phase 2) | -| `--service <name>` | Systemd service name (enables Phase 4) | -| `--output <dir>` | Output directory (default: auto-generated) | -| `--skip-phase-N` | Skip phase N (1-5) | -| `--only-phase-N` | Run only phase N | -| `--from-phase-N` | Start from phase N | -| `--dry-run` | Show what would be executed | -| `--continue-on-error` | Continue even if a phase fails | - -## Understanding Results - -### Summary File - -The `results/summary.txt` file provides an overview: - -``` -## Overview - -| Category | Count | Percentage | -|----------|-------|------------| -| No Action Required | 450 | 85.7% | -| Action Required | 52 | 9.9% | -| Manual Investigation | 23 | 4.4% | -``` - -### No Action Required - -Repositories in `no-action-required.txt` are ready for migration: - -``` -myrepo | npub1abc... | complete in both prod and archive -oldrepo | npub1def... | deleted by user -testrepo | npub1ghi... | empty/blank in both (user never pushed) -``` - -**Common reasons:** -- `complete in both prod and archive` - Successfully migrated -- `deleted by user` - User requested deletion (kind 5 event) -- `empty/blank in both` - No git data was ever pushed -- `purgatory expired` - System already handled the timeout - -### Action Required - -Repositories in `action-required.txt` need intervention: - -``` -myrepo | npub1abc... | complete in prod, missing from archive | trigger re-sync or investigate -otherrepo | npub1def... 
| incomplete in both (prod=cat3, archive=cat2) | investigate git data source -``` - -**Common actions:** -- **Re-sync needed**: Trigger the archive to re-fetch from the source -- **Wait for sync**: Archive sync may still be in progress -- **Investigate git source**: Original git data may be incomplete -- **Fix parse failure**: Event format issue, may need re-announcement - -### Manual Investigation - -Repositories in `manual-investigation.txt` have unusual states: - -``` -weirdrepo | npub1abc... | in archive (cat1) but not in prod | may be new announcement or deleted from prod -conflictrepo | npub1def... | complete in prod, missing from archive, parse failure logged | investigate parse failure -``` - -These require human judgment to determine the correct action. - -## Troubleshooting - -### "nak not found" - -Install nak from https://github.com/fiatjaf/nak: - -```bash -# Using Go -go install github.com/fiatjaf/nak@latest - -# Or download binary from releases -``` - -### "Permission denied" on git directories - -Run with sudo or ensure your user has read access: - -```bash -# Check permissions -ls -la /var/lib/ngit-relay/git - -# Run with sudo if needed -sudo ./run-migration-analysis.sh ... -``` - -### Phase 2 takes too long - -The git sync check processes each repository individually (~20 minutes total). To speed up iteration: - -1. Run Phase 2 once and save the output -2. Use `--skip-phase-2` for subsequent runs -3. Use `--from-phase-3` to re-run classification with existing data - -### No parse failures found - -This is expected if: -- ngit-grasp logging improvements aren't deployed yet -- No events actually failed to parse - -The analysis will continue without log data. - -### Event counts are multiples of 250 - -This suggests pagination may have failed. The scripts use `--paginate` by default, but if you see exactly 250, 500, 750 events, verify the relay is responding correctly. 
- -## Architecture - -### Analysis Phases - -The analysis is split into 5 modular phases: - -| Phase | Name | Time | Location | Description | -|-------|------|------|----------|-------------| -| 1 | Fetch Events | ~30s each | Local | Fetch events from both relays | -| 2 | Git Sync Check | ~20 min each | VPS | Compare state events to git data | -| 3 | Categorize & Compare | <1s | Local | Categorize and compare results | -| 4 | Extract Logs | <30s | VPS | Extract parse failures and purgatory expiry | -| 5 | Final Classification | <5s | Local | Combine all data into actionable results | - -### Phase Flow Diagram - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ PHASE 1: Fetch Events (~30s, local) │ -│ Fetches kind 30618 (state), 30617 (announcements), 5 (deletion) │ -│ Run twice: once for prod, once for archive │ -└─────────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────────┐ -│ PHASE 2: Git Sync Check (~20 mins, VPS required) │ -│ Compares state event refs to actual git data on disk │ -│ Categorizes into: complete, empty, partial, no-match │ -└─────────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────────┐ -│ PHASE 3: Categorize & Compare (fast, local) │ -│ Compares prod vs archive categories │ -│ Identifies gaps and sync issues │ -└─────────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────────┐ -│ PHASE 4: Log-Based Categories (VPS required) │ -│ Extracts [PARSE_FAIL] and [PURGATORY_EXPIRED] from logs │ -│ Provides context for why repos failed to sync │ -└─────────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────────┐ -│ PHASE 5: Final Classification (fast, local) │ -│ Combines all data sources │ -│ Outputs: no-action, action-required, 
manual-investigation │ -└─────────────────────────────────────────────────────────────────┘ -``` - -### Git Sync Categories - -Phase 2 categorizes repositories into 4 categories: - -| Category | Description | Meaning | -|----------|-------------|---------| -| 1 | Complete Match | All refs in state event match git data | -| 2 | Empty/Blank | No git data available | -| 3 | Partial Match | Some refs match, some don't | -| 4 | No Match | Git data exists but refs don't match | - -### Output Directory Structure - -``` -work/migration-analysis-YYYYMMDD-HHMM/ -├── prod/ -│ ├── raw/ -│ │ ├── state-events.json # Phase 1 -│ │ ├── announcements.json # Phase 1 -│ │ └── deletions.json # Phase 1 -│ ├── git-sync-status.tsv # Phase 2 -│ └── category*.txt # Phase 2/3 -├── archive/ -│ └── (same structure as prod) -├── comparison/ -│ ├── complete-in-both.txt # Phase 3 -│ ├── complete-prod-missing-archive.txt -│ ├── complete-prod-incomplete-archive.txt -│ ├── incomplete-in-both.txt -│ ├── in-archive-not-prod.txt -│ └── summary.txt -├── logs/ -│ ├── parse-failures.txt # Phase 4 -│ └── purgatory-expired.txt # Phase 4 -└── results/ - ├── no-action-required.txt # Phase 5 - ├── action-required.txt # Phase 5 - ├── manual-investigation.txt # Phase 5 - └── summary.txt # Phase 5 -``` - -## Key Differences: ngit-relay vs ngit-grasp - -Understanding these differences helps explain why some repositories need attention: - -| Aspect | ngit-relay | ngit-grasp | -|--------|------------|------------| -| Git data validation | Accepts commits/tags referenced in state event | Requires all git data to reproduce state | -| PR refs cleanup | Doesn't clear `refs/nostr/` | Properly manages PR refs | -| Parse failures | Silently ignores | Logs structured `[PARSE_FAIL]` entries | -| Sync timeout | No timeout | Purgatory expires after configurable period | - -## Next Steps - -After running the analysis: - -1. **Review the summary** - Check `results/summary.txt` for the overview -2. 
**Address action items** - Work through `results/action-required.txt` -3. **Investigate edge cases** - Review `results/manual-investigation.txt` -4. **Re-run analysis** - After fixing issues, re-run to verify -5. **Plan cutover** - Schedule the switch when all issues are resolved - -### When to Re-run - -Re-run the analysis when: -- Archive sync has had time to complete -- You've fixed parse failures or re-announced events -- You want to verify fixes before cutover - -```bash -# Re-run with existing Phase 2 data (faster) -./run-migration-analysis.sh ... --skip-phase-2 --output work/migration-analysis-20260122-1430 -``` - -## Individual Scripts - -For advanced usage, you can run individual phase scripts: - -```bash -# Phase 1: Fetch events -./migration-scripts/01-fetch-events.sh wss://relay.ngit.dev output/prod - -# Phase 2: Git sync check -./migration-scripts/10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/ngit-relay/git output/prod --categorize - -# Phase 3a: Categorize -./migration-scripts/20-categorize.sh output/prod/git-sync-status.tsv output/prod - -# Phase 3b: Compare relays -./migration-scripts/21-compare-relays.sh output/prod output/archive output/comparison - -# Phase 4a: Extract parse failures -./migration-scripts/30-extract-parse-failures.sh ngit-grasp.service output/logs - -# Phase 4b: Extract purgatory expiry -./migration-scripts/31-extract-purgatory-expiry.sh ngit-grasp.service output/logs - -# Phase 5: Final classification -./migration-scripts/40-classify-actions.sh work/migration-analysis-20260122-1430 -``` - -Each script has detailed help available with `--help` or by reading the script header. 
diff --git a/docs/how-to/migrate-to-ngit-grasp.md b/docs/how-to/migrate-to-ngit-grasp.md new file mode 100644 index 0000000..f4dff86 --- /dev/null +++ b/docs/how-to/migrate-to-ngit-grasp.md @@ -0,0 +1,412 @@ +# Migrate to ngit-grasp from another GRASP implementation + +This guide walks you through migrating a production GRASP relay to ngit-grasp. The process involves analyzing your existing data to identify repositories that need attention before switching over. + +## Compatibility + +This migration process works with any GRASP implementation that: + +- Stores git data in the `<npub>/<repo>.git` directory structure +- Uses standard GRASP events (kind 30617 announcements, kind 30618 state, kind 5 deletions) +- Exposes a Nostr relay WebSocket endpoint + +**Known compatible implementations:** +- ngit-relay (reference implementation) +- ngit-grasp (when migrating between instances or from archive mode) +- Other GRASP-compliant relays following the specification + +The migration scripts analyze Nostr events and git data directly, making them implementation-agnostic. 
+ +## Quick Start + +Run the migration analysis with a single command: + +```bash +# Basic analysis (fetches events, compares relays) +./docs/how-to/migration-scripts/run-migration-analysis.sh \ + --prod-relay wss://source-relay.example.com \ + --archive-relay wss://target-relay.example.com + +# Full analysis (includes git sync check - run on VPS) +./docs/how-to/migration-scripts/run-migration-analysis.sh \ + --prod-relay wss://source-relay.example.com \ + --archive-relay wss://target-relay.example.com \ + --prod-git /var/lib/grasp-relay/git \ + --archive-git /var/lib/ngit-grasp/git \ + --service ngit-grasp.service +``` + +The script produces three output files: +- `results/no-action-required.txt` - Repos ready for migration +- `results/action-required.txt` - Repos needing intervention +- `results/manual-investigation.txt` - Repos needing human review + +See [Running the Analysis](#running-the-analysis) for detailed options. + +## Prerequisites + +### Required Tools + +- **nak** - Nostr Army Knife for fetching events ([install](https://github.com/fiatjaf/nak)) +- **jq** - JSON processing (install via package manager) + +### For Full Analysis (VPS) + +- SSH access to the VPS running your source relay +- Read access to git data directories +- Access to systemd journal (for log extraction) + +### Verify Installation + +```bash +# Check required tools +nak --version +jq --version + +# Check optional tools (for VPS phases) +journalctl --version +``` + +## Migration Overview + +The migration process has three stages: + +### Stage 1: Deploy Archive Instance + +Deploy ngit-grasp alongside your production relay: + +1. Configure ngit-grasp with: + - `domain` set to `.internal` (temporary) + - `archiveService` set to your production domain + - Running on a different port + +2. 
Let it sync for ~1 hour to gather all events and git data + +### Stage 2: Analyze Data + +Run the migration analysis to identify: +- Repositories successfully migrated (no action needed) +- Repositories with incomplete data (need investigation) +- Repositories with parse failures (may need re-announcement) + +### Stage 3: Switch Over + +Once all issues are resolved: +1. Set `domain` to your production URL +2. Disable archive mode +3. Update your reverse proxy to point to ngit-grasp + +## Running the Analysis + +### Basic Usage + +```bash +# Preview what will happen (dry run) +./run-migration-analysis.sh \ + --prod-relay wss://source-relay.example.com \ + --archive-relay wss://target-relay.example.com \ + --dry-run + +# Run the analysis +./run-migration-analysis.sh \ + --prod-relay wss://source-relay.example.com \ + --archive-relay wss://target-relay.example.com +``` + +### Full Analysis on VPS + +```bash +./run-migration-analysis.sh \ + --prod-relay wss://source-relay.example.com \ + --archive-relay wss://target-relay.example.com \ + --prod-git /var/lib/grasp-relay/git \ + --archive-git /var/lib/ngit-grasp/git \ + --service ngit-grasp.service +``` + +### Phase Control + +Skip or run specific phases: + +```bash +# Skip Phase 2 (use cached git sync data) +./run-migration-analysis.sh ... --skip-phase-2 + +# Run only Phase 1 (fetch events) +./run-migration-analysis.sh ... --only-phase-1 + +# Resume from Phase 3 (using existing data) +./run-migration-analysis.sh ... 
--from-phase-3 --output work/migration-analysis-20260122-1430 +``` + +### All Options + +| Option | Description | +|--------|-------------| +| `--prod-relay <url>` | Source relay WebSocket URL (required) | +| `--archive-relay <url>` | Target relay WebSocket URL (required) | +| `--prod-git <path>` | Git base directory for prod (enables Phase 2) | +| `--archive-git <path>` | Git base directory for archive (enables Phase 2) | +| `--service <name>` | Systemd service name (enables Phase 4) | +| `--output <dir>` | Output directory (default: auto-generated) | +| `--skip-phase-N` | Skip phase N (1-5) | +| `--only-phase-N` | Run only phase N | +| `--from-phase-N` | Start from phase N | +| `--dry-run` | Show what would be executed | +| `--continue-on-error` | Continue even if a phase fails | + +## Understanding Results + +### Summary File + +The `results/summary.txt` file provides an overview: + +``` +## Overview + +| Category | Count | Percentage | +|----------|-------|------------| +| No Action Required | 450 | 85.7% | +| Action Required | 52 | 9.9% | +| Manual Investigation | 23 | 4.4% | +``` + +### No Action Required + +Repositories in `no-action-required.txt` are ready for migration: + +``` +myrepo | npub1abc... | complete in both prod and archive +oldrepo | npub1def... | deleted by user +testrepo | npub1ghi... | empty/blank in both (user never pushed) +``` + +**Common reasons:** +- `complete in both prod and archive` - Successfully migrated +- `deleted by user` - User requested deletion (kind 5 event) +- `empty/blank in both` - No git data was ever pushed +- `purgatory expired` - System already handled the timeout + +### Action Required + +Repositories in `action-required.txt` need intervention: + +``` +myrepo | npub1abc... | complete in prod, missing from archive | trigger re-sync or investigate +otherrepo | npub1def... 
| incomplete in both (prod=cat3, archive=cat2) | investigate git data source +``` + +**Common actions:** +- **Re-sync needed**: Trigger the archive to re-fetch from the source +- **Wait for sync**: Archive sync may still be in progress +- **Investigate git source**: Original git data may be incomplete +- **Fix parse failure**: Event format issue, may need re-announcement + +### Manual Investigation + +Repositories in `manual-investigation.txt` have unusual states: + +``` +weirdrepo | npub1abc... | in archive (cat1) but not in prod | may be new announcement or deleted from prod +conflictrepo | npub1def... | complete in prod, missing from archive, parse failure logged | investigate parse failure +``` + +These require human judgment to determine the correct action. + +## Troubleshooting + +### "nak not found" + +Install nak from https://github.com/fiatjaf/nak: + +```bash +# Using Go +go install github.com/fiatjaf/nak@latest + +# Or download binary from releases +``` + +### "Permission denied" on git directories + +Run with sudo or ensure your user has read access: + +```bash +# Check permissions +ls -la /var/lib/grasp-relay/git + +# Run with sudo if needed +sudo ./run-migration-analysis.sh ... +``` + +### Phase 2 takes too long + +The git sync check processes each repository individually (~20 minutes total). To speed up iteration: + +1. Run Phase 2 once and save the output +2. Use `--skip-phase-2` for subsequent runs +3. Use `--from-phase-3` to re-run classification with existing data + +### No parse failures found + +This is expected if: +- ngit-grasp logging improvements aren't deployed yet +- No events actually failed to parse + +The analysis will continue without log data. + +### Event counts are multiples of 250 + +This suggests pagination may have failed. The scripts use `--paginate` by default, but if you see exactly 250, 500, 750 events, verify the relay is responding correctly. 
+ +## Architecture + +### Analysis Phases + +The analysis is split into 5 modular phases: + +| Phase | Name | Time | Location | Description | +|-------|------|------|----------|-------------| +| 1 | Fetch Events | ~30s each | Local | Fetch events from both relays | +| 2 | Git Sync Check | ~20 min each | VPS | Compare state events to git data | +| 3 | Categorize & Compare | <1s | Local | Categorize and compare results | +| 4 | Extract Logs | <30s | VPS | Extract parse failures and purgatory expiry | +| 5 | Final Classification | <5s | Local | Combine all data into actionable results | + +### Phase Flow Diagram + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 1: Fetch Events (~30s, local) │ +│ Fetches kind 30618 (state), 30617 (announcements), 5 (deletion) │ +│ Run twice: once for prod, once for archive │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 2: Git Sync Check (~20 mins, VPS required) │ +│ Compares state event refs to actual git data on disk │ +│ Categorizes into: complete, empty, partial, no-match │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 3: Categorize & Compare (fast, local) │ +│ Compares prod vs archive categories │ +│ Identifies gaps and sync issues │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 4: Log-Based Categories (VPS required) │ +│ Extracts [PARSE_FAIL] and [PURGATORY_EXPIRED] from logs │ +│ Provides context for why repos failed to sync │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 5: Final Classification (fast, local) │ +│ Combines all data sources │ +│ Outputs: no-action, action-required, 
manual-investigation │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Git Sync Categories + +Phase 2 categorizes repositories into 4 categories: + +| Category | Description | Meaning | +|----------|-------------|---------| +| 1 | Complete Match | All refs in state event match git data | +| 2 | Empty/Blank | No git data available | +| 3 | Partial Match | Some refs match, some don't | +| 4 | No Match | Git data exists but refs don't match | + +### Output Directory Structure + +``` +work/migration-analysis-YYYYMMDD-HHMM/ +├── prod/ +│ ├── raw/ +│ │ ├── state-events.json # Phase 1 +│ │ ├── announcements.json # Phase 1 +│ │ └── deletions.json # Phase 1 +│ ├── git-sync-status.tsv # Phase 2 +│ └── category*.txt # Phase 2/3 +├── archive/ +│ └── (same structure as prod) +├── comparison/ +│ ├── complete-in-both.txt # Phase 3 +│ ├── complete-prod-missing-archive.txt +│ ├── complete-prod-incomplete-archive.txt +│ ├── incomplete-in-both.txt +│ ├── in-archive-not-prod.txt +│ └── summary.txt +├── logs/ +│ ├── parse-failures.txt # Phase 4 +│ └── purgatory-expired.txt # Phase 4 +└── results/ + ├── no-action-required.txt # Phase 5 + ├── action-required.txt # Phase 5 + ├── manual-investigation.txt # Phase 5 + └── summary.txt # Phase 5 +``` + +## Why Migration May Require Attention + +Different GRASP implementations may handle edge cases differently. 
ngit-grasp has stricter validation and better observability, which can surface issues that were previously hidden: + +| Aspect | Typical Source Relay | ngit-grasp | +|--------|---------------------|------------| +| Git data validation | May accept partial data | Requires all git data to reproduce state | +| PR refs cleanup | May not clear `refs/nostr/` | Properly manages PR refs | +| Parse failures | May silently ignore | Logs structured `[PARSE_FAIL]` entries | +| Sync timeout | May have no timeout | Purgatory expires after configurable period | + +These differences explain why some repositories may need attention during migration - ngit-grasp's stricter validation catches issues that other implementations may have silently accepted. + +## Next Steps + +After running the analysis: + +1. **Review the summary** - Check `results/summary.txt` for the overview +2. **Address action items** - Work through `results/action-required.txt` +3. **Investigate edge cases** - Review `results/manual-investigation.txt` +4. **Re-run analysis** - After fixing issues, re-run to verify +5. **Plan cutover** - Schedule the switch when all issues are resolved + +### When to Re-run + +Re-run the analysis when: +- Archive sync has had time to complete +- You've fixed parse failures or re-announced events +- You want to verify fixes before cutover + +```bash +# Re-run with existing Phase 2 data (faster) +./run-migration-analysis.sh ... 
--skip-phase-2 --output work/migration-analysis-20260122-1430 +``` + +## Individual Scripts + +For advanced usage, you can run individual phase scripts: + +```bash +# Phase 1: Fetch events +./migration-scripts/01-fetch-events.sh wss://source-relay.example.com output/prod + +# Phase 2: Git sync check +./migration-scripts/10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod --categorize + +# Phase 3a: Categorize +./migration-scripts/20-categorize.sh output/prod/git-sync-status.tsv output/prod + +# Phase 3b: Compare relays +./migration-scripts/21-compare-relays.sh output/prod output/archive output/comparison + +# Phase 4a: Extract parse failures +./migration-scripts/30-extract-parse-failures.sh ngit-grasp.service output/logs + +# Phase 4b: Extract purgatory expiry +./migration-scripts/31-extract-purgatory-expiry.sh ngit-grasp.service output/logs + +# Phase 5: Final classification +./migration-scripts/40-classify-actions.sh work/migration-analysis-20260122-1430 +``` + +Each script has detailed help available with `--help` or by reading the script header. diff --git a/docs/how-to/migration-scripts/01-fetch-events.sh b/docs/how-to/migration-scripts/01-fetch-events.sh index 6870659..e0d6f26 100755 --- a/docs/how-to/migration-scripts/01-fetch-events.sh +++ b/docs/how-to/migration-scripts/01-fetch-events.sh @@ -2,7 +2,7 @@ # # 01-fetch-events.sh - Fetch nostr events from a relay for migration analysis # -# PHASE 1 of the ngit-relay to ngit-grasp migration analysis pipeline. +# PHASE 1 of the GRASP relay to ngit-grasp migration analysis pipeline. # Fetches kind 30618 (state), 30617 (announcement), and 5 (deletion) events. 
# # USAGE: @@ -41,7 +41,7 @@ # - Run Phase 1 and Phase 2 back-to-back for accurate snapshot # # SEE ALSO: -# docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide # set -euo pipefail diff --git a/docs/how-to/migration-scripts/10-check-git-sync.sh b/docs/how-to/migration-scripts/10-check-git-sync.sh index 493d50a..1f2ce04 100755 --- a/docs/how-to/migration-scripts/10-check-git-sync.sh +++ b/docs/how-to/migration-scripts/10-check-git-sync.sh @@ -2,21 +2,21 @@ # # 10-check-git-sync.sh - Compare state events to actual git data on disk # -# PHASE 2 of the ngit-relay to ngit-grasp migration analysis pipeline. +# PHASE 2 of the GRASP relay to ngit-grasp migration analysis pipeline. # Compares kind 30618 state events against actual git refs on disk. # # USAGE: # ./10-check-git-sync.sh <state-events.json> <git-base-dir> <output-dir> [--categorize] # # EXAMPLES: -# # Check prod relay against prod git data -# ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/ngit-relay/git output/prod +# # Check source relay against source git data +# ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod # -# # Check archive relay against archive git data -# ./10-check-git-sync.sh output/archive/raw/state-events.json /var/lib/ngit-relay-archive/git output/archive +# # Check target relay against target git data +# ./10-check-git-sync.sh output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive # # # Check and categorize in one step (convenience mode) -# ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/ngit-relay/git output/prod --categorize +# ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod --categorize # # INPUT: # state-events.json - JSONL file from Phase 1 (01-fetch-events.sh) @@ -53,7 +53,7 @@ # - Handles packed refs (git show-ref) and loose refs # # SEE ALSO: -# docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full 
migration guide +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide # 01-fetch-events.sh - Phase 1 script that produces input for this script # 20-categorize.sh - Phase 3a script that consumes output from this script # @@ -101,13 +101,13 @@ usage() { echo "" echo "Arguments:" echo " state-events.json JSONL file from Phase 1 (kind 30618 events)" - echo " git-base-dir Base directory for git repos (e.g., /var/lib/ngit-relay/git)" + echo " git-base-dir Base directory for git repos (e.g., /var/lib/grasp-relay/git)" echo " output-dir Directory to store output files" echo " --categorize Optional: also output category files (like Phase 3)" echo "" echo "Examples:" - echo " $0 output/prod/raw/state-events.json /var/lib/ngit-relay/git output/prod" - echo " $0 output/archive/raw/state-events.json /var/lib/ngit-relay-archive/git output/archive" + echo " $0 output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod" + echo " $0 output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive" echo "" echo "Output:" echo " git-sync-status.tsv - TSV with: repo, npub, state_refs, git_refs, matches, reason" diff --git a/docs/how-to/migration-scripts/20-categorize.sh b/docs/how-to/migration-scripts/20-categorize.sh index f47eb55..b38dc00 100755 --- a/docs/how-to/migration-scripts/20-categorize.sh +++ b/docs/how-to/migration-scripts/20-categorize.sh @@ -2,7 +2,7 @@ # # 20-categorize.sh - Categorize git sync status into 4 categories # -# PHASE 3a of the ngit-relay to ngit-grasp migration analysis pipeline. +# PHASE 3a of the GRASP relay to ngit-grasp migration analysis pipeline. # Takes git-sync-status.tsv from Phase 2 and categorizes into 4 files. 
# # USAGE: @@ -39,7 +39,7 @@ # RUNTIME: < 1 second (local processing only) # # SEE ALSO: -# docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide # 10-check-git-sync.sh - Phase 2 script that produces input for this script # diff --git a/docs/how-to/migration-scripts/21-compare-relays.sh b/docs/how-to/migration-scripts/21-compare-relays.sh index 6b40dc8..b9c0d30 100755 --- a/docs/how-to/migration-scripts/21-compare-relays.sh +++ b/docs/how-to/migration-scripts/21-compare-relays.sh @@ -2,7 +2,7 @@ # # 21-compare-relays.sh - Compare prod vs archive category files to find gaps # -# PHASE 3b of the ngit-relay to ngit-grasp migration analysis pipeline. +# PHASE 3b of the GRASP relay to ngit-grasp migration analysis pipeline. # Compares categorized output from prod and archive to identify: # - Repos complete in prod but missing/incomplete in archive # - Repos in archive but not in prod @@ -39,7 +39,7 @@ # RUNTIME: < 1 second (local processing only) # # SEE ALSO: -# docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide # 20-categorize.sh - Phase 3a script that produces input for this script # diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh index 753fd3e..bc2049a 100755 --- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh +++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh @@ -2,7 +2,7 @@ # # 30-extract-parse-failures.sh - Extract parse failure events from systemd logs # -# PHASE 4a of the ngit-relay to ngit-grasp migration analysis pipeline. +# PHASE 4a of the GRASP relay to ngit-grasp migration analysis pipeline. # Extracts structured [PARSE_FAIL] log entries from journalctl. # # USAGE: @@ -42,7 +42,7 @@ # [PARSE_FAIL] log entries. 
Until those are implemented, this script will # find no matching entries (which is handled gracefully). # -# See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section) +# See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section) # # Expected Rust logging code: # tracing::warn!( @@ -59,7 +59,7 @@ # RUNTIME: Depends on log volume, typically < 30 seconds # # SEE ALSO: -# docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide # 31-extract-purgatory-expiry.sh - Companion script for purgatory expiry logs # @@ -231,7 +231,7 @@ main() { if [[ "$sample_count" -eq 0 ]]; then log_warn "No [PARSE_FAIL] entries found in logs." log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." - log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)" + log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" fi exit 0 @@ -259,7 +259,7 @@ main() { log_warn "" log_warn " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." log_warn "" - log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)" + log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" log_warn "" # Create empty output file with header comment diff --git a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh index 38b2ca3..8cadad9 100755 --- a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh +++ b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh @@ -2,7 +2,7 @@ # # 31-extract-purgatory-expiry.sh - Extract purgatory expiry events from systemd logs # -# PHASE 4b of the ngit-relay to ngit-grasp migration analysis pipeline. +# PHASE 4b of the GRASP relay to ngit-grasp migration analysis pipeline. # Extracts structured [PURGATORY_EXPIRED] log entries from journalctl. 
# # USAGE: @@ -53,7 +53,7 @@ # [PURGATORY_EXPIRED] log entries. Until those are implemented, this script # will find no matching entries (which is handled gracefully). # -# See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section) +# See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section) # # Expected Rust logging code: # tracing::warn!( @@ -70,7 +70,7 @@ # RUNTIME: Depends on log volume, typically < 30 seconds # # SEE ALSO: -# docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide # 30-extract-parse-failures.sh - Companion script for parse failure logs # @@ -238,7 +238,7 @@ main() { if [[ "$sample_count" -eq 0 ]]; then log_warn "No [PURGATORY_EXPIRED] entries found in logs." log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." - log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)" + log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" fi exit 0 @@ -266,7 +266,7 @@ main() { log_warn "" log_warn " [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason=\"...\"" log_warn "" - log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)" + log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" log_warn "" # Create empty output file with header comment diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh index 9fc718f..1706e47 100755 --- a/docs/how-to/migration-scripts/40-classify-actions.sh +++ b/docs/how-to/migration-scripts/40-classify-actions.sh @@ -2,7 +2,7 @@ # # 40-classify-actions.sh - Final classification of repos for migration action # -# PHASE 5 of the ngit-relay to ngit-grasp migration analysis pipeline. +# PHASE 5 of the GRASP relay to ngit-grasp migration analysis pipeline. # Combines all data sources from previous phases to produce actionable results. 
# # USAGE: @@ -82,7 +82,7 @@ # RUNTIME: < 5 seconds (local processing only) # # SEE ALSO: -# docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide # 01-fetch-events.sh - Phase 1 (fetch events) # 10-check-git-sync.sh - Phase 2 (git sync check) # 20-categorize.sh, 21-compare-relays.sh - Phase 3 (categorize and compare) diff --git a/docs/how-to/migration-scripts/run-migration-analysis.sh b/docs/how-to/migration-scripts/run-migration-analysis.sh index 2ba25ab..84aec2a 100755 --- a/docs/how-to/migration-scripts/run-migration-analysis.sh +++ b/docs/how-to/migration-scripts/run-migration-analysis.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# run-migration-analysis.sh - Orchestrate the complete ngit-relay to ngit-grasp migration analysis +# run-migration-analysis.sh - Orchestrate the complete GRASP relay to ngit-grasp migration analysis # # This script runs all 5 phases of the migration analysis pipeline in sequence, # with proper error handling, progress reporting, and timing information. 
@@ -13,8 +13,8 @@ # ./run-migration-analysis.sh \ # --prod-relay wss://relay.ngit.dev \ # --archive-relay wss://archive.relay.ngit.dev \ -# --prod-git /var/lib/ngit-relay/git \ -# --archive-git /var/lib/ngit-relay-archive/git +# --prod-git /var/lib/grasp-relay/git \ +# --archive-git /var/lib/ngit-grasp/git # # USAGE: # ./run-migration-analysis.sh [options] @@ -64,12 +64,12 @@ # ./run-migration-analysis.sh \ # --prod-relay wss://relay.ngit.dev \ # --archive-relay wss://archive.relay.ngit.dev \ -# --prod-git /var/lib/ngit-relay/git \ -# --archive-git /var/lib/ngit-relay-archive/git \ +# --prod-git /var/lib/grasp-relay/git \ +# --archive-git /var/lib/ngit-grasp/git \ # --service ngit-grasp.service # # SEE ALSO: -# docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide # set -euo pipefail @@ -658,7 +658,7 @@ display_summary() { main() { parse_args "$@" - log_header "ngit-relay to ngit-grasp Migration Analysis" + log_header "GRASP Relay to ngit-grasp Migration Analysis" validate_args check_prerequisites -- cgit v1.2.3 From b61d388162438d12df37aa3fcd40bc9d3344d5bd Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 14:26:54 +0000 Subject: Add git prerequisite checks to migration scripts - 10-check-git-sync.sh: Check for git before running - run-migration-analysis.sh: Include git in prerequisite checks - Fixes script failures when git is not installed --- docs/how-to/migration-scripts/10-check-git-sync.sh | 15 +++++++++++---- docs/how-to/migration-scripts/run-migration-analysis.sh | 4 ++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/docs/how-to/migration-scripts/10-check-git-sync.sh b/docs/how-to/migration-scripts/10-check-git-sync.sh index 1f2ce04..b4536cb 100755 --- a/docs/how-to/migration-scripts/10-check-git-sync.sh +++ b/docs/how-to/migration-scripts/10-check-git-sync.sh @@ -118,6 +118,11 @@ usage() { check_prerequisites() { local missing=0 + if ! 
command -v git &> /dev/null; then + log_error "git not found. Install with your package manager." + missing=1 + fi + if ! command -v nak &> /dev/null; then log_error "nak not found. Install from: https://github.com/fiatjaf/nak" log_error "Or run: nix-shell -p nak jq --run \"$0 $*\"" @@ -161,13 +166,15 @@ count_git_refs() { return fi - # Use git show-ref to handle both packed and loose refs - # Fall back to counting loose refs if git show-ref fails - if git --git-dir="$git_dir" show-ref --heads 2>/dev/null | wc -l | tr -d ' '; then + # Try git show-ref first (handles packed refs correctly) + # Note: We capture output separately to avoid pipefail issues + local count + if count=$(git --git-dir="$git_dir" show-ref --heads 2>/dev/null | wc -l); then + echo "$count" | tr -d ' ' return fi - # Fallback: count loose refs + # Fallback: count loose refs (when git is not available or fails) if [[ -d "$git_dir/refs/heads" ]]; then find "$git_dir/refs/heads" -type f 2>/dev/null | wc -l | tr -d ' ' else diff --git a/docs/how-to/migration-scripts/run-migration-analysis.sh b/docs/how-to/migration-scripts/run-migration-analysis.sh index 84aec2a..65d9d17 100755 --- a/docs/how-to/migration-scripts/run-migration-analysis.sh +++ b/docs/how-to/migration-scripts/run-migration-analysis.sh @@ -155,7 +155,7 @@ FROM_PHASE="" declare -A PHASE_TIMES usage() { - head -80 "$0" | tail -n +3 | sed 's/^# //' | sed 's/^#//' + head -73 "$0" | tail -n +3 | sed 's/^# //' | sed 's/^#//' exit 0 } @@ -285,7 +285,7 @@ check_prerequisites() { log_info "Checking prerequisites..." 
# Required tools - for tool in nak jq awk sort; do + for tool in git nak jq awk sort; do if command -v "$tool" &> /dev/null; then log_step "$tool: found" else -- cgit v1.2.3 From 6a6c8cf8b70bc387ea7241b5c9ec457cb525eb40 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 14:58:31 +0000 Subject: Update migration guide with lessons learned from relay.ngit.dev analysis - Add Gotchas section with common issues: git installation, localhost-only archive relays, non-standard git paths, service name variations, and permission requirements - Add relay.ngit.dev-specific migration notes with actual paths, service names, and analysis results (315 repos need re-sync, 382 purgatory expired) - Enhance Running the Analysis section with path discovery guidance - Expand Troubleshooting section with solutions for git not found, archive connection failures, and wrong git paths - Add git --version to prerequisite checks - Update examples to use realistic localhost archive URLs --- docs/how-to/migrate-to-ngit-grasp.md | 257 ++++++++++++++++++++++++++++++++++- 1 file changed, 252 insertions(+), 5 deletions(-) diff --git a/docs/how-to/migrate-to-ngit-grasp.md b/docs/how-to/migrate-to-ngit-grasp.md index f4dff86..9b812a5 100644 --- a/docs/how-to/migrate-to-ngit-grasp.md +++ b/docs/how-to/migrate-to-ngit-grasp.md @@ -62,11 +62,112 @@ See [Running the Analysis](#running-the-analysis) for detailed options. # Check required tools nak --version jq --version +git --version # Check optional tools (for VPS phases) journalctl --version ``` +## Gotchas and Common Issues + +Before running the analysis, be aware of these common issues discovered during real migrations: + +### Git Must Be Installed + +The analysis scripts require `git` to be installed and in PATH. This may not be present on minimal VPS installations. 
+ +```bash +# Check if git is available +which git || echo "Git not found - install it first" + +# Install on Debian/Ubuntu +apt install git + +# Install on NixOS (add to configuration.nix) +environment.systemPackages = [ pkgs.git ]; +``` + +### Archive Relay May Only Be Accessible Locally + +If your archive relay is configured to listen only on localhost (e.g., `ws://localhost:7443`), you must run the analysis **on the VPS itself**, not from a remote machine. + +```bash +# Check if archive relay is accessible +# This will fail if run remotely against a localhost-only relay +nak req -k 30618 --limit 1 ws://localhost:7443 + +# Solution: SSH into the VPS and run analysis there +ssh user@your-vps +cd /path/to/scripts +./run-migration-analysis.sh --archive-relay ws://localhost:7443 ... +``` + +### Git Data Paths May Differ from Defaults + +Different deployments store git data in different locations. **Always verify paths before running the analysis.** + +```bash +# Find actual git data paths from service configuration +systemctl cat ngit-relay.service | grep -E 'ExecStart|WorkingDirectory|Environment' +systemctl cat ngit-grasp-*.service | grep -E 'ExecStart|WorkingDirectory|Environment' + +# Common locations: +# - /var/lib/ngit-relay/git (default) +# - /var/lib/ngit-grasp/git (default) +# - /persistent/*/data/repos (custom deployments) + +# Verify the path exists and contains expected structure +ls /path/to/git/npub1*/ # Should show *.git directories +``` + +### Phase 4 Needs the Correct Service Name + +Phase 4 extracts structured logs (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`) from journald. You must specify the service that has these logs - typically the **archive** service (ngit-grasp), not the production service (ngit-relay). 
+ +```bash +# Find all ngit-related services +systemctl list-units 'ngit-*' --all + +# Check which service has structured logging +journalctl -u ngit-grasp-*.service | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]' | head -5 + +# Use the archive service name for Phase 4 +./run-migration-analysis.sh ... --service ngit-grasp-relay-ngit-dev.service +``` + +### Permission Issues with Service-Owned Directories + +Git data directories are typically owned by the service user and may require elevated permissions to read. + +```bash +# Check directory permissions +ls -la /var/lib/ngit-grasp/git + +# Options: +# 1. Run as root/sudo +sudo ./run-migration-analysis.sh ... + +# 2. Run as the service user +sudo -u ngit-grasp ./run-migration-analysis.sh ... + +# 3. Add your user to the service group +sudo usermod -aG ngit-grasp $USER +# (logout/login required) +``` + +### Service Names Vary by Deployment + +NixOS multi-instance deployments use service names like `ngit-grasp-<instance>.service`. Always check actual service names. + +```bash +# List all ngit services +systemctl list-units 'ngit-*' --all --no-pager + +# Example output: +# ngit-relay.service loaded active running ngit-relay +# ngit-grasp-relay-ngit-dev.service loaded active running ngit-grasp (relay-ngit-dev) +``` + ## Migration Overview The migration process has three stages: @@ -98,6 +199,26 @@ Once all issues are resolved: ## Running the Analysis +### Before You Start + +**Verify paths and service names** before running the analysis. Incorrect paths are the most common source of errors. + +```bash +# 1. Find actual git data paths +systemctl cat ngit-relay.service | grep -E 'ExecStart|data|git' +systemctl cat ngit-grasp-*.service | grep -E 'ExecStart|data|git' + +# 2. Find service names +systemctl list-units 'ngit-*' --all --no-pager + +# 3. Verify git data exists at the paths +ls /path/to/prod/git/npub1*/ | head -5 +ls /path/to/archive/git/npub1*/ | head -5 + +# 4. 
Check if archive relay is accessible +nak req -k 30618 --limit 1 ws://localhost:7443 # or your archive URL +``` + ### Basic Usage ```bash @@ -115,13 +236,18 @@ Once all issues are resolved: ### Full Analysis on VPS +**Important:** If your archive relay is localhost-only, you must run this on the VPS. + ```bash +# First, discover your actual paths (see "Before You Start" above) +# Then run with the correct values: + ./run-migration-analysis.sh \ --prod-relay wss://source-relay.example.com \ - --archive-relay wss://target-relay.example.com \ - --prod-git /var/lib/grasp-relay/git \ - --archive-git /var/lib/ngit-grasp/git \ - --service ngit-grasp.service + --archive-relay ws://localhost:7443 \ + --prod-git /path/to/prod/git \ + --archive-git /path/to/archive/git \ + --service ngit-grasp-your-instance.service ``` ### Phase Control @@ -226,6 +352,21 @@ go install github.com/fiatjaf/nak@latest # Or download binary from releases ``` +### "git not found" + +Git must be installed and in PATH: + +```bash +# Check if git is available +which git + +# Install on Debian/Ubuntu +sudo apt install git + +# Install on NixOS (add to configuration.nix) +environment.systemPackages = [ pkgs.git ]; +``` + ### "Permission denied" on git directories Run with sudo or ensure your user has read access: @@ -234,8 +375,38 @@ Run with sudo or ensure your user has read access: # Check permissions ls -la /var/lib/grasp-relay/git -# Run with sudo if needed +# Option 1: Run with sudo sudo ./run-migration-analysis.sh ... + +# Option 2: Run as service user +sudo -u ngit-grasp ./run-migration-analysis.sh ... +``` + +### Archive relay connection failed + +If you get connection errors to the archive relay: + +```bash +# Check if relay is running +systemctl status ngit-grasp-*.service + +# Check if it's localhost-only +# If archive is ws://localhost:7443, you MUST run on the VPS +ssh user@your-vps +./run-migration-analysis.sh --archive-relay ws://localhost:7443 ... 
+``` + +### Wrong git paths / "No such file or directory" + +Git data paths vary by deployment. Discover the actual paths: + +```bash +# Find paths from service configuration +systemctl cat ngit-relay.service | grep -E 'ExecStart|WorkingDirectory|Environment' +systemctl cat ngit-grasp-*.service | grep -E 'ExecStart|WorkingDirectory|Environment' + +# Verify the path contains git repos +ls /discovered/path/npub1*/ ``` ### Phase 2 takes too long @@ -254,6 +425,18 @@ This is expected if: The analysis will continue without log data. +### Phase 4 finds no structured logs + +Structured logging (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`) is only available in ngit-grasp. If checking an ngit-relay service, no structured logs will be found. + +```bash +# Verify you're checking the right service (should be ngit-grasp) +journalctl -u ngit-grasp-*.service | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]' | head -5 + +# If checking ngit-relay, structured logs won't exist +# Use --service with the ngit-grasp archive service name instead +``` + ### Event counts are multiples of 250 This suggests pagination may have failed. The scripts use `--paginate` by default, but if you see exactly 250, 500, 750 events, verify the relay is responding correctly. @@ -410,3 +593,67 @@ For advanced usage, you can run individual phase scripts: ``` Each script has detailed help available with `--help` or by reading the script header. + +## relay.ngit.dev Migration Notes + +This section documents the specific configuration and lessons learned from migrating relay.ngit.dev from ngit-relay to ngit-grasp. Use this as a reference for similar deployments. 
+ +### Deployment Configuration + +| Component | Value | +|-----------|-------| +| **Production relay** | `wss://relay.ngit.dev` | +| **Production service** | `ngit-relay.service` | +| **Production git path** | `/persistent/relay-ngit-dev-ngit-relay/data/repos` | +| **Archive relay** | `ws://localhost:7443` (localhost only) | +| **Archive service** | `ngit-grasp-relay-ngit-dev.service` | +| **Archive git path** | `/persistent/grasp/relay-ngit-dev/git` | + +### Key Differences from Defaults + +1. **Git paths are non-standard**: The production relay uses `/persistent/relay-ngit-dev-ngit-relay/data/repos` instead of `/var/lib/ngit-relay/git` + +2. **Archive is localhost-only**: The archive relay listens on `ws://localhost:7443`, not a public URL. All analysis must run on the VPS. + +3. **Service names include instance**: NixOS multi-instance deployment uses `ngit-grasp-relay-ngit-dev.service`, not `ngit-grasp.service` + +### Analysis Command + +```bash +# Run on VPS (archive is localhost-only) +./docs/how-to/migration-scripts/run-migration-analysis.sh \ + --prod-relay wss://relay.ngit.dev \ + --archive-relay ws://localhost:7443 \ + --prod-git /persistent/relay-ngit-dev-ngit-relay/data/repos \ + --archive-git /persistent/grasp/relay-ngit-dev/git \ + --service ngit-grasp-relay-ngit-dev.service +``` + +### Analysis Results (January 2026) + +| Category | Count | Notes | +|----------|-------|-------| +| Complete in both | ~400 | Ready for migration | +| Complete in prod, missing from archive | 315 | Need re-sync | +| Empty in both | 100 | Users never pushed git data | +| Manual investigation | 5 | Unusual states | +| Purgatory expired | 382 | Structured logging working | + +### Lessons Learned + +1. **Always verify paths first**: The default paths in examples didn't match the actual deployment. Use `systemctl cat <service>` to find real paths. + +2. **Check archive accessibility**: We initially tried to run analysis remotely, but the archive relay was localhost-only. 
Had to SSH to VPS. + +3. **Use archive service for Phase 4**: Structured logging (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`) is in the ngit-grasp archive service, not the ngit-relay production service. + +4. **Install git on VPS**: Git wasn't installed on the minimal VPS. The scripts now check for this in prerequisites. + +5. **Permissions matter**: Some directories required `sudo` to access. Running as root or the service user resolved this. + +### Next Steps for relay.ngit.dev + +1. **Re-sync 315 repos**: Trigger archive to re-fetch from production +2. **Investigate 5 edge cases**: Manual review of unusual states +3. **Monitor purgatory**: 382 expired entries indicate sync issues to investigate +4. **Plan cutover**: Once re-sync complete, switch DNS/proxy to ngit-grasp -- cgit v1.2.3 From 2b21b807bdf6c0bab548ffceb5c41eee0902890c Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 16:12:03 +0000 Subject: Prevent Phase 4 from using wrong service (ngit-relay vs ngit-grasp) Add validation to ensure Phase 4 scripts use ngit-grasp service (with structured logging) instead of ngit-relay service. Changes: - Add validate-service.sh helper for reusable service validation - Add validation to run-migration-analysis.sh before Phase 4 - Add validation to 30-extract-parse-failures.sh - Add validation to 31-extract-purgatory-expiry.sh - Update migration guide with clear warnings about service selection - Expand troubleshooting for 'Phase 4 finds no logs' issue - Emphasize lesson learned in relay.ngit.dev notes This prevents the issue where Phase 4 was run against ngit-relay.service and found no parse failures because structured logging only exists in ngit-grasp services. 
--- docs/how-to/migrate-to-ngit-grasp.md | 69 ++++++++-- .../migration-scripts/30-extract-parse-failures.sh | 34 ++++- .../31-extract-purgatory-expiry.sh | 34 ++++- .../migration-scripts/run-migration-analysis.sh | 28 ++++ docs/how-to/migration-scripts/validate-service.sh | 150 +++++++++++++++++++++ 5 files changed, 304 insertions(+), 11 deletions(-) create mode 100755 docs/how-to/migration-scripts/validate-service.sh diff --git a/docs/how-to/migrate-to-ngit-grasp.md b/docs/how-to/migrate-to-ngit-grasp.md index 9b812a5..00af6c8 100644 --- a/docs/how-to/migrate-to-ngit-grasp.md +++ b/docs/how-to/migrate-to-ngit-grasp.md @@ -122,19 +122,36 @@ ls /path/to/git/npub1*/ # Should show *.git directories ### Phase 4 Needs the Correct Service Name -Phase 4 extracts structured logs (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`) from journald. You must specify the service that has these logs - typically the **archive** service (ngit-grasp), not the production service (ngit-relay). +> **CRITICAL:** Phase 4 extracts structured logs (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`) from journald. These logs **ONLY exist in ngit-grasp services**, NOT in ngit-relay services. + +If you specify an ngit-relay service (like `ngit-relay.service`), Phase 4 will find **zero logs** and produce empty results. This is a common mistake that wastes time and produces misleading analysis. 
+ +**Correct service names (ngit-grasp):** +- `ngit-grasp.service` +- `ngit-grasp-relay-ngit-dev.service` (NixOS multi-instance) +- `ngit-grasp-archive.service` + +**Incorrect service names (ngit-relay - NO structured logging):** +- `ngit-relay.service` +- `relay-ngit-dev.service` ```bash # Find all ngit-related services systemctl list-units 'ngit-*' --all -# Check which service has structured logging +# Check which service has structured logging (should be ngit-grasp) journalctl -u ngit-grasp-*.service | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]' | head -5 +# Verify ngit-relay does NOT have structured logging +journalctl -u ngit-relay.service | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]' | head -5 +# ^ This should return nothing + # Use the archive service name for Phase 4 ./run-migration-analysis.sh ... --service ngit-grasp-relay-ngit-dev.service ``` +The migration scripts now validate the service name and will **error** if you specify an ngit-relay service, preventing this common mistake. + ### Permission Issues with Service-Owned Directories Git data directories are typically owned by the service user and may require elevated permissions to read. @@ -273,7 +290,7 @@ Skip or run specific phases: | `--archive-relay ` | Target relay WebSocket URL (required) | | `--prod-git ` | Git base directory for prod (enables Phase 2) | | `--archive-git ` | Git base directory for archive (enables Phase 2) | -| `--service ` | Systemd service name (enables Phase 4) | +| `--service ` | Systemd service name for Phase 4 log extraction. **MUST be an ngit-grasp service** (not ngit-relay). Structured logging only exists in ngit-grasp. | | `--output ` | Output directory (default: auto-generated) | | `--skip-phase-N` | Skip phase N (1-5) | | `--only-phase-N` | Run only phase N | @@ -427,16 +444,50 @@ The analysis will continue without log data. ### Phase 4 finds no structured logs -Structured logging (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`) is only available in ngit-grasp. 
If checking an ngit-relay service, no structured logs will be found. +**Symptom:** Phase 4 completes but `parse-failures.txt` and `purgatory-expired.txt` are empty or contain only header comments. + +**Most common cause:** You're querying the wrong service (ngit-relay instead of ngit-grasp). + +Structured logging (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`) **only exists in ngit-grasp services**. If you specify an ngit-relay service, Phase 4 will find zero logs. + +**How to diagnose:** ```bash -# Verify you're checking the right service (should be ngit-grasp) -journalctl -u ngit-grasp-*.service | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]' | head -5 +# 1. Check what service you configured +cat /path/to/output/config.txt | grep SERVICE_NAME -# If checking ngit-relay, structured logs won't exist -# Use --service with the ngit-grasp archive service name instead +# 2. If it contains "ngit-relay", that's the problem! +# ngit-relay does NOT have structured logging + +# 3. Find the correct ngit-grasp service +systemctl list-units 'ngit-grasp*' --all + +# 4. Verify the ngit-grasp service has structured logs +journalctl -u ngit-grasp-relay-ngit-dev.service --since "7 days ago" | \ + grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]' | head -5 ``` +**How to fix:** + +```bash +# Update SERVICE_NAME to the ngit-grasp archive service and re-run +./run-migration-analysis.sh \ + --prod-relay wss://relay.ngit.dev \ + --archive-relay ws://localhost:7443 \ + --service ngit-grasp-relay-ngit-dev.service \ + --from-phase-4 # Skip phases 1-3, just re-run phase 4 +``` + +**Other possible causes:** + +1. **Structured logging not deployed:** If the ngit-grasp instance doesn't have the logging improvements deployed, no structured logs will exist. Check the ngit-grasp version. + +2. **No events in time window:** If there genuinely were no parse failures or purgatory expiry events, the files will be empty. This is valid - it means everything parsed successfully. + +3. 
**Wrong time range:** The default is 30 days. If your archive has been running longer, you may need `--since` to extend the range. + +**Prevention:** The migration scripts now validate the service name and will error if you specify an ngit-relay service. + ### Event counts are multiples of 250 This suggests pagination may have failed. The scripts use `--paginate` by default, but if you see exactly 250, 500, 750 events, verify the relay is responding correctly. @@ -645,7 +696,7 @@ This section documents the specific configuration and lessons learned from migra 2. **Check archive accessibility**: We initially tried to run analysis remotely, but the archive relay was localhost-only. Had to SSH to VPS. -3. **Use archive service for Phase 4**: Structured logging (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`) is in the ngit-grasp archive service, not the ngit-relay production service. +3. **Use archive service for Phase 4 (CRITICAL)**: Structured logging (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`) is **ONLY** in the ngit-grasp archive service, NOT the ngit-relay production service. Running Phase 4 against `ngit-relay.service` produces zero results because ngit-relay doesn't emit structured logs. The scripts now validate this and error if you specify an ngit-relay service. 4. **Install git on VPS**: Git wasn't installed on the minimal VPS. The scripts now check for this in prerequisites. 
diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh index bc2049a..410fcbc 100755 --- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh +++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh @@ -65,6 +65,14 @@ set -euo pipefail +# Get script directory for sourcing helpers +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Source the service validation helper +if [[ -f "$SCRIPT_DIR/validate-service.sh" ]]; then + source "$SCRIPT_DIR/validate-service.sh" +fi + # Colors for output (disabled if not a terminal) if [[ -t 1 ]]; then RED='\033[0;31m' @@ -188,11 +196,35 @@ main() { esac done - # Validate service name + # Validate service name format if [[ ! "$service" =~ \.service$ ]]; then service="${service}.service" fi + # Validate service is appropriate for structured logging + # This prevents the common mistake of using ngit-relay instead of ngit-grasp + if type validate_service_for_structured_logging &>/dev/null; then + # Use non-interactive mode if not a terminal, skip log check (we'll do our own) + local interactive="true" + [[ ! -t 0 ]] && interactive="false" + + if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then + log_error "Service validation failed. Use an ngit-grasp service for structured logging." + exit 1 + fi + else + # Fallback validation if helper not available + if [[ "$service" == *"ngit-relay"* ]]; then + log_error "Service name appears to be ngit-relay: $service" + log_error "Structured logging ([PARSE_FAIL]) only exists in ngit-grasp services." + log_error "Please use the ngit-grasp archive service instead." 
+ log_error "" + log_error "To find the correct service:" + log_error " systemctl list-units 'ngit-grasp*' --all" + exit 1 + fi + fi + log_info "Extracting parse failures from systemd logs" log_info "Service: $service" log_info "Output: $output_dir" diff --git a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh index 8cadad9..a20780e 100755 --- a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh +++ b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh @@ -76,6 +76,14 @@ set -euo pipefail +# Get script directory for sourcing helpers +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Source the service validation helper +if [[ -f "$SCRIPT_DIR/validate-service.sh" ]]; then + source "$SCRIPT_DIR/validate-service.sh" +fi + # Colors for output (disabled if not a terminal) if [[ -t 1 ]]; then RED='\033[0;31m' @@ -195,11 +203,35 @@ main() { esac done - # Validate service name + # Validate service name format if [[ ! "$service" =~ \.service$ ]]; then service="${service}.service" fi + # Validate service is appropriate for structured logging + # This prevents the common mistake of using ngit-relay instead of ngit-grasp + if type validate_service_for_structured_logging &>/dev/null; then + # Use non-interactive mode if not a terminal, skip log check (we'll do our own) + local interactive="true" + [[ ! -t 0 ]] && interactive="false" + + if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then + log_error "Service validation failed. Use an ngit-grasp service for structured logging." + exit 1 + fi + else + # Fallback validation if helper not available + if [[ "$service" == *"ngit-relay"* ]]; then + log_error "Service name appears to be ngit-relay: $service" + log_error "Structured logging ([PURGATORY_EXPIRED]) only exists in ngit-grasp services." + log_error "Please use the ngit-grasp archive service instead." 
+ log_error "" + log_error "To find the correct service:" + log_error " systemctl list-units 'ngit-grasp*' --all" + exit 1 + fi + fi + log_info "Extracting purgatory expiry events from systemd logs" log_info "Service: $service" log_info "Output: $output_dir" diff --git a/docs/how-to/migration-scripts/run-migration-analysis.sh b/docs/how-to/migration-scripts/run-migration-analysis.sh index 65d9d17..b2ca142 100755 --- a/docs/how-to/migration-scripts/run-migration-analysis.sh +++ b/docs/how-to/migration-scripts/run-migration-analysis.sh @@ -548,6 +548,34 @@ run_phase_4() { return 0 fi + # Validate service name before running Phase 4 + # Structured logging only exists in ngit-grasp, not ngit-relay + if [[ "$SERVICE_NAME" == *"ngit-relay"* ]]; then + log_error "SERVICE_NAME appears to be ngit-relay: $SERVICE_NAME" + log_error "" + log_error "Phase 4 requires an ngit-grasp service with structured logging." + log_error "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists" + log_error "in ngit-grasp services, NOT in ngit-relay services." + log_error "" + log_error "Please update --service to use the ngit-grasp archive service." + log_error "" + log_error "To find the correct service name:" + log_error " systemctl list-units 'ngit-grasp*' --all" + log_error "" + log_error "Common ngit-grasp service names:" + log_error " - ngit-grasp.service" + log_error " - ngit-grasp-relay-ngit-dev.service (NixOS multi-instance)" + log_error " - ngit-grasp-archive.service" + return 1 + fi + + # Warn if service name doesn't look like ngit-grasp + if [[ "$SERVICE_NAME" != *"ngit-grasp"* && "$SERVICE_NAME" != *"grasp"* ]]; then + log_warn "SERVICE_NAME doesn't contain 'ngit-grasp': $SERVICE_NAME" + log_warn "Structured logging only exists in ngit-grasp services." + log_warn "If this is not an ngit-grasp service, Phase 4 will find no logs." 
+ fi + local cmds=() cmds+=("'$SCRIPT_DIR/30-extract-parse-failures.sh' '$SERVICE_NAME' '$OUTPUT_DIR/logs'") diff --git a/docs/how-to/migration-scripts/validate-service.sh b/docs/how-to/migration-scripts/validate-service.sh new file mode 100755 index 0000000..2525a3f --- /dev/null +++ b/docs/how-to/migration-scripts/validate-service.sh @@ -0,0 +1,150 @@ +#!/usr/bin/env bash +# +# validate-service.sh - Validate service name for structured logging +# +# This helper script validates that a service name is appropriate for +# Phase 4 log extraction. Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) +# only exists in ngit-grasp services, NOT in ngit-relay services. +# +# USAGE: +# Source this script and call the validation function: +# +# source validate-service.sh +# validate_service_for_structured_logging "$SERVICE_NAME" || exit 1 +# +# BACKGROUND: +# Phase 4 of the migration analysis extracts structured log entries from +# journald. These log entries only exist in ngit-grasp services. If you +# accidentally specify an ngit-relay service, Phase 4 will find no logs +# and produce empty results. +# +# This validation prevents that common mistake by: +# 1. Checking if the service name contains "ngit-relay" (error) +# 2. Warning if the service name doesn't contain "ngit-grasp" +# 3. 
# SEE ALSO:
#   docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
#   30-extract-parse-failures.sh - Uses this validation
#   31-extract-purgatory-expiry.sh - Uses this validation
#

# Colors for diagnostics. Every message in this file is written to stderr,
# so the terminal check is made against fd 2 rather than stdout.
if [[ -t 2 ]]; then
    _VS_RED='\033[0;31m'
    _VS_YELLOW='\033[0;33m'
    _VS_NC='\033[0m'
else
    _VS_RED=''
    _VS_YELLOW=''
    _VS_NC=''
fi

# Shared "Continue anyway?" step used after each warning.
#
# Arguments:
#   $1 - interactive: "true" to prompt on stdin, anything else to proceed
#
# Returns:
#   0 - proceed (non-interactive mode, or the user answered y/Y)
#   1 - the user declined, or stdin could not be read
_vs_confirm_continue() {
    local interactive="${1:-true}"
    local answer=''

    if [[ "$interactive" != "true" ]]; then
        echo "Non-interactive mode: proceeding despite warning" >&2
        return 0
    fi

    # A failed read (EOF / closed stdin) is treated as "no" instead of
    # aborting a caller that runs under `set -e`.
    read -r -n 1 -p "Continue anyway? (y/N) " answer || answer=''
    echo >&2
    [[ "$answer" =~ ^[Yy]$ ]]
}

# Validates that the service name is appropriate for structured logging
#
# Arguments:
#   $1 - service_name: The systemd service name to validate
#   $2 - check_logs: Whether to check if logs actually exist (default: "true")
#   $3 - interactive: Whether to prompt for confirmation (default: "true")
#
# Returns:
#   0 - Service is valid for structured logging
#   1 - Service is invalid or user declined to continue
#
# Example:
#   validate_service_for_structured_logging "ngit-grasp.service" || exit 1
#   validate_service_for_structured_logging "ngit-grasp.service" "false"         # Skip log check
#   validate_service_for_structured_logging "ngit-grasp.service" "true" "false"  # Non-interactive
#
validate_service_for_structured_logging() {
    local service_name="${1:-}"
    local check_logs="${2:-true}"
    local interactive="${3:-true}"

    # The function is exported but the color variables are not, so fall back
    # to empty strings when running in a child shell (also safe under set -u).
    local red="${_VS_RED:-}"
    local yellow="${_VS_YELLOW:-}"
    local nc="${_VS_NC:-}"

    # Check if service name looks like ngit-relay (ERROR - wrong service type)
    if [[ "$service_name" == *"ngit-relay"* ]]; then
        # %b expands the color escapes; %s keeps the service name literal.
        printf '%b%s%b\n' "$red" "ERROR: Service name appears to be ngit-relay: $service_name" "$nc" >&2
        cat >&2 <<'EOF'

Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists in
ngit-grasp services, NOT in ngit-relay services.

Please use the ngit-grasp archive service instead.

To find the correct service name:
  systemctl list-units 'ngit-grasp*' --all

Common ngit-grasp service names:
  - ngit-grasp.service
  - ngit-grasp-relay-ngit-dev.service (NixOS multi-instance)
  - ngit-grasp-archive.service
EOF
        return 1
    fi

    # Check if service name looks like ngit-grasp (WARNING if not).
    # "grasp" is a substring of "ngit-grasp", so one pattern covers both.
    if [[ "$service_name" != *"grasp"* ]]; then
        printf '%b%s%b\n' "$yellow" "WARNING: Service name doesn't contain 'ngit-grasp': $service_name" "$nc" >&2
        cat >&2 <<'EOF'

Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists in
ngit-grasp services.

EOF
        _vs_confirm_continue "$interactive" || return 1
    fi

    # Optionally check if structured logs actually exist
    if [[ "$check_logs" != "true" ]]; then
        return 0
    fi

    # Check if journalctl is available
    if ! command -v journalctl &> /dev/null; then
        printf '%b%s%b\n' "$yellow" "WARNING: journalctl not available, cannot verify logs exist" "$nc" >&2
        return 0
    fi

    # Count structured entries in a single pass over the journal.
    # --no-pager keeps journalctl from waiting on a pager. grep -c already
    # prints 0 (and exits 1) when nothing matches, so only its exit status
    # needs neutralising for callers running under set -e / pipefail.
    local structured_count
    structured_count=$(journalctl --no-pager -u "$service_name" --since "7 days ago" 2>/dev/null \
        | grep -cE '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]') || true
    structured_count="${structured_count//[^0-9]/}"
    structured_count="${structured_count:-0}"

    if [[ "$structured_count" -eq 0 ]]; then
        printf '%b%s%b\n' "$yellow" "WARNING: No structured logs found in $service_name (last 7 days)" "$nc" >&2
        cat >&2 <<EOF

This may indicate:
  1. Wrong service (should be ngit-grasp archive service, not ngit-relay)
  2. Structured logging not yet deployed to this ngit-grasp instance
  3. No parse failures or purgatory expiry events in the time window

To verify you have the right service:
  systemctl list-units 'ngit-grasp*' --all
  journalctl -u $service_name | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]' | head -5

EOF
        _vs_confirm_continue "$interactive" || return 1
    fi

    return 0
}

# Export the functions so they can be used after sourcing (the helper must be
# exported too, otherwise child shells would inherit only the entry point)
export -f _vs_confirm_continue validate_service_for_structured_logging
+ log_warn "" + log_warn "To list available ngit-grasp services:" + log_warn " systemctl list-units 'ngit-grasp*' --all" + log_warn " journalctl --list-boots # Check if you have journal access" + log_warn "" + # Continue anyway - the service might exist but have no logs yet + fi + # Build journalctl command local journal_cmd="journalctl -u $service --no-pager -o short-iso" @@ -280,8 +293,20 @@ main() { log_info "Extracting log entries..." # Get raw log lines containing [PARSE_FAIL] - local raw_lines - raw_lines=$(eval "$journal_cmd" 2>/dev/null | grep '\[PARSE_FAIL\]' || true) + # Capture stderr separately to detect journalctl errors + local raw_lines journal_stderr journal_exit + local temp_stderr + temp_stderr=$(mktemp) + + raw_lines=$(eval "$journal_cmd" 2>"$temp_stderr" | grep '\[PARSE_FAIL\]' || true) + journal_exit=$? + journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true) + rm -f "$temp_stderr" + + # Report any journalctl errors (but don't fail - empty logs are valid) + if [[ -n "$journal_stderr" ]]; then + log_warn "journalctl reported: $journal_stderr" + fi if [[ -z "$raw_lines" ]]; then log_warn "No [PARSE_FAIL] entries found in logs." diff --git a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh index a20780e..a603a1e 100755 --- a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh +++ b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh @@ -243,6 +243,19 @@ main() { exit 1 fi + # Validate service exists (check if journalctl can find any logs for it) + # Note: We don't require the service to be running, just that it has logs + if ! journalctl --no-pager -u "$service" -n 1 &>/dev/null; then + log_warn "Could not query logs for service: $service" + log_warn "This may indicate the service doesn't exist or you lack permissions." 
+ log_warn "" + log_warn "To list available ngit-grasp services:" + log_warn " systemctl list-units 'ngit-grasp*' --all" + log_warn " journalctl --list-boots # Check if you have journal access" + log_warn "" + # Continue anyway - the service might exist but have no logs yet + fi + # Build journalctl command local journal_cmd="journalctl -u $service --no-pager -o short-iso" @@ -287,8 +300,20 @@ main() { log_info "Extracting log entries..." # Get raw log lines containing [PURGATORY_EXPIRED] - local raw_lines - raw_lines=$(eval "$journal_cmd" 2>/dev/null | grep '\[PURGATORY_EXPIRED\]' || true) + # Capture stderr separately to detect journalctl errors + local raw_lines journal_stderr journal_exit + local temp_stderr + temp_stderr=$(mktemp) + + raw_lines=$(eval "$journal_cmd" 2>"$temp_stderr" | grep '\[PURGATORY_EXPIRED\]' || true) + journal_exit=$? + journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true) + rm -f "$temp_stderr" + + # Report any journalctl errors (but don't fail - empty logs are valid) + if [[ -n "$journal_stderr" ]]; then + log_warn "journalctl reported: $journal_stderr" + fi if [[ -z "$raw_lines" ]]; then log_warn "No [PURGATORY_EXPIRED] entries found in logs." 
diff --git a/docs/how-to/migration-scripts/validate-service.sh b/docs/how-to/migration-scripts/validate-service.sh index 2525a3f..6988af3 100755 --- a/docs/how-to/migration-scripts/validate-service.sh +++ b/docs/how-to/migration-scripts/validate-service.sh @@ -108,9 +108,10 @@ validate_service_for_structured_logging() { fi # Check for structured log entries + # IMPORTANT: Use --no-pager to prevent hanging when run non-interactively (e.g., via SSH) local has_parse_fail has_purgatory - has_parse_fail=$(journalctl -u "$service_name" --since "7 days ago" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") - has_purgatory=$(journalctl -u "$service_name" --since "7 days ago" 2>/dev/null | grep -c '\[PURGATORY_EXPIRED\]' || echo "0") + has_parse_fail=$(journalctl --no-pager -u "$service_name" --since "7 days ago" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") + has_purgatory=$(journalctl --no-pager -u "$service_name" --since "7 days ago" 2>/dev/null | grep -c '\[PURGATORY_EXPIRED\]' || echo "0") # Strip any non-numeric characters (grep -c can have trailing whitespace) has_parse_fail="${has_parse_fail//[^0-9]/}" -- cgit v1.2.3 From cbb0e768641a6ca0cbd7e7013437cc71b920004d Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 16:59:24 +0000 Subject: Capture invalid announcement rejections in Phase 4 Update parse failures script to also extract 'Invalid announcement' rejections from logs. These are announcement events that failed validation (e.g., multiple clone tags instead of single tag with multiple values). 
Changes: - Search for 'Event rejected by write policy' pattern with 'Invalid announcement' - Search for 'Rejected repository announcement' pattern from builder - Extract event_id, kind, and reason from rejection logs - Combine with [PARSE_FAIL] entries in output - Deduplicate entries by event_id - Update header to clarify both patterns are captured - Update migration guide to document this - Fix SIGPIPE handling in purgatory script (minor) This captures the ~446 unique announcements rejected for NIP-34 format violations (multiple clone tags), which were previously unexplained in the migration analysis. --- docs/how-to/migrate-to-ngit-grasp.md | 20 +- .../migration-scripts/30-extract-parse-failures.sh | 289 +++++++++++++++++---- .../31-extract-purgatory-expiry.sh | 15 +- 3 files changed, 256 insertions(+), 68 deletions(-) diff --git a/docs/how-to/migrate-to-ngit-grasp.md b/docs/how-to/migrate-to-ngit-grasp.md index 00af6c8..62cad87 100644 --- a/docs/how-to/migrate-to-ngit-grasp.md +++ b/docs/how-to/migrate-to-ngit-grasp.md @@ -122,7 +122,7 @@ ls /path/to/git/npub1*/ # Should show *.git directories ### Phase 4 Needs the Correct Service Name -> **CRITICAL:** Phase 4 extracts structured logs (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`) from journald. These logs **ONLY exist in ngit-grasp services**, NOT in ngit-relay services. +> **CRITICAL:** Phase 4 extracts structured logs (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`, `Invalid announcement` rejections) from journald. These logs **ONLY exist in ngit-grasp services**, NOT in ngit-relay services. If you specify an ngit-relay service (like `ngit-relay.service`), Phase 4 will find **zero logs** and produce empty results. This is a common mistake that wastes time and produces misleading analysis. 
@@ -140,10 +140,10 @@ If you specify an ngit-relay service (like `ngit-relay.service`), Phase 4 will f systemctl list-units 'ngit-*' --all # Check which service has structured logging (should be ngit-grasp) -journalctl -u ngit-grasp-*.service | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]' | head -5 +journalctl -u ngit-grasp-*.service | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]|Invalid announcement' | head -5 # Verify ngit-relay does NOT have structured logging -journalctl -u ngit-relay.service | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]' | head -5 +journalctl -u ngit-relay.service | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]|Invalid announcement' | head -5 # ^ This should return nothing # Use the archive service name for Phase 4 @@ -448,7 +448,7 @@ The analysis will continue without log data. **Most common cause:** You're querying the wrong service (ngit-relay instead of ngit-grasp). -Structured logging (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`) **only exists in ngit-grasp services**. If you specify an ngit-relay service, Phase 4 will find zero logs. +Structured logging (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`, `Invalid announcement` rejections) **only exists in ngit-grasp services**. If you specify an ngit-relay service, Phase 4 will find zero logs. **How to diagnose:** @@ -464,7 +464,7 @@ systemctl list-units 'ngit-grasp*' --all # 4. Verify the ngit-grasp service has structured logs journalctl -u ngit-grasp-relay-ngit-dev.service --since "7 days ago" | \ - grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]' | head -5 + grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]|Invalid announcement' | head -5 ``` **How to fix:** @@ -482,12 +482,14 @@ journalctl -u ngit-grasp-relay-ngit-dev.service --since "7 days ago" | \ 1. **Structured logging not deployed:** If the ngit-grasp instance doesn't have the logging improvements deployed, no structured logs will exist. Check the ngit-grasp version. -2. 
**No events in time window:** If there genuinely were no parse failures or purgatory expiry events, the files will be empty. This is valid - it means everything parsed successfully. +2. **No events in time window:** If there genuinely were no parse failures, purgatory expiry events, or invalid announcement rejections, the files will be empty. This is valid - it means everything parsed successfully. 3. **Wrong time range:** The default is 30 days. If your archive has been running longer, you may need `--since` to extend the range. **Prevention:** The migration scripts now validate the service name and will error if you specify an ngit-relay service. +**Note on "Invalid announcement" rejections:** These are announcements (kind 30617) that were rejected by the write policy due to format violations. The most common reason is "multiple clone tags found" - the NIP-34 spec requires a single clone tag with multiple values, not multiple clone tags. These rejections are logged as `Event rejected by write policy ... reason=Invalid announcement: ...`. + ### Event counts are multiples of 250 This suggests pagination may have failed. The scripts use `--paginate` by default, but if you see exactly 250, 500, 750 events, verify the relay is responding correctly. 
@@ -529,7 +531,11 @@ The analysis is split into 5 modular phases: ↓ ┌─────────────────────────────────────────────────────────────────┐ │ PHASE 4: Log-Based Categories (VPS required) │ -│ Extracts [PARSE_FAIL] and [PURGATORY_EXPIRED] from logs │ +│ Extracts structured logs from the archive service: │ +│ - [PARSE_FAIL] - Events that failed to parse │ +│ - [PURGATORY_EXPIRED] - Repos where git data never arrived │ +│ - "Invalid announcement" - Announcements rejected for format │ +│ violations (e.g., multiple clone tags) │ │ Provides context for why repos failed to sync │ └─────────────────────────────────────────────────────────────────┘ ↓ diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh index d4f0ff2..114a44d 100755 --- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh +++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh @@ -3,7 +3,8 @@ # 30-extract-parse-failures.sh - Extract parse failure events from systemd logs # # PHASE 4a of the GRASP relay to ngit-grasp migration analysis pipeline. -# Extracts structured [PARSE_FAIL] log entries from journalctl. +# Extracts structured [PARSE_FAIL] log entries AND "Invalid announcement" +# rejections from journalctl. # # USAGE: # ./30-extract-parse-failures.sh [options] @@ -27,24 +28,34 @@ # /parse-failures.txt # # OUTPUT FORMAT (TSV): -# reponpubkindevent_idreason +# event_idkindreasonreponpub # -# EXPECTED LOG FORMAT: -# The script looks for structured log entries in this format: +# EXPECTED LOG FORMATS: +# The script looks for two types of log entries: # -# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... +# 1. Structured [PARSE_FAIL] entries: +# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... +# +# 2. 
"Invalid announcement" rejections (write policy): +# Event rejected by write policy event_id=abc123... relay=wss://... kind=30617 reason=Invalid announcement: multiple clone tags found... +# +# 3. "Rejected repository announcement" (builder): +# Rejected repository announcement note1xxx: Invalid announcement: multiple clone tags found... # # Required fields: kind, event_id, reason -# Optional fields: repo, npub (may not be available if parsing failed early) +# Optional fields: repo, npub (may not be available for all entry types) # # DEPENDENCY: # This script requires logging improvements in ngit-grasp to emit structured # [PARSE_FAIL] log entries. Until those are implemented, this script will # find no matching entries (which is handled gracefully). # +# "Invalid announcement" rejections are logged by the write policy and +# should be present in any ngit-grasp deployment. +# # See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section) # -# Expected Rust logging code: +# Expected Rust logging code for [PARSE_FAIL]: # tracing::warn!( # target: "migration", # "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}", @@ -53,7 +64,7 @@ # # PREREQUISITES: # - journalctl (systemd) -# - grep, awk (standard Unix tools) +# - grep, awk, sed (standard Unix tools) # - Access to systemd journal (may require sudo or journal group membership) # # RUNTIME: Depends on log volume, typically < 30 seconds @@ -121,15 +132,17 @@ usage() { echo " $0 ngit-grasp.service output/logs --since '2026-01-01'" echo " $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'" echo "" - echo "Expected log format:" + echo "Expected log formats:" echo " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." + echo " Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ..." + echo " Rejected repository announcement note1xxx: Invalid announcement: ..." 
# Parse an "Invalid announcement" rejection log line from write policy
#
# Input:  $1 - log line containing "Event rejected by write policy" with
#              "Invalid announcement", e.g.
#              Event rejected by write policy event_id=abc123 relay=wss://... kind=30617 reason=Invalid announcement: ...
# Output: one TSV line on stdout: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
#         Nothing is printed when event_id, kind or reason is missing.
# Note:   repo and npub are always empty for these entries (not available in
#         this log format).
parse_write_policy_rejection_line() {
    local line="${1:-}"
    local event_id='' kind='' reason=''

    # Patterns are kept in variables so the regex operator sees them unquoted.
    local re_event_id='event_id=([a-f0-9]+)'
    local re_kind='kind=([0-9]+)'
    local re_reason='reason=(.*)$'

    # [[ =~ ]] yields only the leftmost match, so a reason text that happens to
    # mention "kind=" or "event_id=" cannot turn a field into several lines.
    if [[ "$line" =~ $re_event_id ]]; then
        event_id="${BASH_REMATCH[1]}"
    fi
    if [[ "$line" =~ $re_kind ]]; then
        kind="${BASH_REMATCH[1]}"
    fi
    # The reason is unquoted in the log and runs to the end of the line.
    if [[ "$line" =~ $re_reason ]]; then
        reason="${BASH_REMATCH[1]}"
    fi

    # A literal tab inside the reason would shift the TSV columns.
    reason="${reason//$'\t'/ }"

    # Only output if we have the required fields
    if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then
        printf '%s\t%s\t%s\t\t\n' "$event_id" "$kind" "$reason"
    fi
}
reason + + # Extract note_id (note1...) from "Rejected repository announcement note1xxx:" + note_id=$(echo "$line" | grep -oP 'Rejected repository announcement \Knote1[a-z0-9]+' || echo "") + + # Extract reason (everything after the note_id and colon) + reason=$(echo "$line" | grep -oP 'Rejected repository announcement note1[a-z0-9]+: \K.*$' || echo "") + + # Only output if we have the required fields + # Kind is always 30617 for announcements + if [[ -n "$note_id" && -n "$reason" ]]; then + # Use note_id as event_id (bech32 format), kind=30617, repo and npub empty + printf '%s\t%s\t%s\t\t\n' "$note_id" "30617" "$reason" fi } @@ -260,21 +322,27 @@ main() { journal_cmd="$journal_cmd --until '$until_date'" fi - log_info "Running: $journal_cmd | grep '\\[PARSE_FAIL\\]'" + log_info "Running: $journal_cmd | grep '[PARSE_FAIL]' or 'Invalid announcement'" if [[ "$dry_run" == true ]]; then log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt" # Show sample of what would be extracted log_info "Checking for matching log entries..." - local sample_count - sample_count=$(eval "$journal_cmd" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") - sample_count="${sample_count//[^0-9]/}" # Strip non-numeric characters - sample_count="${sample_count:-0}" - log_info "Found $sample_count matching log entries" + local parse_fail_count invalid_announcement_count + parse_fail_count=$(eval "$journal_cmd" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") + parse_fail_count="${parse_fail_count//[^0-9]/}" # Strip non-numeric characters + parse_fail_count="${parse_fail_count:-0}" + + invalid_announcement_count=$(eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep -c 'Invalid announcement' || echo "0") + invalid_announcement_count="${invalid_announcement_count//[^0-9]/}" + invalid_announcement_count="${invalid_announcement_count:-0}" - if [[ "$sample_count" -eq 0 ]]; then - log_warn "No [PARSE_FAIL] entries found in logs." 
+ log_info "Found $parse_fail_count [PARSE_FAIL] entries" + log_info "Found $invalid_announcement_count 'Invalid announcement' rejections" + + if [[ "$parse_fail_count" -eq 0 && "$invalid_announcement_count" -eq 0 ]]; then + log_warn "No matching entries found in logs." log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" fi @@ -289,69 +357,159 @@ main() { local temp_file temp_file=$(mktemp) - # Extract and parse log entries + # Extract and parse log entries using streaming (avoids loading all logs into memory) log_info "Extracting log entries..." - # Get raw log lines containing [PARSE_FAIL] - # Capture stderr separately to detect journalctl errors - local raw_lines journal_stderr journal_exit - local temp_stderr + # Create temp files for intermediate results + local temp_stderr temp_parse_fail temp_write_policy_rejection temp_builder_rejection temp_stderr=$(mktemp) + temp_parse_fail=$(mktemp) + temp_write_policy_rejection=$(mktemp) + temp_builder_rejection=$(mktemp) - raw_lines=$(eval "$journal_cmd" 2>"$temp_stderr" | grep '\[PARSE_FAIL\]' || true) - journal_exit=$? - journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true) - rm -f "$temp_stderr" + # Extract [PARSE_FAIL] entries directly to temp file (streaming) + log_info " Searching for [PARSE_FAIL] entries..." + eval "$journal_cmd" 2>"$temp_stderr" | grep '\[PARSE_FAIL\]' > "$temp_parse_fail" || true - # Report any journalctl errors (but don't fail - empty logs are valid) + local journal_stderr + journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true) if [[ -n "$journal_stderr" ]]; then log_warn "journalctl reported: $journal_stderr" fi - if [[ -z "$raw_lines" ]]; then - log_warn "No [PARSE_FAIL] entries found in logs." + # Extract "Event rejected by write policy" with "Invalid announcement" (streaming) + log_info " Searching for write policy rejections..." 
+ eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep 'Invalid announcement' > "$temp_write_policy_rejection" || true + + # Extract "Rejected repository announcement" from builder (streaming) + log_info " Searching for builder rejections..." + eval "$journal_cmd" 2>/dev/null | grep 'Rejected repository announcement' | grep 'Invalid announcement' > "$temp_builder_rejection" || true + + rm -f "$temp_stderr" + + # Check if we found anything + local parse_fail_line_count write_policy_line_count builder_line_count + parse_fail_line_count=$(wc -l < "$temp_parse_fail") + parse_fail_line_count="${parse_fail_line_count//[^0-9]/}" + write_policy_line_count=$(wc -l < "$temp_write_policy_rejection") + write_policy_line_count="${write_policy_line_count//[^0-9]/}" + builder_line_count=$(wc -l < "$temp_builder_rejection") + builder_line_count="${builder_line_count//[^0-9]/}" + + log_info " Found $parse_fail_line_count [PARSE_FAIL] log lines" + log_info " Found $write_policy_line_count write policy rejection log lines" + log_info " Found $builder_line_count builder rejection log lines" + + local total_invalid_announcement_lines=$((write_policy_line_count + builder_line_count)) + + if [[ "$parse_fail_line_count" -eq 0 && "$total_invalid_announcement_lines" -eq 0 ]]; then + log_warn "No matching entries found in logs." log_warn "" log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." - log_warn "The structured log format required by this script:" + log_warn "The script looks for:" log_warn "" - log_warn " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." + log_warn " 1. [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." + log_warn " 2. Event rejected by write policy event_id=... kind=30617 reason=Invalid announcement: ..." 
log_warn "" log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" log_warn "" # Create empty output file with header comment { - echo "# Parse failures extracted from $service" + echo "# Parse failures and invalid announcements extracted from $service" echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" echo "# Extracted: $(date -Iseconds)" - echo "# Format: reponpubkindevent_idreason" echo "#" - echo "# NOTE: No [PARSE_FAIL] entries found." + echo "# Includes:" + echo "# - [PARSE_FAIL] structured log entries" + echo "# - \"Invalid announcement\" rejections" + echo "#" + echo "# Format: event_idkindreasonreponpub" + echo "# Note: repo and npub may be empty for some entries" + echo "#" + echo "# NOTE: No matching entries found." echo "# This is expected if ngit-grasp logging improvements are not yet deployed." } > "$output_file" + rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_builder_rejection" log_info "Created empty output file: $output_file" exit 0 fi # Write header { - echo "# Parse failures extracted from $service" + echo "# Parse failures and invalid announcements extracted from $service" echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" echo "# Extracted: $(date -Iseconds)" - echo "# Format: reponpubkindevent_idreason" + echo "#" + echo "# Includes:" + echo "# - [PARSE_FAIL] structured log entries" + echo "# - \"Invalid announcement\" rejections" + echo "#" + echo "# Format: event_idkindreasonreponpub" + echo "# Note: repo and npub may be empty for some entries" } > "$output_file" - # Parse each line - local count=0 - while IFS= read -r line; do - local parsed - parsed=$(parse_log_line "$line") - if [[ -n "$parsed" ]]; then - echo "$parsed" >> "$output_file" - ((count++)) - fi - done <<< "$raw_lines" + # Parse [PARSE_FAIL] entries + log_info " Parsing [PARSE_FAIL] entries..." 
+ local parse_fail_count=0 + if [[ "$parse_fail_line_count" -gt 0 ]]; then + while IFS= read -r line; do + local parsed + parsed=$(parse_parse_fail_line "$line") + if [[ -n "$parsed" ]]; then + echo "$parsed" >> "$output_file" + parse_fail_count=$((parse_fail_count + 1)) + fi + done < "$temp_parse_fail" + fi + + # Parse write policy rejection entries + log_info " Parsing write policy rejection entries..." + local write_policy_count=0 + if [[ "$write_policy_line_count" -gt 0 ]]; then + while IFS= read -r line; do + local parsed + parsed=$(parse_write_policy_rejection_line "$line") + if [[ -n "$parsed" ]]; then + echo "$parsed" >> "$output_file" + write_policy_count=$((write_policy_count + 1)) + fi + done < "$temp_write_policy_rejection" + fi + + # Parse builder rejection entries + log_info " Parsing builder rejection entries..." + local builder_count=0 + if [[ "$builder_line_count" -gt 0 ]]; then + while IFS= read -r line; do + local parsed + parsed=$(parse_builder_rejection_line "$line") + if [[ -n "$parsed" ]]; then + echo "$parsed" >> "$output_file" + builder_count=$((builder_count + 1)) + fi + done < "$temp_builder_rejection" + fi + + local invalid_announcement_count=$((write_policy_count + builder_count)) + + rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_builder_rejection" + + # Deduplicate by event_id (first column) - keep first occurrence + log_info " Deduplicating entries..." 
+ local deduped_file + deduped_file=$(mktemp) + # Preserve header lines (starting with #) and deduplicate data lines + grep '^#' "$output_file" > "$deduped_file" + grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file" + mv "$deduped_file" "$output_file" + + # Count final entries (excluding header lines) + local count + count=$(grep -v '^#' "$output_file" | wc -l) + count="${count//[^0-9]/}" # Strip whitespace + count="${count:-0}" rm -f "$temp_file" @@ -360,26 +518,45 @@ main() { log_info "=== Extraction Summary ===" log_info "Service: $service" log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" - log_success "Extracted $count parse failure entries" + log_success "Extracted $count total entries" + log_info " - [PARSE_FAIL] entries: $parse_fail_count" + log_info " - Invalid announcement rejections: $invalid_announcement_count" echo "" log_info "Output file: $output_file" if [[ $count -gt 0 ]]; then echo "" log_info "Sample entries (first 5):" - tail -n +5 "$output_file" | head -5 | while IFS=$'\t' read -r repo npub kind event_id reason; do - echo " kind=$kind repo=$repo reason=\"$reason\"" - done + # Use a subshell to avoid SIGPIPE issues with set -e + # New format: event_idkindreasonreponpub + (grep -v '^#' "$output_file" | head -5 | while IFS=$'\t' read -r event_id kind reason repo npub; do + echo " kind=$kind event_id=${event_id:0:16}... 
reason=\"${reason:0:60}...\"" + done) || true fi # Breakdown by kind if [[ $count -gt 0 ]]; then echo "" log_info "Breakdown by event kind:" - tail -n +5 "$output_file" | awk -F'\t' '{print $3}' | sort | uniq -c | sort -rn | while read -r cnt kind; do + # Use a subshell to avoid SIGPIPE issues with set -e + # kind is now column 2 + (grep -v '^#' "$output_file" | awk -F'\t' '{print $2}' | sort | uniq -c | sort -rn | while read -r cnt kind; do echo " kind $kind: $cnt failures" - done + done) || true + fi + + # Breakdown by reason pattern (for invalid announcements) + if [[ $invalid_announcement_count -gt 0 ]]; then + echo "" + log_info "Breakdown by reason pattern:" + # Extract the main reason type (before the colon details) + (grep -v '^#' "$output_file" | awk -F'\t' '{print $3}' | sed 's/:.*//' | sort | uniq -c | sort -rn | head -10 | while read -r cnt reason; do + echo " $reason: $cnt" + done) || true fi + + # Explicit success exit + exit 0 } main "$@" diff --git a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh index a603a1e..a0c8ad0 100755 --- a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh +++ b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh @@ -356,7 +356,7 @@ main() { parsed=$(parse_log_line "$line") if [[ -n "$parsed" ]]; then echo "$parsed" >> "$output_file" - ((count++)) + count=$((count + 1)) fi done <<< "$raw_lines" @@ -374,9 +374,10 @@ main() { if [[ $count -gt 0 ]]; then echo "" log_info "Sample entries (first 5):" - tail -n +5 "$output_file" | head -5 | while IFS=$'\t' read -r repo npub timestamp reason; do + # Use a subshell to avoid SIGPIPE issues with set -e + (tail -n +5 "$output_file" | head -5 | while IFS=$'\t' read -r repo npub timestamp reason; do echo " repo=$repo npub=${npub:0:20}... 
timestamp=$timestamp" - done + done) || true fi # Show unique repos affected @@ -388,9 +389,10 @@ main() { echo "" log_info "Repositories with purgatory expiry:" - tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort | uniq -c | sort -rn | head -10 | while read -r cnt repo; do + # Use a subshell to avoid SIGPIPE issues with set -e + (tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort | uniq -c | sort -rn | head -10 | while read -r cnt repo; do echo " $repo: $cnt expiry events" - done + done) || true local total_repos total_repos=$(tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort -u | wc -l) @@ -398,6 +400,9 @@ main() { echo " ... and $((total_repos - 10)) more repositories" fi fi + + # Explicit success exit + exit 0 } main "$@" -- cgit v1.2.3 From 26e3f24e491ac0b9a61eaa2831de250b68bd9d96 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 17:19:48 +0000 Subject: Fix double-counting bug in parse failures extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The script was counting the same invalid announcement twice because: - Write policy logs use hex event IDs - Builder logs use note1 (bech32) event IDs - Deduplication only worked within each format Fix: Only extract from write policy logs (hex IDs) to avoid the format mismatch. Builder logs contain the same events, so we don't lose any data. Result: 446 entries → 223 unique invalid announcements (correct count) --- .../migration-scripts/30-extract-parse-failures.sh | 70 ++++++---------------- 1 file changed, 17 insertions(+), 53 deletions(-) diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh index 114a44d..7870c61 100755 --- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh +++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh @@ -39,8 +39,10 @@ # 2. 
"Invalid announcement" rejections (write policy): # Event rejected by write policy event_id=abc123... relay=wss://... kind=30617 reason=Invalid announcement: multiple clone tags found... # -# 3. "Rejected repository announcement" (builder): -# Rejected repository announcement note1xxx: Invalid announcement: multiple clone tags found... +# NOTE: Builder logs ("Rejected repository announcement note1xxx:") are NOT extracted +# because they use bech32 (note1) IDs while write policy logs use hex IDs. Extracting +# both would cause double-counting since deduplication only works within each format. +# Write policy logs contain the same events, so we don't lose any data. # # Required fields: kind, event_id, reason # Optional fields: repo, npub (may not be available for all entry types) @@ -135,7 +137,6 @@ usage() { echo "Expected log formats:" echo " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." echo " Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ..." - echo " Rejected repository announcement note1xxx: Invalid announcement: ..." exit 1 } @@ -197,28 +198,11 @@ parse_write_policy_rejection_line() { fi } -# Parse a "Rejected repository announcement" log line from builder -# Input: log line containing "Rejected repository announcement : Invalid announcement:" -# Output: TSV line: event_idkindreasonreponpub -# Note: The note_id is bech32 encoded, we need to extract it -parse_builder_rejection_line() { - local line="$1" - - local note_id reason - - # Extract note_id (note1...) 
from "Rejected repository announcement note1xxx:" - note_id=$(echo "$line" | grep -oP 'Rejected repository announcement \Knote1[a-z0-9]+' || echo "") - - # Extract reason (everything after the note_id and colon) - reason=$(echo "$line" | grep -oP 'Rejected repository announcement note1[a-z0-9]+: \K.*$' || echo "") - - # Only output if we have the required fields - # Kind is always 30617 for announcements - if [[ -n "$note_id" && -n "$reason" ]]; then - # Use note_id as event_id (bech32 format), kind=30617, repo and npub empty - printf '%s\t%s\t%s\t\t\n' "$note_id" "30617" "$reason" - fi -} +# NOTE: parse_builder_rejection_line() was removed to fix double-counting bug. +# Builder logs use bech32 (note1) IDs while write policy logs use hex IDs. +# Since deduplication only works within each format, extracting both caused +# the same event to be counted twice. Write policy logs contain the same +# events, so we don't lose any data by only extracting from that source. # Main main() { @@ -361,11 +345,10 @@ main() { log_info "Extracting log entries..." # Create temp files for intermediate results - local temp_stderr temp_parse_fail temp_write_policy_rejection temp_builder_rejection + local temp_stderr temp_parse_fail temp_write_policy_rejection temp_stderr=$(mktemp) temp_parse_fail=$(mktemp) temp_write_policy_rejection=$(mktemp) - temp_builder_rejection=$(mktemp) # Extract [PARSE_FAIL] entries directly to temp file (streaming) log_info " Searching for [PARSE_FAIL] entries..." @@ -378,29 +361,24 @@ main() { fi # Extract "Event rejected by write policy" with "Invalid announcement" (streaming) + # NOTE: We only extract from write policy logs (hex IDs), not builder logs (note1 IDs) + # to avoid double-counting. Both log sources contain the same events. log_info " Searching for write policy rejections..." 
eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep 'Invalid announcement' > "$temp_write_policy_rejection" || true - # Extract "Rejected repository announcement" from builder (streaming) - log_info " Searching for builder rejections..." - eval "$journal_cmd" 2>/dev/null | grep 'Rejected repository announcement' | grep 'Invalid announcement' > "$temp_builder_rejection" || true - rm -f "$temp_stderr" # Check if we found anything - local parse_fail_line_count write_policy_line_count builder_line_count + local parse_fail_line_count write_policy_line_count parse_fail_line_count=$(wc -l < "$temp_parse_fail") parse_fail_line_count="${parse_fail_line_count//[^0-9]/}" write_policy_line_count=$(wc -l < "$temp_write_policy_rejection") write_policy_line_count="${write_policy_line_count//[^0-9]/}" - builder_line_count=$(wc -l < "$temp_builder_rejection") - builder_line_count="${builder_line_count//[^0-9]/}" log_info " Found $parse_fail_line_count [PARSE_FAIL] log lines" log_info " Found $write_policy_line_count write policy rejection log lines" - log_info " Found $builder_line_count builder rejection log lines" - local total_invalid_announcement_lines=$((write_policy_line_count + builder_line_count)) + local total_invalid_announcement_lines=$write_policy_line_count if [[ "$parse_fail_line_count" -eq 0 && "$total_invalid_announcement_lines" -eq 0 ]]; then log_warn "No matching entries found in logs." @@ -431,7 +409,7 @@ main() { echo "# This is expected if ngit-grasp logging improvements are not yet deployed." } > "$output_file" - rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_builder_rejection" + rm -f "$temp_parse_fail" "$temp_write_policy_rejection" log_info "Created empty output file: $output_file" exit 0 fi @@ -478,23 +456,9 @@ main() { done < "$temp_write_policy_rejection" fi - # Parse builder rejection entries - log_info " Parsing builder rejection entries..." 
- local builder_count=0 - if [[ "$builder_line_count" -gt 0 ]]; then - while IFS= read -r line; do - local parsed - parsed=$(parse_builder_rejection_line "$line") - if [[ -n "$parsed" ]]; then - echo "$parsed" >> "$output_file" - builder_count=$((builder_count + 1)) - fi - done < "$temp_builder_rejection" - fi - - local invalid_announcement_count=$((write_policy_count + builder_count)) + local invalid_announcement_count=$write_policy_count - rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_builder_rejection" + rm -f "$temp_parse_fail" "$temp_write_policy_rejection" # Deduplicate by event_id (first column) - keep first occurrence log_info " Deduplicating entries..." -- cgit v1.2.3 From 0e00db4decfa779c26c6c7648b2badcc5704e6f8 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 17:38:32 +0000 Subject: Add --analysis-root filter to parse failures script Filter parse failures to only those for announcements that are in production but missing from the archive. This eliminates noise from rejections of events from other relays that don't affect migration. Before: 223 parse failures (all rejections from all relays) After: 18 parse failures (only for missing announcements) The filter works by: 1. Reading missing announcements from comparison data 2. Extracting event IDs from production announcements JSON 3. 
Filtering parse failures to only matching event IDs --- .../migration-scripts/30-extract-parse-failures.sh | 178 ++++++++++++++++++++- 1 file changed, 175 insertions(+), 3 deletions(-) diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh index 7870c61..f821834 100755 --- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh +++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh @@ -125,18 +125,25 @@ usage() { echo " output-dir Directory to store extracted log data" echo "" echo "Options:" - echo " --since Start date (default: 30 days ago)" - echo " --until End date (default: now)" - echo " --dry-run Show what would be extracted without writing" + echo " --since Start date (default: 30 days ago)" + echo " --until End date (default: now)" + echo " --analysis-root Filter to only missing announcements from analysis" + echo " --dry-run Show what would be extracted without writing" echo "" echo "Examples:" echo " $0 ngit-grasp.service output/logs" echo " $0 ngit-grasp.service output/logs --since '2026-01-01'" echo " $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'" + echo " $0 ngit-grasp.service output/logs --analysis-root /tmp/migration-analysis-20260123" echo "" echo "Expected log formats:" echo " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." echo " Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ..." + echo "" + echo "Filtering with --analysis-root:" + echo " When provided, only parse failures for announcements that are in production" + echo " but missing from the archive will be included. This filters out rejections" + echo " for events from other relays that don't affect the migration." exit 1 } @@ -204,6 +211,155 @@ parse_write_policy_rejection_line() { # the same event to be counted twice. 
Write policy logs contain the same # events, so we don't lose any data by only extracting from that source. +# Filter parse failures to only those for missing announcements +# This is used when --analysis-root is provided to scope results to the migration +# +# Arguments: +# $1 - parse failures file to filter (modified in place) +# $2 - analysis root directory containing comparison/ and prod/ subdirs +# +# The function: +# 1. Reads missing announcements from comparison/complete-prod-missing-archive.txt +# 2. Extracts pubkey/identifier pairs for those announcements +# 3. Reads production announcements from prod/raw/announcements.json +# 4. Gets event IDs for announcements matching the missing pubkey/identifier pairs +# 5. Filters parse failures to only those event IDs +filter_to_missing_announcements() { + local parse_failures_file="$1" + local analysis_root="$2" + + local missing_file="$analysis_root/comparison/complete-prod-missing-archive.txt" + local prod_announcements="$analysis_root/prod/raw/announcements.json" + + # Validate required files exist + if [[ ! -f "$missing_file" ]]; then + log_warn "Missing announcements file not found: $missing_file" + log_warn "Skipping filter - all parse failures will be included" + return 0 + fi + + if [[ ! -f "$prod_announcements" ]]; then + log_warn "Production announcements file not found: $prod_announcements" + log_warn "Skipping filter - all parse failures will be included" + return 0 + fi + + # Check if jq is available + if ! command -v jq &> /dev/null; then + log_warn "jq not found - cannot filter parse failures" + log_warn "Install jq or run without --analysis-root" + return 0 + fi + + log_info "Filtering parse failures to missing announcements only..." 
+ + # Step 1: Extract pubkey/identifier pairs from missing announcements + # Format: identifier | npub | prod=complete | archive=missing + local missing_pairs_file + missing_pairs_file=$(mktemp) + + # Extract identifier and npub, convert npub to hex pubkey for matching + while IFS=' | ' read -r identifier npub rest; do + # Skip empty lines + [[ -z "$identifier" ]] && continue + # Trim whitespace + identifier=$(echo "$identifier" | xargs) + npub=$(echo "$npub" | xargs) + echo "${identifier}|${npub}" + done < "$missing_file" > "$missing_pairs_file" + + local missing_count + missing_count=$(wc -l < "$missing_pairs_file") + missing_count="${missing_count//[^0-9]/}" + log_info " Found $missing_count missing announcements to filter for" + + # Step 2: Get event IDs from production announcements for these pairs + # We need to match on 'd' tag (identifier) and pubkey + local missing_event_ids_file + missing_event_ids_file=$(mktemp) + + # Create a lookup of identifier|npub -> event_id from production announcements + # The JSON has: id, pubkey (hex), tags (array with ["d", identifier]) + log_info " Extracting event IDs from production announcements..." 
+ + # Use jq to extract id, pubkey, and d-tag value, then filter + # Output format: event_id|identifier|pubkey_hex + # Note: The JSON file is NDJSON (newline-delimited), not an array + jq -r 'select(.kind == 30617) | + .id as $id | + .pubkey as $pubkey | + (.tags[] | select(.[0] == "d") | .[1]) as $dtag | + "\($id)|\($dtag)|\($pubkey)"' "$prod_announcements" > "$missing_event_ids_file.all" 2>/dev/null || { + log_warn "Failed to parse production announcements JSON" + rm -f "$missing_pairs_file" "$missing_event_ids_file" "$missing_event_ids_file.all" + return 0 + } + + # Now filter to only event IDs for missing announcements + # We need to convert npub to hex pubkey for comparison + # npub is bech32, pubkey in JSON is hex + # For simplicity, we'll match on identifier only (d-tag) since it should be unique per pubkey + # Actually, we need both because same identifier can exist for different pubkeys + + # Create a set of "identifier|pubkey_hex" to match against + # First, we need to convert npub to hex - but that requires a tool + # Alternative: match on identifier only and accept some false positives + # Better: use the comparison file which has npub, and match against announcements + + # Let's match on identifier only for now (simpler, may have minor false positives) + # Extract just the identifiers from missing announcements + local missing_identifiers_file + missing_identifiers_file=$(mktemp) + cut -d'|' -f1 "$missing_pairs_file" | sort -u > "$missing_identifiers_file" + + # Filter event IDs to only those with matching identifiers + while IFS='|' read -r event_id identifier pubkey_hex; do + if grep -qFx "$identifier" "$missing_identifiers_file"; then + echo "$event_id" + fi + done < "$missing_event_ids_file.all" | sort -u > "$missing_event_ids_file" + + local event_id_count + event_id_count=$(wc -l < "$missing_event_ids_file") + event_id_count="${event_id_count//[^0-9]/}" + log_info " Found $event_id_count event IDs for missing announcements" + + # Step 3: Filter 
parse failures to only those event IDs + local filtered_file + filtered_file=$(mktemp) + + # Copy header lines + grep '^#' "$parse_failures_file" > "$filtered_file" + + # Add a note about filtering + echo "# Filtered to missing announcements only (--analysis-root)" >> "$filtered_file" + echo "# Analysis root: $analysis_root" >> "$filtered_file" + echo "# Missing announcements: $missing_count" >> "$filtered_file" + echo "# Matching event IDs: $event_id_count" >> "$filtered_file" + + # Filter data lines - only include if event_id is in our list + local filtered_count=0 + while IFS=$'\t' read -r event_id kind reason repo npub; do + # Skip header lines (already copied) + [[ "$event_id" =~ ^# ]] && continue + + # Check if this event_id is in our missing list + if grep -qFx "$event_id" "$missing_event_ids_file"; then + printf '%s\t%s\t%s\t%s\t%s\n' "$event_id" "$kind" "$reason" "$repo" "$npub" >> "$filtered_file" + filtered_count=$((filtered_count + 1)) + fi + done < "$parse_failures_file" + + # Replace original with filtered version + mv "$filtered_file" "$parse_failures_file" + + # Cleanup temp files + rm -f "$missing_pairs_file" "$missing_event_ids_file" "$missing_event_ids_file.all" "$missing_identifiers_file" + + log_info " Filtered from $(grep -v '^#' "$parse_failures_file" | wc -l | xargs) to $filtered_count parse failures" + log_success "Filtered to parse failures for missing announcements only" +} + # Main main() { if [[ $# -lt 2 ]]; then @@ -219,6 +375,7 @@ main() { since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "") local until_date="" local dry_run=false + local analysis_root="" # Parse options while [[ $# -gt 0 ]]; do @@ -231,6 +388,10 @@ main() { until_date="$2" shift 2 ;; + --analysis-root) + analysis_root="$2" + shift 2 + ;; --dry-run) dry_run=true shift @@ -469,6 +630,11 @@ main() { grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file" mv "$deduped_file" "$output_file" + # Filter 
to missing announcements only if analysis root provided + if [[ -n "$analysis_root" ]]; then + filter_to_missing_announcements "$output_file" "$analysis_root" + fi + # Count final entries (excluding header lines) local count count=$(grep -v '^#' "$output_file" | wc -l) @@ -482,9 +648,15 @@ main() { log_info "=== Extraction Summary ===" log_info "Service: $service" log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" + if [[ -n "$analysis_root" ]]; then + log_info "Filtered to: missing announcements only" + fi log_success "Extracted $count total entries" log_info " - [PARSE_FAIL] entries: $parse_fail_count" log_info " - Invalid announcement rejections: $invalid_announcement_count" + if [[ -n "$analysis_root" ]]; then + log_info " (filtered from original extraction)" + fi echo "" log_info "Output file: $output_file" -- cgit v1.2.3 From ef50f4bd9775e86011d8d636df13422c0dcec287 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Mon, 26 Jan 2026 07:19:19 +0000 Subject: Fix parse failure output to show repo|npub instead of event_id|kind Phase 4 (30-extract-parse-failures.sh) now enriches parse failures with repo name and npub by looking up event_id in announcements.json. This is critical because 'Invalid announcement' rejections only log event_id and kind, not the repo name or npub. Phase 5 (40-classify-actions.sh) was also fixed to extract columns 4 and 5 (repo|npub) instead of columns 1 and 2 (event_id|kind) from parse-failures.txt. Without this fix, action-required.txt showed unusable output like: 000014b2... | 30617 | parse failure logged | fix event format... Now it correctly shows: scripts | npub1hs5244... | parse failure logged | fix event format... The enrichment uses jq to build a lookup table from announcements.json and optionally uses 'nak' to convert hex pubkeys to npub format. 
--- .../migration-scripts/30-extract-parse-failures.sh | 141 +++++++++++++++++++++ .../migration-scripts/40-classify-actions.sh | 6 +- 2 files changed, 145 insertions(+), 2 deletions(-) diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh index f821834..f86e9f8 100755 --- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh +++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh @@ -211,6 +211,140 @@ parse_write_policy_rejection_line() { # the same event to be counted twice. Write policy logs contain the same # events, so we don't lose any data by only extracting from that source. +# Enrich parse failures with repo/npub by looking up event_id in announcements.json +# This is critical because "Invalid announcement" rejections only log event_id and kind, +# not the repo name or npub. Without enrichment, Phase 5 shows event_id|kind instead +# of repo|npub in action-required.txt, making the output unusable. +# +# Arguments: +# $1 - parse failures file to enrich (modified in place) +# $2 - analysis root directory containing prod/raw/announcements.json +# +# The function: +# 1. Builds a lookup table from announcements.json: event_id -> repo|npub +# 2. For each parse failure with empty repo/npub, looks up the event_id +# 3. Populates repo and npub columns from the lookup +enrich_with_repo_npub() { + local parse_failures_file="$1" + local analysis_root="$2" + + local prod_announcements="$analysis_root/prod/raw/announcements.json" + + # Validate required file exists + if [[ ! -f "$prod_announcements" ]]; then + log_warn "Production announcements file not found: $prod_announcements" + log_warn "Skipping enrichment - repo/npub columns will remain empty" + return 0 + fi + + # Check if jq is available + if ! 
command -v jq &> /dev/null; then + log_warn "jq not found - cannot enrich parse failures with repo/npub" + log_warn "Install jq or run without --analysis-root" + return 0 + fi + + log_info "Enriching parse failures with repo/npub from announcements..." + + # Step 1: Build lookup table from announcements.json + # Output format: event_idreponpub + local lookup_file + lookup_file=$(mktemp) + + # Extract id, d-tag (repo identifier), and pubkey from announcements + # Convert pubkey to npub using bech32 encoding + # Note: We use a simple hex-to-npub conversion via external tool if available, + # otherwise we'll use the hex pubkey (Phase 5 can still match on it) + log_info " Building event_id -> repo/npub lookup table..." + + # First, extract the raw data: id, d-tag, pubkey (hex) + jq -r 'select(.kind == 30617) | + .id as $id | + .pubkey as $pubkey | + ((.tags[] | select(.[0] == "d") | .[1]) // "") as $dtag | + "\($id)\t\($dtag)\t\($pubkey)"' "$prod_announcements" > "$lookup_file.raw" 2>/dev/null || { + log_warn "Failed to parse production announcements JSON" + rm -f "$lookup_file" "$lookup_file.raw" + return 0 + } + + # Convert hex pubkeys to npub format + # Check if we have a tool to do bech32 encoding (nak, nostr-tool, etc.) 
+ local can_convert_npub=false + if command -v nak &> /dev/null; then + can_convert_npub=true + log_info " Using 'nak' for pubkey->npub conversion" + fi + + # Process the lookup file, converting pubkeys to npubs if possible + while IFS=$'\t' read -r event_id repo pubkey_hex; do + local npub + if [[ "$can_convert_npub" == true && -n "$pubkey_hex" ]]; then + # Use nak to encode pubkey as npub + npub=$(nak encode npub "$pubkey_hex" 2>/dev/null || echo "") + fi + # Fall back to hex pubkey if conversion failed + [[ -z "$npub" ]] && npub="$pubkey_hex" + printf '%s\t%s\t%s\n' "$event_id" "$repo" "$npub" + done < "$lookup_file.raw" > "$lookup_file" + + rm -f "$lookup_file.raw" + + local lookup_count + lookup_count=$(wc -l < "$lookup_file") + lookup_count="${lookup_count//[^0-9]/}" + log_info " Built lookup table with $lookup_count announcements" + + # Step 2: Enrich parse failures + local enriched_file + enriched_file=$(mktemp) + + # Copy header lines + grep '^#' "$parse_failures_file" > "$enriched_file" + + # Process data lines + local enriched_count=0 + local total_count=0 + while IFS=$'\t' read -r event_id kind reason repo npub; do + # Skip header lines (already copied) + [[ "$event_id" =~ ^# ]] && continue + + total_count=$((total_count + 1)) + + # If repo and npub are already populated, keep them + if [[ -n "$repo" && -n "$npub" ]]; then + printf '%s\t%s\t%s\t%s\t%s\n' "$event_id" "$kind" "$reason" "$repo" "$npub" >> "$enriched_file" + continue + fi + + # Look up event_id in our table + local lookup_result + lookup_result=$(grep "^${event_id}"$'\t' "$lookup_file" 2>/dev/null | head -1 || echo "") + + if [[ -n "$lookup_result" ]]; then + local looked_up_repo looked_up_npub + looked_up_repo=$(echo "$lookup_result" | cut -f2) + looked_up_npub=$(echo "$lookup_result" | cut -f3) + + # Use looked-up values if original was empty + [[ -z "$repo" ]] && repo="$looked_up_repo" + [[ -z "$npub" ]] && npub="$looked_up_npub" + enriched_count=$((enriched_count + 1)) + fi + + printf 
'%s\t%s\t%s\t%s\t%s\n' "$event_id" "$kind" "$reason" "$repo" "$npub" >> "$enriched_file" + done < "$parse_failures_file" + + # Replace original with enriched version + mv "$enriched_file" "$parse_failures_file" + + # Cleanup + rm -f "$lookup_file" + + log_info " Enriched $enriched_count of $total_count parse failures with repo/npub" + log_success "Enrichment complete" +} + # Filter parse failures to only those for missing announcements # This is used when --analysis-root is provided to scope results to the migration # @@ -630,6 +764,13 @@ main() { grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file" mv "$deduped_file" "$output_file" + # Enrich with repo/npub from announcements.json if analysis root provided + # This is critical for usability - without it, action-required.txt shows + # event_id|kind instead of repo|npub, making parse failures unidentifiable + if [[ -n "$analysis_root" ]]; then + enrich_with_repo_npub "$output_file" "$analysis_root" + fi + # Filter to missing announcements only if analysis root provided if [[ -n "$analysis_root" ]]; then filter_to_missing_announcements "$output_file" "$analysis_root" diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh index 1706e47..53c0f9d 100755 --- a/docs/how-to/migration-scripts/40-classify-actions.sh +++ b/docs/how-to/migration-scripts/40-classify-actions.sh @@ -329,9 +329,11 @@ main() { # ========================================================================= log_info "Parsing log-based categories..." 
- # Parse failures: reponpubkindevent_idreason + # Parse failures: event_idkindreasonreponpub + # Note: repo and npub are in columns 4 and 5 (enriched by Phase 4 from announcements.json) + # Some entries may have empty repo/npub if the event_id wasn't found in announcements if [[ -f "$logs_dir/parse-failures.txt" ]] && file_has_content "$logs_dir/parse-failures.txt"; then - grep -v '^#' "$logs_dir/parse-failures.txt" | awk -F'\t' '{print $1 "|" $2}' | sort -u > "$tmp_dir/parse_failures.txt" + grep -v '^#' "$logs_dir/parse-failures.txt" | awk -F'\t' '{print $4 "|" $5}' | sort -u > "$tmp_dir/parse_failures.txt" log_info "Found $(wc -l < "$tmp_dir/parse_failures.txt" | tr -d ' ') parse failure entries" else touch "$tmp_dir/parse_failures.txt" -- cgit v1.2.3 From e25b9591d1f2cb4f54649d728104993e11e53d24 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Mon, 26 Jan 2026 08:53:46 +0000 Subject: Add classification script with Option B redesign Implements the redesigned migration analysis classification system: Tier 1 - Ready for Migration (no action required): - Complete in both prod and archive - Deleted by user (kind 5 event) - Empty in prod (cat2) - always no action, regardless of archive - Archive-only (not in prod) - Purgatory-only (not in prod) Tier 2 - Needs Re-sync (action required): - Complete in prod, missing/incomplete in archive - Includes purgatory context (expired vs never-tried) Tier 3 - Manual Review (investigation needed): - Partial in prod (cat3) - No-match in prod (cat4) - Parse failures with complete prod Key fixes: - Use safe arithmetic ($((x + 1))) instead of ((x++)) with set -e - Batch nak hex-to-npub conversions for deletion processing - Handle NDJSON format for deletion files Output: 352 ready, 295 resync, 46 review (693 total) --- scripts/40-classify-actions.sh | 588 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 588 insertions(+) create mode 100755 scripts/40-classify-actions.sh diff --git a/scripts/40-classify-actions.sh 
b/scripts/40-classify-actions.sh new file mode 100755 index 0000000..021a2da --- /dev/null +++ b/scripts/40-classify-actions.sh @@ -0,0 +1,588 @@ +#!/usr/bin/env bash +# +# 40-classify-actions.sh - Classify repos by migration action required +# +# Implements the redesigned classification system (Option B) with user feedback: +# +# Tier 1: No Action Required (ready-for-migration.txt) +# - Complete in both (prod=cat1, archive=cat1) +# - Deleted by user (kind 5 event) +# - Empty in prod (prod=cat2, any archive status) +# - Archive-only (archive=any, prod=missing) +# - Not in prod (purgatory-only, prod=missing) +# +# Tier 2: Action Required (needs-resync.txt) +# - Complete in prod, missing from archive (with purgatory context) +# - Complete in prod, incomplete in archive (with purgatory context) +# +# Tier 3: Manual Investigation (manual-review.txt) +# - Partial in prod (prod=cat3) +# - No-match in prod (prod=cat4) +# - Parse failures +# - Conflicting states +# +# Usage: ./40-classify-actions.sh +# +# Output format: repo | npub | prod_status | archive_status | context | action +# + +set -euo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +log_info() { echo -e "${BLUE}[INFO]${NC} $*"; } +log_success() { echo -e "${GREEN}[OK]${NC} $*"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; } + +# Check arguments +if [[ $# -lt 1 ]]; then + echo "Usage: $0 " + echo "Example: $0 work/migration-analysis-20260123-200701" + exit 1 +fi + +ANALYSIS_DIR="$1" + +# Validate analysis directory +if [[ ! 
-d "$ANALYSIS_DIR" ]]; then + log_error "Analysis directory not found: $ANALYSIS_DIR" + exit 1 +fi + +# Define paths +PROD_DIR="$ANALYSIS_DIR/prod" +ARCHIVE_DIR="$ANALYSIS_DIR/archive" +COMPARISON_DIR="$ANALYSIS_DIR/comparison" +LOGS_DIR="$ANALYSIS_DIR/logs" +RESULTS_DIR="$ANALYSIS_DIR/results" + +# Validate required directories +for dir in "$PROD_DIR" "$ARCHIVE_DIR" "$COMPARISON_DIR" "$LOGS_DIR"; do + if [[ ! -d "$dir" ]]; then + log_error "Required directory not found: $dir" + exit 1 + fi +done + +# Create results directory +mkdir -p "$RESULTS_DIR" + +# Output files +READY_FILE="$RESULTS_DIR/ready-for-migration.txt" +RESYNC_FILE="$RESULTS_DIR/needs-resync.txt" +REVIEW_FILE="$RESULTS_DIR/manual-review.txt" +SUMMARY_FILE="$RESULTS_DIR/summary.txt" + +# Temporary files for processing +TMP_DIR=$(mktemp -d) +trap 'rm -rf "$TMP_DIR"' EXIT + +log_info "Starting classification with revised system (Option B)" +log_info "Analysis directory: $ANALYSIS_DIR" + +# ============================================================================ +# Phase 1: Build lookup tables from source data +# ============================================================================ + +log_info "Building lookup tables..." 
+ +# Build prod category lookup: repo|npub -> category +declare -A PROD_CAT +while IFS='|' read -r repo npub rest; do + repo=$(echo "$repo" | xargs) + npub=$(echo "$npub" | xargs) + [[ -z "$repo" || -z "$npub" ]] && continue + PROD_CAT["$repo|$npub"]="cat1" +done < "$PROD_DIR/category1-complete-match.txt" + +while IFS='|' read -r repo npub rest; do + repo=$(echo "$repo" | xargs) + npub=$(echo "$npub" | xargs) + [[ -z "$repo" || -z "$npub" ]] && continue + PROD_CAT["$repo|$npub"]="cat2" +done < "$PROD_DIR/category2-empty-blank.txt" + +while IFS='|' read -r repo npub rest; do + repo=$(echo "$repo" | xargs) + npub=$(echo "$npub" | xargs) + [[ -z "$repo" || -z "$npub" ]] && continue + PROD_CAT["$repo|$npub"]="cat3" +done < "$PROD_DIR/category3-partial-match.txt" + +while IFS='|' read -r repo npub rest; do + repo=$(echo "$repo" | xargs) + npub=$(echo "$npub" | xargs) + [[ -z "$repo" || -z "$npub" ]] && continue + PROD_CAT["$repo|$npub"]="cat4" +done < "$PROD_DIR/category4-no-match.txt" + +log_info "Loaded ${#PROD_CAT[@]} prod entries" + +# Build archive category lookup: repo|npub -> category +declare -A ARCHIVE_CAT +while IFS='|' read -r repo npub rest; do + repo=$(echo "$repo" | xargs) + npub=$(echo "$npub" | xargs) + [[ -z "$repo" || -z "$npub" ]] && continue + ARCHIVE_CAT["$repo|$npub"]="cat1" +done < "$ARCHIVE_DIR/category1-complete-match.txt" + +while IFS='|' read -r repo npub rest; do + repo=$(echo "$repo" | xargs) + npub=$(echo "$npub" | xargs) + [[ -z "$repo" || -z "$npub" ]] && continue + ARCHIVE_CAT["$repo|$npub"]="cat2" +done < "$ARCHIVE_DIR/category2-empty-blank.txt" + +while IFS='|' read -r repo npub rest; do + repo=$(echo "$repo" | xargs) + npub=$(echo "$npub" | xargs) + [[ -z "$repo" || -z "$npub" ]] && continue + ARCHIVE_CAT["$repo|$npub"]="cat3" +done < "$ARCHIVE_DIR/category3-partial-match.txt" + +while IFS='|' read -r repo npub rest; do + repo=$(echo "$repo" | xargs) + npub=$(echo "$npub" | xargs) + [[ -z "$repo" || -z "$npub" ]] && continue + 
ARCHIVE_CAT["$repo|$npub"]="cat4" +done < "$ARCHIVE_DIR/category4-no-match.txt" + +log_info "Loaded ${#ARCHIVE_CAT[@]} archive entries" + +# Build purgatory lookup: repo|npub -> 1 (if purgatory expired) +declare -A PURGATORY +if [[ -f "$LOGS_DIR/purgatory-expired.txt" ]]; then + while IFS=$'\t' read -r repo npub timestamp reason; do + # Skip comments and empty lines + [[ "$repo" =~ ^# ]] && continue + [[ -z "$repo" || -z "$npub" ]] && continue + PURGATORY["$repo|$npub"]=1 + done < "$LOGS_DIR/purgatory-expired.txt" +fi +log_info "Loaded ${#PURGATORY[@]} purgatory entries" + +# Build parse failure lookup: repo|npub -> 1 (if parse failure logged) +# Parse failures file format: event_idkindreasonreponpub +declare -A PARSE_FAIL +if [[ -f "$LOGS_DIR/parse-failures.txt" ]]; then + while IFS=$'\t' read -r event_id kind reason repo npub; do + # Skip comments and empty lines + [[ "$event_id" =~ ^# ]] && continue + [[ -z "$repo" || -z "$npub" ]] && continue + PARSE_FAIL["$repo|$npub"]=1 + done < "$LOGS_DIR/parse-failures.txt" +fi +log_info "Loaded ${#PARSE_FAIL[@]} parse failure entries" + +# Build deletion lookup: repo|npub -> 1 (if kind 5 deletion event) +# Deletions are in NDJSON format with "a" tags like "30617:pubkey_hex:repo" +# We need to convert hex pubkeys to npub format using nak +declare -A DELETED + +# Helper function to process deletion file (NDJSON format) +# Extracts unique pubkey_hex:repo pairs and converts to npub +process_deletions() { + local file="$1" + [[ ! 
-f "$file" ]] && return + + # Extract unique pubkey_hex|repo pairs from NDJSON + # Each line is a JSON object, extract "a" tags + local pairs + pairs=$(jq -r '.tags[] | select(.[0] == "a") | .[1]' "$file" 2>/dev/null | \ + sed 's/^30617://' | awk -F: '{print $1 "|" $2}' | sort -u) + + # Get unique hex pubkeys for batch conversion + local hex_keys + hex_keys=$(echo "$pairs" | cut -d'|' -f1 | sort -u) + + # Build hex->npub lookup via batch nak call + declare -A HEX_TO_NPUB + while read -r hex; do + [[ -z "$hex" ]] && continue + local npub + npub=$(nak encode npub "$hex" 2>/dev/null || echo "") + [[ -n "$npub" ]] && HEX_TO_NPUB["$hex"]="$npub" + done <<< "$hex_keys" + + # Now process pairs with cached npub values + while IFS='|' read -r pubkey_hex repo; do + [[ -z "$repo" || -z "$pubkey_hex" ]] && continue + local npub="${HEX_TO_NPUB[$pubkey_hex]:-}" + [[ -z "$npub" ]] && continue + DELETED["$repo|$npub"]=1 + done <<< "$pairs" +} + +# Process prod and archive deletions +process_deletions "$PROD_DIR/raw/deletions.json" +process_deletions "$ARCHIVE_DIR/raw/deletions.json" +log_info "Loaded ${#DELETED[@]} deletion entries" + +# ============================================================================ +# Phase 2: Build unique repo list from all sources +# ============================================================================ + +log_info "Building unique repo list..." + +declare -A ALL_REPOS +for key in "${!PROD_CAT[@]}"; do + ALL_REPOS["$key"]=1 +done +for key in "${!ARCHIVE_CAT[@]}"; do + ALL_REPOS["$key"]=1 +done +for key in "${!PURGATORY[@]}"; do + ALL_REPOS["$key"]=1 +done + +log_info "Total unique repos: ${#ALL_REPOS[@]}" + +# ============================================================================ +# Phase 3: Classify each repo according to revised decision tree +# ============================================================================ + +log_info "Classifying repos..." 

# Counters for summary: one entry per classification reason. Keys are read
# verbatim by the summary / console phases further down the script.
declare -A COUNTS=(
  [ready_complete_both]=0
  [ready_deleted]=0
  [ready_empty_prod]=0
  [ready_archive_only]=0
  [ready_not_in_prod]=0
  [resync_missing_archive]=0
  [resync_incomplete_archive]=0
  [review_partial_prod]=0
  [review_nomatch_prod]=0
  [review_parse_failure]=0
  [review_conflicting]=0
)

# Output arrays (one formatted record per repo), explicitly initialised empty
# so their length and expansion are well defined before the first append.
declare -a READY_LINES=()
declare -a RESYNC_LINES=()
declare -a REVIEW_LINES=()

#######################################
# Build the human-readable context column for one repo.
# Globals:   PURGATORY (read), PARSE_FAIL (read)
# Arguments: $1 - lookup key "repo|npub"
#            $2 - prod status as produced by cat_to_status
#            $3 - archive status as produced by cat_to_status
# Outputs:   comma-separated context flags on stdout, or "none"
#######################################
get_context() {
  local key="$1"
  local prod_status="$2"
  local archive_status="$3"
  local context=""

  # Purgatory entry logged for this repo
  if [[ -n "${PURGATORY[$key]:-}" ]]; then
    context="purgatory-expired"
  fi

  # Parse failure logged for this repo
  if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
    context="${context:+$context, }parse-failure"
  fi

  # Unexpected state: prod is empty although the archive holds git data
  if [[ "$prod_status" == "empty" \
      && "$archive_status" != "missing" \
      && "$archive_status" != "empty" ]]; then
    context="${context:+$context, }archive-has-data"
  fi

  echo "${context:-none}"
}

#######################################
# Convert a category code to its human-readable status.
# Arguments: $1 - category (cat1..cat4 or "missing")
# Outputs:   status word on stdout; unknown codes are echoed unchanged
#######################################
cat_to_status() {
  case "$1" in
    cat1) echo "complete" ;;
    cat2) echo "empty" ;;
    cat3) echo "partial" ;;
    cat4) echo "no-match" ;;
    missing) echo "missing" ;;
    *) echo "$1" ;;
  esac
}

#######################################
# Append one record to a tier's output array and bump its reason counter.
# Globals:   READY_LINES / RESYNC_LINES / REVIEW_LINES (appended), COUNTS
# Arguments: $1 - tier: ready | resync | review
#            $2 - COUNTS key to increment
#            $3 - fully formatted output line
# Returns:   0 (the final assignment always succeeds, so this is safe to call
#            under `set -e`)
#######################################
record_result() {
  local tier="$1"
  local counter="$2"
  local line="$3"

  case "$tier" in
    ready) READY_LINES+=("$line") ;;
    resync) RESYNC_LINES+=("$line") ;;
    review) REVIEW_LINES+=("$line") ;;
  esac
  COUNTS[$counter]=$(( ${COUNTS[$counter]:-0} + 1 ))
}

#######################################
# Classify a single repo according to the decision tree and record the result.
# Globals:   PROD_CAT, ARCHIVE_CAT, DELETED, PURGATORY, PARSE_FAIL (read);
#            COUNTS and the three *_LINES arrays (written via record_result)
# Arguments: $1 - lookup key "repo|npub"
# Returns:   0 always; a key with no data anywhere records nothing
#######################################
classify_repo() {
  local key="$1"
  local repo npub
  IFS='|' read -r repo npub <<< "$key"

  local prod_cat="${PROD_CAT[$key]:-missing}"
  local archive_cat="${ARCHIVE_CAT[$key]:-missing}"
  local prod_status archive_status context
  prod_status=$(cat_to_status "$prod_cat")
  archive_status=$(cat_to_status "$archive_cat")
  # Context depends only on the key and the two statuses, so compute it once
  # here instead of once per branch.
  context=$(get_context "$key" "$prod_status" "$archive_status")

  local prefix="$repo | $npub | $prod_status | $archive_status"

  # 1. Is there a kind 5 deletion event? It wins over every other state.
  if [[ -n "${DELETED[$key]:-}" ]]; then
    record_result ready ready_deleted "$prefix | $context | deleted by user"
    return 0
  fi

  # 2. What is the prod status?
  case "$prod_cat" in
    missing)
      # Not in prod
      if [[ "$archive_cat" != "missing" ]]; then
        # In archive but not in prod -> no action (archive-only)
        record_result ready ready_archive_only \
          "$prefix | $context | archive-only (not in prod)"
      elif [[ -n "${PURGATORY[$key]:-}" ]]; then
        # Purgatory only, not in prod -> no action. The context column is
        # deliberately fixed here (parse-failure is not appended).
        record_result ready ready_not_in_prod \
          "$prefix | purgatory-expired | purgatory-only (not in prod)"
      fi
      # Otherwise skip (not a real repo - no data anywhere)
      ;;

    cat2)
      # Empty in prod -> ALWAYS no action required
      record_result ready ready_empty_prod \
        "$prefix | $context | empty in prod (user never pushed)"
      ;;

    cat1)
      # Complete in prod
      if [[ "$archive_cat" == "cat1" ]]; then
        # Complete in both -> no action
        record_result ready ready_complete_both \
          "$prefix | $context | complete in both"
      elif [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
        # Missing/incomplete in archive AND a parse failure -> manual review
        record_result review review_parse_failure \
          "$prefix | $context | complete in prod with parse failure"
      elif [[ "$archive_cat" == "missing" ]]; then
        # Needs resync; context carries any purgatory information
        record_result resync resync_missing_archive \
          "$prefix | $context | trigger re-sync to archive"
      else
        record_result resync resync_incomplete_archive \
          "$prefix | $context | trigger re-sync (archive incomplete)"
      fi
      ;;

    cat3)
      # Partial in prod -> ALWAYS manual investigation
      record_result review review_partial_prod \
        "$prefix | $context | partial in prod (investigate git data)"
      ;;

    cat4)
      # No-match in prod -> ALWAYS manual investigation
      record_result review review_nomatch_prod \
        "$prefix | $context | no-match in prod (git corruption)"
      ;;
  esac

  return 0
}

LOOP_COUNT=0
for key in "${!ALL_REPOS[@]}"; do
  LOOP_COUNT=$((LOOP_COUNT + 1))
  # Progress heartbeat every 100 repos
  if (( LOOP_COUNT % 100 == 0 )); then
    log_info "Processed $LOOP_COUNT repos..."
  fi
  classify_repo "$key"
done

# ============================================================================
# Phase 4: Write output files
# ============================================================================

log_info "Writing output files..."

# Single UTC timestamp shared by every generated file (also used by the
# summary phase below).
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S+00:00")

# ready-for-migration.txt: header block, then one record per Tier 1 repo
{
  printf '%s\n' \
    "# Ready for Migration - No action required" \
    "# Generated: $TIMESTAMP" \
    "# Format: repo | npub | prod_status | archive_status | context | reason" \
    "#"
  for entry in "${READY_LINES[@]}"; do
    printf '%s\n' "$entry"
  done
} > "$READY_FILE"

# needs-resync.txt: header block with a legend for the context column, then
# one record per Tier 2 repo
{
  printf '%s\n' \
    "# Needs Re-sync - Action required" \
    "# Generated: $TIMESTAMP" \
    "# Format: repo | npub | prod_status | archive_status | context | action" \
    "#" \
    "# Context meanings:" \
    "# purgatory-expired = archive tried to sync but failed (30min timeout)" \
    "# none = archive never tried or announcement missing" \
    "#"
  for entry in "${RESYNC_LINES[@]}"; do
    printf '%s\n' "$entry"
  done
} > "$RESYNC_FILE"

# manual-review.txt: header block, then one record per Tier 3 repo
{
  printf '%s\n' \
    "# Manual Review Required - Investigation needed" \
    "# Generated: $TIMESTAMP" \
    "# Format: repo | npub | prod_status | archive_status | context | reason" \
    "#"
  for entry in "${REVIEW_LINES[@]}"; do
    printf '%s\n' "$entry"
  done
} > "$REVIEW_FILE"

# ============================================================================
# Phase 5: Generate summary
# ============================================================================

log_info "Generating summary..."
+ +TOTAL_READY=${#READY_LINES[@]} +TOTAL_RESYNC=${#RESYNC_LINES[@]} +TOTAL_REVIEW=${#REVIEW_LINES[@]} +TOTAL=$((TOTAL_READY + TOTAL_RESYNC + TOTAL_REVIEW)) + +# Calculate percentages +if [[ $TOTAL -gt 0 ]]; then + PCT_READY=$(awk "BEGIN {printf \"%.1f\", ($TOTAL_READY / $TOTAL) * 100}") + PCT_RESYNC=$(awk "BEGIN {printf \"%.1f\", ($TOTAL_RESYNC / $TOTAL) * 100}") + PCT_REVIEW=$(awk "BEGIN {printf \"%.1f\", ($TOTAL_REVIEW / $TOTAL) * 100}") +else + PCT_READY="0.0" + PCT_RESYNC="0.0" + PCT_REVIEW="0.0" +fi + +{ + echo "# Migration Classification Summary" + echo "Generated: $TIMESTAMP" + echo "Analysis Directory: $ANALYSIS_DIR" + echo "" + echo "## Overview" + echo "" + echo "| Category | Count | Percentage |" + echo "|----------|-------|------------|" + echo "| Ready for Migration | $TOTAL_READY | $PCT_READY% |" + echo "| Needs Re-sync | $TOTAL_RESYNC | $PCT_RESYNC% |" + echo "| Manual Review | $TOTAL_REVIEW | $PCT_REVIEW% |" + echo "| **Total** | **$TOTAL** | **100%** |" + echo "" + echo "## Tier 1: Ready for Migration ($TOTAL_READY repos)" + echo "" + echo "These repositories are ready for migration or don't need migration:" + echo "" + echo "| Reason | Count |" + echo "|--------|-------|" + echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |" + echo "| deleted by user | ${COUNTS[ready_deleted]} |" + echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |" + echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |" + echo "| purgatory-only (not in prod) | ${COUNTS[ready_not_in_prod]} |" + echo "" + echo "## Tier 2: Needs Re-sync ($TOTAL_RESYNC repos)" + echo "" + echo "These repositories need re-sync to archive before migration:" + echo "" + echo "| Reason | Count | Action |" + echo "|--------|-------|--------|" + echo "| complete in prod, missing from archive | ${COUNTS[resync_missing_archive]} | trigger re-sync |" + echo "| complete in prod, incomplete in archive | ${COUNTS[resync_incomplete_archive]} | 
trigger re-sync |" + echo "" + echo "### Purgatory Context" + echo "" + echo "Repos in needs-resync.txt include purgatory context:" + echo "- **purgatory-expired**: Archive tried to sync but failed (30min timeout)" + echo "- **none**: Archive never tried or announcement missing" + echo "" + echo "## Tier 3: Manual Review ($TOTAL_REVIEW repos)" + echo "" + echo "These repositories require human investigation:" + echo "" + echo "| Reason | Count |" + echo "|--------|-------|" + echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |" + echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |" + echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |" + echo "" + echo "## Input Data Summary" + echo "" + echo "### Prod Categories" + echo "- Category 1 (complete): $(wc -l < "$PROD_DIR/category1-complete-match.txt")" + echo "- Category 2 (empty): $(wc -l < "$PROD_DIR/category2-empty-blank.txt")" + echo "- Category 3 (partial): $(wc -l < "$PROD_DIR/category3-partial-match.txt")" + echo "- Category 4 (no match): $(wc -l < "$PROD_DIR/category4-no-match.txt")" + echo "" + echo "### Archive Categories" + echo "- Category 1 (complete): $(wc -l < "$ARCHIVE_DIR/category1-complete-match.txt")" + echo "- Category 2 (empty): $(wc -l < "$ARCHIVE_DIR/category2-empty-blank.txt")" + echo "- Category 3 (partial): $(wc -l < "$ARCHIVE_DIR/category3-partial-match.txt")" + echo "- Category 4 (no match): $(wc -l < "$ARCHIVE_DIR/category4-no-match.txt")" + echo "" + echo "### Logs" + echo "- Parse failures: $(grep -c -v '^#' "$LOGS_DIR/parse-failures.txt" 2>/dev/null || echo 0)" + echo "- Purgatory expired: $(grep -c -v '^#' "$LOGS_DIR/purgatory-expired.txt" 2>/dev/null || echo 0)" + echo "" + echo "## Output Files" + echo "" + echo "- \`results/ready-for-migration.txt\` - $TOTAL_READY repos ready for migration" + echo "- \`results/needs-resync.txt\` - $TOTAL_RESYNC repos needing re-sync" + echo "- \`results/manual-review.txt\` - $TOTAL_REVIEW repos 
needing investigation" + echo "- \`results/summary.txt\` - This summary file" + echo "" + echo "## Recommended Next Steps" + echo "" + echo "1. **Review needs-resync.txt** - Trigger re-sync for these repos" + echo "2. **Review manual-review.txt** - Investigate unusual states" + echo "3. **Verify ready-for-migration.txt** - Spot-check a few repos" + echo "4. **Plan migration window** - Schedule cutover when action items resolved" +} > "$SUMMARY_FILE" + +# ============================================================================ +# Phase 6: Print summary to console +# ============================================================================ + +echo "" +log_success "Classification complete!" +echo "" +echo "=== Summary ===" +echo "Ready for Migration: $TOTAL_READY ($PCT_READY%)" +echo " - Complete in both: ${COUNTS[ready_complete_both]}" +echo " - Deleted by user: ${COUNTS[ready_deleted]}" +echo " - Empty in prod: ${COUNTS[ready_empty_prod]}" +echo " - Archive-only: ${COUNTS[ready_archive_only]}" +echo " - Purgatory-only: ${COUNTS[ready_not_in_prod]}" +echo "" +echo "Needs Re-sync: $TOTAL_RESYNC ($PCT_RESYNC%)" +echo " - Missing from archive: ${COUNTS[resync_missing_archive]}" +echo " - Incomplete in archive: ${COUNTS[resync_incomplete_archive]}" +echo "" +echo "Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)" +echo " - Partial in prod: ${COUNTS[review_partial_prod]}" +echo " - No-match in prod: ${COUNTS[review_nomatch_prod]}" +echo " - Parse failures: ${COUNTS[review_parse_failure]}" +echo "" +echo "Total: $TOTAL repos" +echo "" +echo "Output files:" +echo " $READY_FILE" +echo " $RESYNC_FILE" +echo " $REVIEW_FILE" +echo " $SUMMARY_FILE" -- cgit v1.2.3 From e93bf707bb5f8d690393449cee1b402f123ac923 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Mon, 26 Jan 2026 09:39:48 +0000 Subject: fix: git naughty list DNS failure identication caught a production bug where npub in url string contained "dns" triggering false positive --- src/sync/naughty_list.rs | 36 
++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/src/sync/naughty_list.rs b/src/sync/naughty_list.rs index 35fcc0f..097affe 100644 --- a/src/sync/naughty_list.rs +++ b/src/sync/naughty_list.rs @@ -114,11 +114,15 @@ impl NaughtyListTracker { pub fn classify_error(error: &str) -> Option { let error_lower = error.to_lowercase(); - // DNS lookup failures + // DNS lookup failures - use specific patterns to avoid false positives + // from URLs containing "dns" (e.g., npubs like "...cdns7..." or domains) if error_lower.contains("failed to lookup address") || error_lower.contains("name or service not known") || error_lower.contains("nodename nor servname provided") - || (error_lower.contains("dns") && !error_lower.contains("timeout")) + || error_lower.contains("dns error") + || error_lower.contains("dns lookup") + || error_lower.contains("dns resolution") + || error_lower.contains("getaddrinfo") { return Some(NaughtyCategory::DnsLookupFailed); } @@ -373,6 +377,34 @@ mod tests { NaughtyListTracker::classify_error("network unreachable"), None ); + + // Repository not found is transient (not an infrastructure issue) + assert_eq!( + NaughtyListTracker::classify_error( + "fatal: repository 'https://example.com/repo.git/' not found" + ), + None + ); + } + + #[test] + fn test_classify_false_positive_npub_with_dns() { + // This npub contains "dns" in its encoding: npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cDNS7jezx0 + // A "not found" error with this npub should NOT be classified as DNS failure + let error = "fatal: repository 'https://git.shakespeare.diy/npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cdns7jezx0/kuboslopp%20by%20Shakespeare.git/' not found"; + assert_eq!( + NaughtyListTracker::classify_error(error), + None, + "npub containing 'dns' should not trigger DNS failure classification" + ); + + // Same for relay.ngit.dev + let error2 = "fatal: repository 
'https://relay.ngit.dev/npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cdns7jezx0/kuboslopp%20by%20Shakespeare.git/' not found"; + assert_eq!( + NaughtyListTracker::classify_error(error2), + None, + "npub containing 'dns' should not trigger DNS failure classification" + ); } #[test] -- cgit v1.2.3 From 4b43bbcee7c1ad2b331881ed73c3de0b52e52e2d Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Mon, 26 Jan 2026 12:26:29 +0000 Subject: Replace old classification script with redesigned version (Option B) The new script implements the redesigned classification system with: - Tier 1: No Action Required (complete in both, deleted, empty, archive-only) - Tier 2: Action Required (complete in prod but missing/incomplete in archive) - Tier 3: Manual Investigation (partial/no-match in prod, archive-only anomalies) Produces cleaner output format with actionable categories and reasons. --- .../migration-scripts/40-classify-actions.sh | 1260 +++++++++----------- 1 file changed, 541 insertions(+), 719 deletions(-) diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh index 53c0f9d..81559aa 100755 --- a/docs/how-to/migration-scripts/40-classify-actions.sh +++ b/docs/how-to/migration-scripts/40-classify-actions.sh @@ -1,772 +1,594 @@ #!/usr/bin/env bash # -# 40-classify-actions.sh - Final classification of repos for migration action +# 40-classify-actions.sh - Classify repos by migration action required # -# PHASE 5 of the GRASP relay to ngit-grasp migration analysis pipeline. -# Combines all data sources from previous phases to produce actionable results. 
+# Implements the redesigned classification system (Option B) with user feedback: # -# USAGE: -# ./40-classify-actions.sh +# Tier 1: No Action Required (ready-for-migration.txt) +# - Complete in both (prod=cat1, archive=cat1) +# - Deleted by user (kind 5 event) +# - Empty in prod (prod=cat2, any archive status) +# - Archive-only (archive=any, prod=missing) +# - Not in prod (purgatory-only, prod=missing) # -# EXAMPLES: -# ./40-classify-actions.sh work/migration-analysis-20260122-1430 +# Tier 2: Action Required (needs-resync.txt) +# - Complete in prod, missing from archive (with purgatory context) +# - Complete in prod, incomplete in archive (with purgatory context) # -# INPUT DIRECTORY STRUCTURE: -# / -# ├── prod/ -# │ ├── raw/ -# │ │ └── deletions.json # Phase 1: kind 5 deletion events -# │ ├── category1-complete-match.txt # Phase 3: complete git sync -# │ ├── category2-empty-blank.txt # Phase 3: no git data -# │ ├── category3-partial-match.txt # Phase 3: partial git sync -# │ └── category4-no-match.txt # Phase 3: git exists, refs don't match -# ├── archive/ -# │ ├── raw/ -# │ │ └── deletions.json -# │ ├── category1-complete-match.txt -# │ ├── category2-empty-blank.txt -# │ ├── category3-partial-match.txt -# │ └── category4-no-match.txt -# ├── comparison/ -# │ ├── complete-in-both.txt # Phase 3: no action needed -# │ ├── complete-prod-missing-archive.txt # Phase 3: needs investigation -# │ ├── complete-prod-incomplete-archive.txt # Phase 3: sync in progress? 
-# │ ├── incomplete-in-both.txt # Phase 3: git incomplete -# │ └── in-archive-not-prod.txt # Phase 3: deleted or new -# └── logs/ -# ├── parse-failures.txt # Phase 4: events that failed to parse -# └── purgatory-expired.txt # Phase 4: repos that expired from purgatory +# Tier 3: Manual Investigation (manual-review.txt) +# - Partial in prod (prod=cat3) +# - No-match in prod (prod=cat4) +# - Parse failures +# - Conflicting states # -# OUTPUT: -# /results/ -# ├── no-action-required.txt # Repos that are fine as-is -# ├── action-required.txt # Repos needing intervention -# ├── manual-investigation.txt # Repos needing human review -# └── summary.txt # Human-readable summary +# Usage: ./40-classify-actions.sh # -# OUTPUT FORMATS: -# no-action-required.txt: -# repo | npub | reason -# -# action-required.txt: -# repo | npub | reason | suggested_action -# -# manual-investigation.txt: -# repo | npub | reason | context -# -# CLASSIFICATION LOGIC: -# -# NO ACTION REQUIRED: -# - Complete in both prod and archive (successfully migrated) -# - Empty/blank in both (user never pushed any data) -# - Deleted by user (kind 5 deletion event exists) -# - In purgatory expiry logs (system already handled it) -# -# ACTION REQUIRED: -# - Complete in prod, missing from archive → Re-sync needed -# - Complete in prod, incomplete in archive → Wait for sync or re-trigger -# - Partial match in prod → Investigate why refs don't match -# - No match (category 4) → Investigate git data corruption -# - Parse failures → Fix event format or re-announce -# -# MANUAL INVESTIGATION: -# - Conflicting states (e.g., complete in prod but parse failure logged) -# - In archive but not prod (deleted? or new announcement?) 
-# - Multiple issues for same repo -# - Unexpected state combinations -# -# PREREQUISITES: -# - jq (for parsing JSON) -# - awk, sort, comm (standard Unix tools) -# -# RUNTIME: < 5 seconds (local processing only) -# -# SEE ALSO: -# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide -# 01-fetch-events.sh - Phase 1 (fetch events) -# 10-check-git-sync.sh - Phase 2 (git sync check) -# 20-categorize.sh, 21-compare-relays.sh - Phase 3 (categorize and compare) -# 30-extract-parse-failures.sh, 31-extract-purgatory-expiry.sh - Phase 4 (logs) +# Output format: repo | npub | prod_status | archive_status | context | action # set -euo pipefail -# Colors for output (disabled if not a terminal) -if [[ -t 1 ]]; then - RED='\033[0;31m' - GREEN='\033[0;32m' - YELLOW='\033[0;33m' - BLUE='\033[0;34m' - BOLD='\033[1m' - NC='\033[0m' -else - RED='' - GREEN='' - YELLOW='' - BLUE='' - BOLD='' - NC='' -fi +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color -log_info() { - echo -e "${BLUE}[INFO]${NC} $*" >&2 -} +log_info() { echo -e "${BLUE}[INFO]${NC} $*"; } +log_success() { echo -e "${GREEN}[OK]${NC} $*"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; } -log_success() { - echo -e "${GREEN}[OK]${NC} $*" >&2 -} - -log_warn() { - echo -e "${YELLOW}[WARN]${NC} $*" >&2 -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $*" >&2 -} - -usage() { +# Check arguments +if [[ $# -lt 1 ]]; then echo "Usage: $0 " - echo "" - echo "Arguments:" - echo " analysis-dir Directory containing Phase 1-4 output" - echo "" - echo "Examples:" - echo " $0 work/migration-analysis-20260122-1430" - echo "" - echo "Required input structure:" - echo " /prod/category*.txt" - echo " /archive/category*.txt" - echo " /comparison/*.txt" - echo " /logs/*.txt (optional)" - echo " /prod/raw/deletions.json" - echo "" - echo "Output:" - echo " /results/no-action-required.txt" - echo " 
/results/action-required.txt" - echo " /results/manual-investigation.txt" - echo " /results/summary.txt" + echo "Example: $0 work/migration-analysis-20260123-200701" exit 1 -} - -# Extract repo|npub key from category line -# Input: "repo | npub | state_refs=N | ..." -# Output: "repo|npub" -extract_key() { - awk -F' \\| ' '{print $1 "|" $2}' -} - -# Extract repo from category line -# Input: "repo | npub | ..." -# Output: "repo" -extract_repo() { - awk -F' \\| ' '{print $1}' -} +fi -# Extract npub from category line -# Input: "repo | npub | ..." -# Output: "npub" -extract_npub() { - awk -F' \\| ' '{print $2}' -} +ANALYSIS_DIR="$1" -# Check if a file exists and has content (ignoring comment lines) -file_has_content() { - local file="$1" - if [[ ! -f "$file" ]]; then - return 1 - fi - # Check for non-comment, non-empty lines - grep -v '^#' "$file" 2>/dev/null | grep -q '.' 2>/dev/null -} +# Validate analysis directory +if [[ ! -d "$ANALYSIS_DIR" ]]; then + log_error "Analysis directory not found: $ANALYSIS_DIR" + exit 1 +fi -# Count non-comment lines in a file -count_lines() { - local file="$1" - if [[ ! -f "$file" ]]; then - echo "0" - return +# Define paths +PROD_DIR="$ANALYSIS_DIR/prod" +ARCHIVE_DIR="$ANALYSIS_DIR/archive" +COMPARISON_DIR="$ANALYSIS_DIR/comparison" +LOGS_DIR="$ANALYSIS_DIR/logs" +RESULTS_DIR="$ANALYSIS_DIR/results" + +# Validate required directories +for dir in "$PROD_DIR" "$ARCHIVE_DIR" "$COMPARISON_DIR" "$LOGS_DIR"; do + if [[ ! -d "$dir" ]]; then + log_error "Required directory not found: $dir" + exit 1 fi - local count - count=$(grep -v '^#' "$file" 2>/dev/null | grep -c '.' 
2>/dev/null) || count=0 - # Ensure we return a clean integer - echo "${count:-0}" -} +done + +# Create results directory +mkdir -p "$RESULTS_DIR" + +# Output files +READY_FILE="$RESULTS_DIR/ready-for-migration.txt" +RESYNC_FILE="$RESULTS_DIR/needs-resync.txt" +REVIEW_FILE="$RESULTS_DIR/manual-review.txt" +SUMMARY_FILE="$RESULTS_DIR/summary.txt" + +# Temporary files for processing +TMP_DIR=$(mktemp -d) +trap 'rm -rf "$TMP_DIR"' EXIT + +log_info "Starting classification with revised system (Option B)" +log_info "Analysis directory: $ANALYSIS_DIR" + +# ============================================================================ +# Phase 1: Build lookup tables from source data +# ============================================================================ + +log_info "Building lookup tables..." + +# Build prod category lookup: repo|npub -> category +declare -A PROD_CAT +while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do + repo="${repo// /}" # Remove all spaces + npub="${npub// /}" # Remove all spaces + [[ -z "$repo" || -z "$npub" ]] && continue + PROD_CAT["$repo|$npub"]="cat1" +done < "$PROD_DIR/category1-complete-match.txt" + +while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + PROD_CAT["$repo|$npub"]="cat2" +done < "$PROD_DIR/category2-empty-blank.txt" + +while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + PROD_CAT["$repo|$npub"]="cat3" +done < "$PROD_DIR/category3-partial-match.txt" + +while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + PROD_CAT["$repo|$npub"]="cat4" +done < "$PROD_DIR/category4-no-match.txt" + +log_info "Loaded ${#PROD_CAT[@]} prod entries" + +# Build archive category lookup: repo|npub -> category +declare -A ARCHIVE_CAT +while IFS='|' read -r repo 
npub rest; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + ARCHIVE_CAT["$repo|$npub"]="cat1" +done < "$ARCHIVE_DIR/category1-complete-match.txt" + +while IFS='|' read -r repo npub rest; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + ARCHIVE_CAT["$repo|$npub"]="cat2" +done < "$ARCHIVE_DIR/category2-empty-blank.txt" + +while IFS='|' read -r repo npub rest; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + ARCHIVE_CAT["$repo|$npub"]="cat3" +done < "$ARCHIVE_DIR/category3-partial-match.txt" + +while IFS='|' read -r repo npub rest; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + ARCHIVE_CAT["$repo|$npub"]="cat4" +done < "$ARCHIVE_DIR/category4-no-match.txt" + +log_info "Loaded ${#ARCHIVE_CAT[@]} archive entries" + +# Build purgatory lookup: repo|npub -> 1 (if purgatory expired) +declare -A PURGATORY +PURGATORY_COUNT=0 +if [[ -f "$LOGS_DIR/purgatory-expired.txt" ]]; then + while IFS=$'\t' read -r repo npub timestamp reason || [[ -n "$repo" ]]; do + # Skip comments and empty lines + [[ "$repo" =~ ^# ]] && continue + [[ -z "$repo" || -z "$npub" ]] && continue + PURGATORY["$repo|$npub"]=1 + ((PURGATORY_COUNT++)) + done < "$LOGS_DIR/purgatory-expired.txt" +fi +log_info "Loaded $PURGATORY_COUNT purgatory entries" + +# Build parse failure lookup: repo|npub -> 1 (if parse failure logged) +# Parse failures file format: event_idkindreasonreponpub +declare -A PARSE_FAIL +PARSE_FAIL_COUNT=0 +if [[ -f "$LOGS_DIR/parse-failures.txt" ]]; then + while IFS=$'\t' read -r event_id kind reason repo npub || [[ -n "$event_id" ]]; do + # Skip comments and empty lines + [[ "$event_id" =~ ^# ]] && continue + [[ -z "$repo" || -z "$npub" ]] && continue + PARSE_FAIL["$repo|$npub"]=1 + ((PARSE_FAIL_COUNT++)) + done < "$LOGS_DIR/parse-failures.txt" +fi +log_info "Loaded $PARSE_FAIL_COUNT parse failure entries" -# Parse 
deletions.json to extract deleted repo identifiers -# Kind 5 events have "e" tags pointing to the deleted event -# We need to cross-reference with announcements to get repo/npub -# For now, we extract the pubkey and any "a" tags (addressable event references) -parse_deletions() { - local deletions_file="$1" - local output_file="$2" - - if [[ ! -f "$deletions_file" ]]; then - touch "$output_file" - return - fi - - # Extract deletion targets from kind 5 events - # Kind 5 events can reference: - # - "e" tag: specific event ID - # - "a" tag: addressable event (kind:pubkey:identifier) - # For 30617 announcements, "a" tag format is: 30617:: - jq -r ' - select(.kind == 5) | - .pubkey as $pubkey | - .tags[] | - select(.[0] == "a") | - .[1] | - split(":") | - select(.[0] == "30617") | - "\(.[2])|\($pubkey)" - ' "$deletions_file" 2>/dev/null | sort -u > "$output_file" || touch "$output_file" -} +# Build deletion lookup: repo|npub -> 1 (if kind 5 deletion event) +# Deletions are in NDJSON format with "a" tags like "30617:pubkey_hex:repo" +# We need to convert hex pubkeys to npub format using nak +declare -A DELETED -# Build a lookup set from a file (repo|npub format) -# Returns keys one per line -build_key_set() { +# Helper function to process deletion file (NDJSON format) +# Extracts unique pubkey_hex:repo pairs and converts to npub +process_deletions() { local file="$1" - if [[ ! -f "$file" ]]; then - return 0 - fi - # Use || true to prevent pipefail from exiting on empty grep - { grep -v '^#' "$file" 2>/dev/null || true; } | extract_key | sort -u + [[ ! 
-f "$file" ]] && return + + # Extract unique pubkey_hex|repo pairs from NDJSON + # Each line is a JSON object, extract "a" tags + local pairs + pairs=$(jq -r '.tags[] | select(.[0] == "a") | .[1]' "$file" 2>/dev/null | \ + sed 's/^30617://' | awk -F: '{print $1 "|" $2}' | sort -u) + + # Get unique hex pubkeys for batch conversion + local hex_keys + hex_keys=$(echo "$pairs" | cut -d'|' -f1 | sort -u) + + # Build hex->npub lookup via batch nak call + declare -A HEX_TO_NPUB + while read -r hex; do + [[ -z "$hex" ]] && continue + local npub + npub=$(nak encode npub "$hex" 2>/dev/null || echo "") + [[ -n "$npub" ]] && HEX_TO_NPUB["$hex"]="$npub" + done <<< "$hex_keys" + + # Now process pairs with cached npub values + while IFS='|' read -r pubkey_hex repo; do + [[ -z "$repo" || -z "$pubkey_hex" ]] && continue + local npub="${HEX_TO_NPUB[$pubkey_hex]:-}" + [[ -z "$npub" ]] && continue + DELETED["$repo|$npub"]=1 + done <<< "$pairs" } -# Main classification logic -main() { - if [[ $# -ne 1 ]]; then - usage +# Process prod and archive deletions +process_deletions "$PROD_DIR/raw/deletions.json" +process_deletions "$ARCHIVE_DIR/raw/deletions.json" +DELETED_COUNT=0 +[[ ${#DELETED[@]} -gt 0 ]] && DELETED_COUNT=${#DELETED[@]} +log_info "Loaded $DELETED_COUNT deletion entries" + +# ============================================================================ +# Phase 2: Build unique repo list from all sources +# ============================================================================ + +log_info "Building unique repo list..." 
+ +declare -A ALL_REPOS +for key in "${!PROD_CAT[@]}"; do + ALL_REPOS["$key"]=1 +done +for key in "${!ARCHIVE_CAT[@]}"; do + ALL_REPOS["$key"]=1 +done +for key in "${!PURGATORY[@]}"; do + ALL_REPOS["$key"]=1 +done + +log_info "Total unique repos: ${#ALL_REPOS[@]}" + +# ============================================================================ +# Phase 3: Classify each repo according to revised decision tree +# ============================================================================ + +log_info "Classifying repos..." + +# Counters for summary +declare -A COUNTS +COUNTS[ready_complete_both]=0 +COUNTS[ready_deleted]=0 +COUNTS[ready_empty_prod]=0 +COUNTS[ready_archive_only]=0 +COUNTS[ready_not_in_prod]=0 +COUNTS[resync_missing_archive]=0 +COUNTS[resync_incomplete_archive]=0 +COUNTS[review_partial_prod]=0 +COUNTS[review_nomatch_prod]=0 +COUNTS[review_parse_failure]=0 +COUNTS[review_conflicting]=0 + +# Output arrays +declare -a READY_LINES +declare -a RESYNC_LINES +declare -a REVIEW_LINES + +# Helper function to get context string +get_context() { + local key="$1" + local prod_status="$2" + local archive_status="$3" + local context="" + + # Check purgatory + if [[ -n "${PURGATORY[$key]:-}" ]]; then + context="purgatory-expired" fi - local analysis_dir="$1" - - # Validate input directory - if [[ ! -d "$analysis_dir" ]]; then - log_error "Analysis directory not found: $analysis_dir" - exit 1 - fi - - # Check for required subdirectories - local prod_dir="$analysis_dir/prod" - local archive_dir="$analysis_dir/archive" - local comparison_dir="$analysis_dir/comparison" - local logs_dir="$analysis_dir/logs" - local results_dir="$analysis_dir/results" - - for dir in "$prod_dir" "$archive_dir" "$comparison_dir"; do - if [[ ! -d "$dir" ]]; then - log_error "Required directory not found: $dir" - log_error "Run Phases 1-3 first to generate input data." 
- exit 1 + # Check parse failure + if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then + if [[ -n "$context" ]]; then + context="$context, parse-failure" + else + context="parse-failure" fi - done - - # Check for required category files - if [[ ! -f "$prod_dir/category1-complete-match.txt" ]]; then - log_error "Missing category files in $prod_dir" - log_error "Run Phase 3 (20-categorize.sh) first." - exit 1 - fi - - log_info "Starting final classification" - log_info "Analysis directory: $analysis_dir" - - # Create output directory - mkdir -p "$results_dir" - - # Create temp directory for intermediate files - local tmp_dir - tmp_dir=$(mktemp -d) - # shellcheck disable=SC2064 - trap "rm -rf '$tmp_dir'" EXIT - - # Initialize output files - local no_action="$results_dir/no-action-required.txt" - local action_req="$results_dir/action-required.txt" - local manual_inv="$results_dir/manual-investigation.txt" - local summary="$results_dir/summary.txt" - - # Write headers - { - echo "# No Action Required - Repos that are fine as-is" - echo "# Generated: $(date -Iseconds)" - echo "# Format: repo | npub | reason" - echo "#" - } > "$no_action" - - { - echo "# Action Required - Repos needing intervention" - echo "# Generated: $(date -Iseconds)" - echo "# Format: repo | npub | reason | suggested_action" - echo "#" - } > "$action_req" - - { - echo "# Manual Investigation Required - Repos needing human review" - echo "# Generated: $(date -Iseconds)" - echo "# Format: repo | npub | reason | context" - echo "#" - } > "$manual_inv" - - # ========================================================================= - # STEP 1: Parse deletion events - # ========================================================================= - log_info "Parsing deletion events..." 
- - parse_deletions "$prod_dir/raw/deletions.json" "$tmp_dir/prod_deletions.txt" - parse_deletions "$archive_dir/raw/deletions.json" "$tmp_dir/archive_deletions.txt" - - # Combine deletions (union of both) - cat "$tmp_dir/prod_deletions.txt" "$tmp_dir/archive_deletions.txt" 2>/dev/null | sort -u > "$tmp_dir/all_deletions.txt" - - local deletion_count - deletion_count=$(wc -l < "$tmp_dir/all_deletions.txt" | tr -d ' ') - log_info "Found $deletion_count deletion requests" - - # ========================================================================= - # STEP 2: Parse log-based categories (Phase 4) - # ========================================================================= - log_info "Parsing log-based categories..." - - # Parse failures: event_idkindreasonreponpub - # Note: repo and npub are in columns 4 and 5 (enriched by Phase 4 from announcements.json) - # Some entries may have empty repo/npub if the event_id wasn't found in announcements - if [[ -f "$logs_dir/parse-failures.txt" ]] && file_has_content "$logs_dir/parse-failures.txt"; then - grep -v '^#' "$logs_dir/parse-failures.txt" | awk -F'\t' '{print $4 "|" $5}' | sort -u > "$tmp_dir/parse_failures.txt" - log_info "Found $(wc -l < "$tmp_dir/parse_failures.txt" | tr -d ' ') parse failure entries" - else - touch "$tmp_dir/parse_failures.txt" - log_info "No parse failures found (logs may be empty or not yet generated)" - fi - - # Purgatory expired: reponpubtimestampreason - if [[ -f "$logs_dir/purgatory-expired.txt" ]] && file_has_content "$logs_dir/purgatory-expired.txt"; then - grep -v '^#' "$logs_dir/purgatory-expired.txt" | awk -F'\t' '{print $1 "|" $2}' | sort -u > "$tmp_dir/purgatory_expired.txt" - log_info "Found $(wc -l < "$tmp_dir/purgatory_expired.txt" | tr -d ' ') purgatory expiry entries" - else - touch "$tmp_dir/purgatory_expired.txt" - log_info "No purgatory expiry entries found (logs may be empty or not yet generated)" - fi - - # 
========================================================================= - # STEP 3: Build lookup tables from category files - # ========================================================================= - log_info "Building lookup tables..." - - # Build key sets for each category (prod) - build_key_set "$prod_dir/category1-complete-match.txt" > "$tmp_dir/prod_cat1.txt" - build_key_set "$prod_dir/category2-empty-blank.txt" > "$tmp_dir/prod_cat2.txt" - build_key_set "$prod_dir/category3-partial-match.txt" > "$tmp_dir/prod_cat3.txt" - build_key_set "$prod_dir/category4-no-match.txt" > "$tmp_dir/prod_cat4.txt" - - # Build key sets for each category (archive) - build_key_set "$archive_dir/category1-complete-match.txt" > "$tmp_dir/archive_cat1.txt" - build_key_set "$archive_dir/category2-empty-blank.txt" > "$tmp_dir/archive_cat2.txt" - build_key_set "$archive_dir/category3-partial-match.txt" > "$tmp_dir/archive_cat3.txt" - build_key_set "$archive_dir/category4-no-match.txt" > "$tmp_dir/archive_cat4.txt" - - # All repos in prod - cat "$tmp_dir"/prod_cat*.txt 2>/dev/null | sort -u > "$tmp_dir/all_prod.txt" || true - - # All repos in archive - cat "$tmp_dir"/archive_cat*.txt 2>/dev/null | sort -u > "$tmp_dir/all_archive.txt" || true - - # ========================================================================= - # STEP 4: Process comparison files and apply classification - # ========================================================================= - log_info "Applying classification logic..." 
- - # Track processed repos to detect duplicates/conflicts - > "$tmp_dir/processed.txt" - - # Counters - local count_no_action=0 - local count_action=0 - local count_manual=0 - - # --- NO ACTION: Complete in both --- - if [[ -f "$comparison_dir/complete-in-both.txt" ]]; then - while IFS= read -r line; do - [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue - - repo=$(echo "$line" | extract_repo) - npub=$(echo "$line" | extract_npub) - key="${repo}|${npub}" - - # Check if deleted (still no action, but different reason) - if grep -qF "$key" "$tmp_dir/all_deletions.txt" 2>/dev/null; then - echo "$repo | $npub | deleted by user (also complete in both)" >> "$no_action" - else - echo "$repo | $npub | complete in both prod and archive" >> "$no_action" - fi - echo "$key" >> "$tmp_dir/processed.txt" - ((count_no_action++)) || true - done < "$comparison_dir/complete-in-both.txt" fi - # --- NO ACTION: Deleted by user (not already processed) --- - while IFS='|' read -r repo npub; do - [[ -z "$repo" ]] && continue - key="${repo}|${npub}" - - # Skip if already processed - if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then - continue - fi - - # Convert pubkey to npub if needed (deletions use hex pubkey) - # For now, just use the pubkey as-is since we're matching by repo - echo "$repo | $npub | deleted by user" >> "$no_action" - echo "$key" >> "$tmp_dir/processed.txt" - ((count_no_action++)) || true - done < "$tmp_dir/all_deletions.txt" - - # --- NO ACTION: Empty/blank in both --- - # Find repos that are category 2 in both prod and archive - comm -12 "$tmp_dir/prod_cat2.txt" "$tmp_dir/archive_cat2.txt" 2>/dev/null | while IFS='|' read -r repo npub; do - [[ -z "$repo" ]] && continue - key="${repo}|${npub}" - - if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then - continue + # Add archive context for unexpected states + if [[ "$prod_status" == "empty" && "$archive_status" != "missing" && "$archive_status" != "empty" ]]; then + if [[ -n "$context" ]]; then + 
context="$context, archive-has-data" + else + context="archive-has-data" fi - - echo "$repo | $npub | empty/blank in both (user never pushed)" >> "$no_action" - echo "$key" >> "$tmp_dir/processed.txt" - done - - # --- NO ACTION: Purgatory expired (system handled it) --- - while IFS='|' read -r repo npub; do - [[ -z "$repo" ]] && continue - key="${repo}|${npub}" - - if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then - continue - fi - - echo "$repo | $npub | purgatory expired (system already handled)" >> "$no_action" - echo "$key" >> "$tmp_dir/processed.txt" - ((count_no_action++)) || true - done < "$tmp_dir/purgatory_expired.txt" - - # --- ACTION REQUIRED: Complete in prod, missing from archive --- - if [[ -f "$comparison_dir/complete-prod-missing-archive.txt" ]]; then - while IFS= read -r line; do - [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue - - repo=$(echo "$line" | extract_repo) - npub=$(echo "$line" | extract_npub) - key="${repo}|${npub}" - - if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then - continue - fi - - # Check for parse failure - if grep -qF "$key" "$tmp_dir/parse_failures.txt" 2>/dev/null; then - echo "$repo | $npub | complete in prod, missing from archive, parse failure logged | investigate parse failure, may need re-announcement" >> "$manual_inv" - echo "$key" >> "$tmp_dir/processed.txt" - ((count_manual++)) || true - else - echo "$repo | $npub | complete in prod, missing from archive | trigger re-sync or investigate why not archived" >> "$action_req" - echo "$key" >> "$tmp_dir/processed.txt" - ((count_action++)) || true - fi - done < "$comparison_dir/complete-prod-missing-archive.txt" - fi - - # --- ACTION REQUIRED: Complete in prod, incomplete in archive --- - if [[ -f "$comparison_dir/complete-prod-incomplete-archive.txt" ]]; then - while IFS= read -r line; do - [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue - - repo=$(echo "$line" | extract_repo) - npub=$(echo "$line" | extract_npub) - key="${repo}|${npub}" - - if 
grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then - continue - fi - - # Extract archive status from line - archive_status=$(echo "$line" | grep -oP 'archive=\K[^ ]+' || echo "unknown") - - echo "$repo | $npub | complete in prod, $archive_status in archive | wait for sync to complete or trigger re-sync" >> "$action_req" - echo "$key" >> "$tmp_dir/processed.txt" - ((count_action++)) || true - done < "$comparison_dir/complete-prod-incomplete-archive.txt" fi - # --- ACTION REQUIRED: Incomplete in both --- - if [[ -f "$comparison_dir/incomplete-in-both.txt" ]]; then - while IFS= read -r line; do - [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue - - repo=$(echo "$line" | extract_repo) - npub=$(echo "$line" | extract_npub) - key="${repo}|${npub}" - - if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then - continue - fi - - # Extract statuses - prod_status=$(echo "$line" | grep -oP 'prod=\K[^ ]+' | tr -d '|' || echo "unknown") - archive_status=$(echo "$line" | grep -oP 'archive=\K[^ ]+' || echo "unknown") - - echo "$repo | $npub | incomplete in both (prod=$prod_status, archive=$archive_status) | investigate git data source, may need user to re-push" >> "$action_req" - echo "$key" >> "$tmp_dir/processed.txt" - ((count_action++)) || true - done < "$comparison_dir/incomplete-in-both.txt" + echo "${context:-none}" +} + +# Helper to convert category to human-readable status +cat_to_status() { + case "$1" in + cat1) echo "complete" ;; + cat2) echo "empty" ;; + cat3) echo "partial" ;; + cat4) echo "no-match" ;; + missing) echo "missing" ;; + *) echo "$1" ;; + esac +} + +LOOP_COUNT=0 +for key in "${!ALL_REPOS[@]}"; do + LOOP_COUNT=$((LOOP_COUNT + 1)) + [[ $((LOOP_COUNT % 100)) -eq 0 ]] && log_info "Processed $LOOP_COUNT repos..." 
+ IFS='|' read -r repo npub <<< "$key" + + prod_cat="${PROD_CAT[$key]:-missing}" + archive_cat="${ARCHIVE_CAT[$key]:-missing}" + prod_status=$(cat_to_status "$prod_cat") + archive_status=$(cat_to_status "$archive_cat") + + # Decision tree implementation + + # 1. Is there a kind 5 deletion event? + if [[ -n "${DELETED[$key]:-}" ]]; then + context=$(get_context "$key" "$prod_status" "$archive_status") + READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | deleted by user") + COUNTS[ready_deleted]=$((COUNTS[ready_deleted] + 1)) + continue fi - # --- MANUAL INVESTIGATION: In archive but not prod --- - if [[ -f "$comparison_dir/in-archive-not-prod.txt" ]]; then - while IFS= read -r line; do - [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue - - repo=$(echo "$line" | extract_repo) - npub=$(echo "$line" | extract_npub) - key="${repo}|${npub}" - - if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then - continue + # 2. What is the prod status? + case "$prod_cat" in + missing) + # Not in prod + if [[ "$archive_cat" != "missing" ]]; then + # In archive but not in prod -> no action (archive-only) + context=$(get_context "$key" "$prod_status" "$archive_status") + READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | archive-only (not in prod)") + COUNTS[ready_archive_only]=$((COUNTS[ready_archive_only] + 1)) + elif [[ -n "${PURGATORY[$key]:-}" ]]; then + # Purgatory only, not in prod -> no action + context="purgatory-expired" + READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | purgatory-only (not in prod)") + COUNTS[ready_not_in_prod]=$((COUNTS[ready_not_in_prod] + 1)) fi + # Otherwise skip (not a real repo - no data anywhere) + ;; - archive_status=$(echo "$line" | grep -oP 'archive=\K[^ ]+' || echo "unknown") + cat2) + # Empty in prod -> ALWAYS no action required + context=$(get_context "$key" "$prod_status" "$archive_status") + READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | 
empty in prod (user never pushed)") + COUNTS[ready_empty_prod]=$((COUNTS[ready_empty_prod] + 1)) + ;; - # Check if it was deleted - if grep -qF "$key" "$tmp_dir/all_deletions.txt" 2>/dev/null; then - echo "$repo | $npub | in archive not prod, deletion exists | verify deletion was intentional" >> "$manual_inv" + cat1) + # Complete in prod + if [[ "$archive_cat" == "cat1" ]]; then + # Complete in both -> no action + context=$(get_context "$key" "$prod_status" "$archive_status") + READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete in both") + COUNTS[ready_complete_both]=$((COUNTS[ready_complete_both] + 1)) else - echo "$repo | $npub | in archive ($archive_status) but not in prod | may be new announcement or deleted from prod" >> "$manual_inv" + # Complete in prod, missing/incomplete in archive + # Check for parse failure - if so, needs manual review + if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then + context=$(get_context "$key" "$prod_status" "$archive_status") + REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete in prod with parse failure") + COUNTS[review_parse_failure]=$((COUNTS[review_parse_failure] + 1)) + else + # Needs resync - include purgatory context + context=$(get_context "$key" "$prod_status" "$archive_status") + if [[ "$archive_cat" == "missing" ]]; then + RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync to archive") + COUNTS[resync_missing_archive]=$((COUNTS[resync_missing_archive] + 1)) + else + RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync (archive incomplete)") + COUNTS[resync_incomplete_archive]=$((COUNTS[resync_incomplete_archive] + 1)) + fi + fi fi - echo "$key" >> "$tmp_dir/processed.txt" - ((count_manual++)) || true - done < "$comparison_dir/in-archive-not-prod.txt" - fi - - # --- ACTION REQUIRED: Parse failures not yet processed --- - while IFS='|' read -r repo npub; do - [[ -z "$repo" ]] && 
continue - key="${repo}|${npub}" - - if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then - continue - fi - - echo "$repo | $npub | parse failure logged | fix event format or request user to re-announce" >> "$action_req" - echo "$key" >> "$tmp_dir/processed.txt" - ((count_action++)) || true - done < "$tmp_dir/parse_failures.txt" - - # --- MANUAL INVESTIGATION: Prod category 3/4 not yet processed --- - for cat_file in "$tmp_dir/prod_cat3.txt" "$tmp_dir/prod_cat4.txt"; do - [[ ! -f "$cat_file" ]] && continue - cat_name=$(basename "$cat_file" .txt | sed 's/prod_//') - while IFS='|' read -r repo npub; do - [[ -z "$repo" ]] && continue - key="${repo}|${npub}" + ;; - if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then - continue - fi + cat3) + # Partial in prod -> ALWAYS manual investigation + context=$(get_context "$key" "$prod_status" "$archive_status") + REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | partial in prod (investigate git data)") + COUNTS[review_partial_prod]=$((COUNTS[review_partial_prod] + 1)) + ;; - if [[ "$cat_name" == "cat3" ]]; then - echo "$repo | $npub | partial match in prod, not in comparison results | investigate git ref mismatch" >> "$manual_inv" - else - echo "$repo | $npub | no match in prod (git exists but refs don't match) | investigate git data corruption" >> "$manual_inv" - fi - echo "$key" >> "$tmp_dir/processed.txt" - ((count_manual++)) || true - done < "$cat_file" + cat4) + # No-match in prod -> ALWAYS manual investigation + context=$(get_context "$key" "$prod_status" "$archive_status") + REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | no-match in prod (git corruption)") + COUNTS[review_nomatch_prod]=$((COUNTS[review_nomatch_prod] + 1)) + ;; + esac +done + +# ============================================================================ +# Phase 4: Write output files +# ============================================================================ + +log_info 
"Writing output files..." + +TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S+00:00") + +# Write ready-for-migration.txt +{ + echo "# Ready for Migration - No action required" + echo "# Generated: $TIMESTAMP" + echo "# Format: repo | npub | prod_status | archive_status | context | reason" + echo "#" + for line in "${READY_LINES[@]}"; do + echo "$line" done - - # ========================================================================= - # STEP 5: Count final results - # ========================================================================= - count_no_action=$(count_lines "$no_action") - count_action=$(count_lines "$action_req") - count_manual=$(count_lines "$manual_inv") - - # Ensure counts are valid integers - count_no_action=${count_no_action:-0} - count_action=${count_action:-0} - count_manual=${count_manual:-0} - - local total=$((count_no_action + count_action + count_manual)) - - # Handle division by zero - if [[ $total -eq 0 ]]; then - total=1 # Avoid division by zero in percentage calculations - log_warn "No repos were classified. Check input files." - fi - - # ========================================================================= - # STEP 6: Generate summary - # ========================================================================= - log_info "Generating summary..." 
- - cat > "$summary" << EOF -# Migration Classification Summary -Generated: $(date -Iseconds) -Analysis Directory: $analysis_dir - -## Overview - -| Category | Count | Percentage | -|----------|-------|------------| -| No Action Required | $count_no_action | $(awk "BEGIN {printf \"%.1f\", ($count_no_action/$total)*100}")% | -| Action Required | $count_action | $(awk "BEGIN {printf \"%.1f\", ($count_action/$total)*100}")% | -| Manual Investigation | $count_manual | $(awk "BEGIN {printf \"%.1f\", ($count_manual/$total)*100}")% | -| **Total** | **$total** | **100%** | - -## No Action Required ($count_no_action repos) - -These repositories are ready for migration or don't need migration: - -EOF - - # Breakdown of no-action reasons - echo "| Reason | Count |" >> "$summary" - echo "|--------|-------|" >> "$summary" - grep -v '^#' "$no_action" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn > "$tmp_dir/no_action_breakdown.txt" || true - while read -r cnt reason; do - echo "| $reason | $cnt |" >> "$summary" - done < "$tmp_dir/no_action_breakdown.txt" - - cat >> "$summary" << EOF - -## Action Required ($count_action repos) - -These repositories need intervention before migration: - -EOF - - # Breakdown of action reasons - echo "| Reason | Count | Suggested Action |" >> "$summary" - echo "|--------|-------|------------------|" >> "$summary" - grep -v '^#' "$action_req" 2>/dev/null | awk -F' \\| ' '{print $3 "|" $4}' | sort | uniq -c | sort -rn > "$tmp_dir/action_breakdown.txt" || true - while read -r cnt reason_action; do - reason=$(echo "$reason_action" | cut -d'|' -f1) - action=$(echo "$reason_action" | cut -d'|' -f2) - echo "| $reason | $cnt | $action |" >> "$summary" - done < "$tmp_dir/action_breakdown.txt" - - cat >> "$summary" << EOF - -## Manual Investigation ($count_manual repos) - -These repositories have conflicting or unexpected states requiring human review: - -EOF - - # Breakdown of manual investigation reasons - echo "| Reason | Count |" >> 
"$summary" - echo "|--------|-------|" >> "$summary" - grep -v '^#' "$manual_inv" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn > "$tmp_dir/manual_breakdown.txt" || true - while read -r cnt reason; do - echo "| $reason | $cnt |" >> "$summary" - done < "$tmp_dir/manual_breakdown.txt" - - # Pre-compute counts from temp files before they might be cleaned up - local prod_del_count archive_del_count - local prod_cat1_count prod_cat2_count prod_cat3_count prod_cat4_count - local archive_cat1_count archive_cat2_count archive_cat3_count archive_cat4_count - local parse_fail_count purgatory_count - - prod_del_count=$(wc -l < "$tmp_dir/prod_deletions.txt" 2>/dev/null | tr -d ' ') || prod_del_count=0 - archive_del_count=$(wc -l < "$tmp_dir/archive_deletions.txt" 2>/dev/null | tr -d ' ') || archive_del_count=0 - prod_cat1_count=$(wc -l < "$tmp_dir/prod_cat1.txt" 2>/dev/null | tr -d ' ') || prod_cat1_count=0 - prod_cat2_count=$(wc -l < "$tmp_dir/prod_cat2.txt" 2>/dev/null | tr -d ' ') || prod_cat2_count=0 - prod_cat3_count=$(wc -l < "$tmp_dir/prod_cat3.txt" 2>/dev/null | tr -d ' ') || prod_cat3_count=0 - prod_cat4_count=$(wc -l < "$tmp_dir/prod_cat4.txt" 2>/dev/null | tr -d ' ') || prod_cat4_count=0 - archive_cat1_count=$(wc -l < "$tmp_dir/archive_cat1.txt" 2>/dev/null | tr -d ' ') || archive_cat1_count=0 - archive_cat2_count=$(wc -l < "$tmp_dir/archive_cat2.txt" 2>/dev/null | tr -d ' ') || archive_cat2_count=0 - archive_cat3_count=$(wc -l < "$tmp_dir/archive_cat3.txt" 2>/dev/null | tr -d ' ') || archive_cat3_count=0 - archive_cat4_count=$(wc -l < "$tmp_dir/archive_cat4.txt" 2>/dev/null | tr -d ' ') || archive_cat4_count=0 - parse_fail_count=$(wc -l < "$tmp_dir/parse_failures.txt" 2>/dev/null | tr -d ' ') || parse_fail_count=0 - purgatory_count=$(wc -l < "$tmp_dir/purgatory_expired.txt" 2>/dev/null | tr -d ' ') || purgatory_count=0 - - cat >> "$summary" << EOF - -## Input Data Summary - -### Phase 1 (Events) -- Prod deletions: $prod_del_count -- Archive 
deletions: $archive_del_count - -### Phase 3 (Categories) -**Prod:** -- Category 1 (complete): $prod_cat1_count -- Category 2 (empty): $prod_cat2_count -- Category 3 (partial): $prod_cat3_count -- Category 4 (no match): $prod_cat4_count - -**Archive:** -- Category 1 (complete): $archive_cat1_count -- Category 2 (empty): $archive_cat2_count -- Category 3 (partial): $archive_cat3_count -- Category 4 (no match): $archive_cat4_count - -### Phase 4 (Logs) -- Parse failures: $parse_fail_count -- Purgatory expired: $purgatory_count +} > "$READY_FILE" + +# Write needs-resync.txt +{ + echo "# Needs Re-sync - Action required" + echo "# Generated: $TIMESTAMP" + echo "# Format: repo | npub | prod_status | archive_status | context | action" + echo "#" + echo "# Context meanings:" + echo "# purgatory-expired = archive tried to sync but failed (30min timeout)" + echo "# none = archive never tried or announcement missing" + echo "#" + for line in "${RESYNC_LINES[@]}"; do + echo "$line" + done +} > "$RESYNC_FILE" + +# Write manual-review.txt +{ + echo "# Manual Review Required - Investigation needed" + echo "# Generated: $TIMESTAMP" + echo "# Format: repo | npub | prod_status | archive_status | context | reason" + echo "#" + for line in "${REVIEW_LINES[@]}"; do + echo "$line" + done +} > "$REVIEW_FILE" -## Recommended Next Steps +# ============================================================================ +# Phase 5: Generate summary +# ============================================================================ -1. **Review action-required.txt** - Address these repos before migration -2. **Review manual-investigation.txt** - Investigate unusual states -3. **Verify no-action-required.txt** - Spot-check a few repos to confirm -4. **Plan migration window** - Schedule cutover when action items are resolved +log_info "Generating summary..." 
-## Output Files +TOTAL_READY="${#READY_LINES[@]}" +TOTAL_RESYNC="${#RESYNC_LINES[@]}" +TOTAL_REVIEW="${#REVIEW_LINES[@]}" +TOTAL=$((TOTAL_READY + TOTAL_RESYNC + TOTAL_REVIEW)) -- \`results/no-action-required.txt\` - $count_no_action repos ready for migration -- \`results/action-required.txt\` - $count_action repos needing intervention -- \`results/manual-investigation.txt\` - $count_manual repos needing human review -- \`results/summary.txt\` - This summary file -EOF +# Calculate percentages +if [[ $TOTAL -gt 0 ]]; then + PCT_READY=$(awk "BEGIN {printf \"%.1f\", ($TOTAL_READY / $TOTAL) * 100}") + PCT_RESYNC=$(awk "BEGIN {printf \"%.1f\", ($TOTAL_RESYNC / $TOTAL) * 100}") + PCT_REVIEW=$(awk "BEGIN {printf \"%.1f\", ($TOTAL_REVIEW / $TOTAL) * 100}") +else + PCT_READY="0.0" + PCT_RESYNC="0.0" + PCT_REVIEW="0.0" +fi - # ========================================================================= - # STEP 7: Display results - # ========================================================================= +{ + echo "# Migration Classification Summary" + echo "Generated: $TIMESTAMP" + echo "Analysis Directory: $ANALYSIS_DIR" echo "" - log_info "=== Classification Complete ===" + echo "## Overview" echo "" - log_success "No Action Required: $count_no_action repos" - log_warn "Action Required: $count_action repos" - log_error "Manual Investigation: $count_manual repos" + echo "| Category | Count | Percentage |" + echo "|----------|-------|------------|" + echo "| Ready for Migration | $TOTAL_READY | $PCT_READY% |" + echo "| Needs Re-sync | $TOTAL_RESYNC | $PCT_RESYNC% |" + echo "| Manual Review | $TOTAL_REVIEW | $PCT_REVIEW% |" + echo "| **Total** | **$TOTAL** | **100%** |" echo "" - log_info "Total: $total repos classified" + echo "## Tier 1: Ready for Migration ($TOTAL_READY repos)" echo "" - log_info "Output files:" - echo " $no_action" - echo " $action_req" - echo " $manual_inv" - echo " $summary" + echo "These repositories are ready for migration or don't need migration:" 
echo "" - - # Show top action items - if [[ $count_action -gt 0 ]]; then - log_info "Top action items:" - grep -v '^#' "$action_req" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn | head -5 | while read -r cnt reason; do - echo " - $reason: $cnt repos" - done - echo "" - fi - - # Show top investigation items - if [[ $count_manual -gt 0 ]]; then - log_info "Top investigation items:" - grep -v '^#' "$manual_inv" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn | head -5 | while read -r cnt reason; do - echo " - $reason: $cnt repos" - done - echo "" - fi - - log_info "See $summary for full details and recommended next steps." -} - -main "$@" + echo "| Reason | Count |" + echo "|--------|-------|" + echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |" + echo "| deleted by user | ${COUNTS[ready_deleted]} |" + echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |" + echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |" + echo "| purgatory-only (not in prod) | ${COUNTS[ready_not_in_prod]} |" + echo "" + echo "## Tier 2: Needs Re-sync ($TOTAL_RESYNC repos)" + echo "" + echo "These repositories need re-sync to archive before migration:" + echo "" + echo "| Reason | Count | Action |" + echo "|--------|-------|--------|" + echo "| complete in prod, missing from archive | ${COUNTS[resync_missing_archive]} | trigger re-sync |" + echo "| complete in prod, incomplete in archive | ${COUNTS[resync_incomplete_archive]} | trigger re-sync |" + echo "" + echo "### Purgatory Context" + echo "" + echo "Repos in needs-resync.txt include purgatory context:" + echo "- **purgatory-expired**: Archive tried to sync but failed (30min timeout)" + echo "- **none**: Archive never tried or announcement missing" + echo "" + echo "## Tier 3: Manual Review ($TOTAL_REVIEW repos)" + echo "" + echo "These repositories require human investigation:" + echo "" + echo "| Reason | Count |" + echo 
"|--------|-------|" + echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |" + echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |" + echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |" + echo "" + echo "## Input Data Summary" + echo "" + echo "### Prod Categories" + echo "- Category 1 (complete): $(wc -l < "$PROD_DIR/category1-complete-match.txt")" + echo "- Category 2 (empty): $(wc -l < "$PROD_DIR/category2-empty-blank.txt")" + echo "- Category 3 (partial): $(wc -l < "$PROD_DIR/category3-partial-match.txt")" + echo "- Category 4 (no match): $(wc -l < "$PROD_DIR/category4-no-match.txt")" + echo "" + echo "### Archive Categories" + echo "- Category 1 (complete): $(wc -l < "$ARCHIVE_DIR/category1-complete-match.txt")" + echo "- Category 2 (empty): $(wc -l < "$ARCHIVE_DIR/category2-empty-blank.txt")" + echo "- Category 3 (partial): $(wc -l < "$ARCHIVE_DIR/category3-partial-match.txt")" + echo "- Category 4 (no match): $(wc -l < "$ARCHIVE_DIR/category4-no-match.txt")" + echo "" + echo "### Logs" + echo "- Parse failures: $(grep -c -v '^#' "$LOGS_DIR/parse-failures.txt" 2>/dev/null || echo 0)" + echo "- Purgatory expired: $(grep -c -v '^#' "$LOGS_DIR/purgatory-expired.txt" 2>/dev/null || echo 0)" + echo "" + echo "## Output Files" + echo "" + echo "- \`results/ready-for-migration.txt\` - $TOTAL_READY repos ready for migration" + echo "- \`results/needs-resync.txt\` - $TOTAL_RESYNC repos needing re-sync" + echo "- \`results/manual-review.txt\` - $TOTAL_REVIEW repos needing investigation" + echo "- \`results/summary.txt\` - This summary file" + echo "" + echo "## Recommended Next Steps" + echo "" + echo "1. **Review needs-resync.txt** - Trigger re-sync for these repos" + echo "2. **Review manual-review.txt** - Investigate unusual states" + echo "3. **Verify ready-for-migration.txt** - Spot-check a few repos" + echo "4. 
**Plan migration window** - Schedule cutover when action items resolved" +} > "$SUMMARY_FILE" + +# ============================================================================ +# Phase 6: Print summary to console +# ============================================================================ + +echo "" +log_success "Classification complete!" +echo "" +echo "=== Summary ===" +echo "Ready for Migration: $TOTAL_READY ($PCT_READY%)" +echo " - Complete in both: ${COUNTS[ready_complete_both]}" +echo " - Deleted by user: ${COUNTS[ready_deleted]}" +echo " - Empty in prod: ${COUNTS[ready_empty_prod]}" +echo " - Archive-only: ${COUNTS[ready_archive_only]}" +echo " - Purgatory-only: ${COUNTS[ready_not_in_prod]}" +echo "" +echo "Needs Re-sync: $TOTAL_RESYNC ($PCT_RESYNC%)" +echo " - Missing from archive: ${COUNTS[resync_missing_archive]}" +echo " - Incomplete in archive: ${COUNTS[resync_incomplete_archive]}" +echo "" +echo "Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)" +echo " - Partial in prod: ${COUNTS[review_partial_prod]}" +echo " - No-match in prod: ${COUNTS[review_nomatch_prod]}" +echo " - Parse failures: ${COUNTS[review_parse_failure]}" +echo "" +echo "Total: $TOTAL repos" +echo "" +echo "Output files:" +echo " $READY_FILE" +echo " $RESYNC_FILE" +echo " $REVIEW_FILE" +echo " $SUMMARY_FILE" -- cgit v1.2.3 From d6480568cd0b1de692c587b6ab9fffea2909cc72 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Mon, 26 Jan 2026 12:26:37 +0000 Subject: Remove duplicate classification script from scripts/ --- scripts/40-classify-actions.sh | 588 ----------------------------------------- 1 file changed, 588 deletions(-) delete mode 100755 scripts/40-classify-actions.sh diff --git a/scripts/40-classify-actions.sh b/scripts/40-classify-actions.sh deleted file mode 100755 index 021a2da..0000000 --- a/scripts/40-classify-actions.sh +++ /dev/null @@ -1,588 +0,0 @@ -#!/usr/bin/env bash -# -# 40-classify-actions.sh - Classify repos by migration action required -# -# Implements the redesigned 
classification system (Option B) with user feedback: -# -# Tier 1: No Action Required (ready-for-migration.txt) -# - Complete in both (prod=cat1, archive=cat1) -# - Deleted by user (kind 5 event) -# - Empty in prod (prod=cat2, any archive status) -# - Archive-only (archive=any, prod=missing) -# - Not in prod (purgatory-only, prod=missing) -# -# Tier 2: Action Required (needs-resync.txt) -# - Complete in prod, missing from archive (with purgatory context) -# - Complete in prod, incomplete in archive (with purgatory context) -# -# Tier 3: Manual Investigation (manual-review.txt) -# - Partial in prod (prod=cat3) -# - No-match in prod (prod=cat4) -# - Parse failures -# - Conflicting states -# -# Usage: ./40-classify-actions.sh -# -# Output format: repo | npub | prod_status | archive_status | context | action -# - -set -euo pipefail - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -log_info() { echo -e "${BLUE}[INFO]${NC} $*"; } -log_success() { echo -e "${GREEN}[OK]${NC} $*"; } -log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } -log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; } - -# Check arguments -if [[ $# -lt 1 ]]; then - echo "Usage: $0 " - echo "Example: $0 work/migration-analysis-20260123-200701" - exit 1 -fi - -ANALYSIS_DIR="$1" - -# Validate analysis directory -if [[ ! -d "$ANALYSIS_DIR" ]]; then - log_error "Analysis directory not found: $ANALYSIS_DIR" - exit 1 -fi - -# Define paths -PROD_DIR="$ANALYSIS_DIR/prod" -ARCHIVE_DIR="$ANALYSIS_DIR/archive" -COMPARISON_DIR="$ANALYSIS_DIR/comparison" -LOGS_DIR="$ANALYSIS_DIR/logs" -RESULTS_DIR="$ANALYSIS_DIR/results" - -# Validate required directories -for dir in "$PROD_DIR" "$ARCHIVE_DIR" "$COMPARISON_DIR" "$LOGS_DIR"; do - if [[ ! 
-d "$dir" ]]; then - log_error "Required directory not found: $dir" - exit 1 - fi -done - -# Create results directory -mkdir -p "$RESULTS_DIR" - -# Output files -READY_FILE="$RESULTS_DIR/ready-for-migration.txt" -RESYNC_FILE="$RESULTS_DIR/needs-resync.txt" -REVIEW_FILE="$RESULTS_DIR/manual-review.txt" -SUMMARY_FILE="$RESULTS_DIR/summary.txt" - -# Temporary files for processing -TMP_DIR=$(mktemp -d) -trap 'rm -rf "$TMP_DIR"' EXIT - -log_info "Starting classification with revised system (Option B)" -log_info "Analysis directory: $ANALYSIS_DIR" - -# ============================================================================ -# Phase 1: Build lookup tables from source data -# ============================================================================ - -log_info "Building lookup tables..." - -# Build prod category lookup: repo|npub -> category -declare -A PROD_CAT -while IFS='|' read -r repo npub rest; do - repo=$(echo "$repo" | xargs) - npub=$(echo "$npub" | xargs) - [[ -z "$repo" || -z "$npub" ]] && continue - PROD_CAT["$repo|$npub"]="cat1" -done < "$PROD_DIR/category1-complete-match.txt" - -while IFS='|' read -r repo npub rest; do - repo=$(echo "$repo" | xargs) - npub=$(echo "$npub" | xargs) - [[ -z "$repo" || -z "$npub" ]] && continue - PROD_CAT["$repo|$npub"]="cat2" -done < "$PROD_DIR/category2-empty-blank.txt" - -while IFS='|' read -r repo npub rest; do - repo=$(echo "$repo" | xargs) - npub=$(echo "$npub" | xargs) - [[ -z "$repo" || -z "$npub" ]] && continue - PROD_CAT["$repo|$npub"]="cat3" -done < "$PROD_DIR/category3-partial-match.txt" - -while IFS='|' read -r repo npub rest; do - repo=$(echo "$repo" | xargs) - npub=$(echo "$npub" | xargs) - [[ -z "$repo" || -z "$npub" ]] && continue - PROD_CAT["$repo|$npub"]="cat4" -done < "$PROD_DIR/category4-no-match.txt" - -log_info "Loaded ${#PROD_CAT[@]} prod entries" - -# Build archive category lookup: repo|npub -> category -declare -A ARCHIVE_CAT -while IFS='|' read -r repo npub rest; do - repo=$(echo "$repo" | xargs) 
- npub=$(echo "$npub" | xargs) - [[ -z "$repo" || -z "$npub" ]] && continue - ARCHIVE_CAT["$repo|$npub"]="cat1" -done < "$ARCHIVE_DIR/category1-complete-match.txt" - -while IFS='|' read -r repo npub rest; do - repo=$(echo "$repo" | xargs) - npub=$(echo "$npub" | xargs) - [[ -z "$repo" || -z "$npub" ]] && continue - ARCHIVE_CAT["$repo|$npub"]="cat2" -done < "$ARCHIVE_DIR/category2-empty-blank.txt" - -while IFS='|' read -r repo npub rest; do - repo=$(echo "$repo" | xargs) - npub=$(echo "$npub" | xargs) - [[ -z "$repo" || -z "$npub" ]] && continue - ARCHIVE_CAT["$repo|$npub"]="cat3" -done < "$ARCHIVE_DIR/category3-partial-match.txt" - -while IFS='|' read -r repo npub rest; do - repo=$(echo "$repo" | xargs) - npub=$(echo "$npub" | xargs) - [[ -z "$repo" || -z "$npub" ]] && continue - ARCHIVE_CAT["$repo|$npub"]="cat4" -done < "$ARCHIVE_DIR/category4-no-match.txt" - -log_info "Loaded ${#ARCHIVE_CAT[@]} archive entries" - -# Build purgatory lookup: repo|npub -> 1 (if purgatory expired) -declare -A PURGATORY -if [[ -f "$LOGS_DIR/purgatory-expired.txt" ]]; then - while IFS=$'\t' read -r repo npub timestamp reason; do - # Skip comments and empty lines - [[ "$repo" =~ ^# ]] && continue - [[ -z "$repo" || -z "$npub" ]] && continue - PURGATORY["$repo|$npub"]=1 - done < "$LOGS_DIR/purgatory-expired.txt" -fi -log_info "Loaded ${#PURGATORY[@]} purgatory entries" - -# Build parse failure lookup: repo|npub -> 1 (if parse failure logged) -# Parse failures file format: event_idkindreasonreponpub -declare -A PARSE_FAIL -if [[ -f "$LOGS_DIR/parse-failures.txt" ]]; then - while IFS=$'\t' read -r event_id kind reason repo npub; do - # Skip comments and empty lines - [[ "$event_id" =~ ^# ]] && continue - [[ -z "$repo" || -z "$npub" ]] && continue - PARSE_FAIL["$repo|$npub"]=1 - done < "$LOGS_DIR/parse-failures.txt" -fi -log_info "Loaded ${#PARSE_FAIL[@]} parse failure entries" - -# Build deletion lookup: repo|npub -> 1 (if kind 5 deletion event) -# Deletions are in NDJSON format with "a" 
tags like "30617:pubkey_hex:repo" -# We need to convert hex pubkeys to npub format using nak -declare -A DELETED - -# Helper function to process deletion file (NDJSON format) -# Extracts unique pubkey_hex:repo pairs and converts to npub -process_deletions() { - local file="$1" - [[ ! -f "$file" ]] && return - - # Extract unique pubkey_hex|repo pairs from NDJSON - # Each line is a JSON object, extract "a" tags - local pairs - pairs=$(jq -r '.tags[] | select(.[0] == "a") | .[1]' "$file" 2>/dev/null | \ - sed 's/^30617://' | awk -F: '{print $1 "|" $2}' | sort -u) - - # Get unique hex pubkeys for batch conversion - local hex_keys - hex_keys=$(echo "$pairs" | cut -d'|' -f1 | sort -u) - - # Build hex->npub lookup via batch nak call - declare -A HEX_TO_NPUB - while read -r hex; do - [[ -z "$hex" ]] && continue - local npub - npub=$(nak encode npub "$hex" 2>/dev/null || echo "") - [[ -n "$npub" ]] && HEX_TO_NPUB["$hex"]="$npub" - done <<< "$hex_keys" - - # Now process pairs with cached npub values - while IFS='|' read -r pubkey_hex repo; do - [[ -z "$repo" || -z "$pubkey_hex" ]] && continue - local npub="${HEX_TO_NPUB[$pubkey_hex]:-}" - [[ -z "$npub" ]] && continue - DELETED["$repo|$npub"]=1 - done <<< "$pairs" -} - -# Process prod and archive deletions -process_deletions "$PROD_DIR/raw/deletions.json" -process_deletions "$ARCHIVE_DIR/raw/deletions.json" -log_info "Loaded ${#DELETED[@]} deletion entries" - -# ============================================================================ -# Phase 2: Build unique repo list from all sources -# ============================================================================ - -log_info "Building unique repo list..." 
- -declare -A ALL_REPOS -for key in "${!PROD_CAT[@]}"; do - ALL_REPOS["$key"]=1 -done -for key in "${!ARCHIVE_CAT[@]}"; do - ALL_REPOS["$key"]=1 -done -for key in "${!PURGATORY[@]}"; do - ALL_REPOS["$key"]=1 -done - -log_info "Total unique repos: ${#ALL_REPOS[@]}" - -# ============================================================================ -# Phase 3: Classify each repo according to revised decision tree -# ============================================================================ - -log_info "Classifying repos..." - -# Counters for summary -declare -A COUNTS -COUNTS[ready_complete_both]=0 -COUNTS[ready_deleted]=0 -COUNTS[ready_empty_prod]=0 -COUNTS[ready_archive_only]=0 -COUNTS[ready_not_in_prod]=0 -COUNTS[resync_missing_archive]=0 -COUNTS[resync_incomplete_archive]=0 -COUNTS[review_partial_prod]=0 -COUNTS[review_nomatch_prod]=0 -COUNTS[review_parse_failure]=0 -COUNTS[review_conflicting]=0 - -# Output arrays -declare -a READY_LINES -declare -a RESYNC_LINES -declare -a REVIEW_LINES - -# Helper function to get context string -get_context() { - local key="$1" - local prod_status="$2" - local archive_status="$3" - local context="" - - # Check purgatory - if [[ -n "${PURGATORY[$key]:-}" ]]; then - context="purgatory-expired" - fi - - # Check parse failure - if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then - if [[ -n "$context" ]]; then - context="$context, parse-failure" - else - context="parse-failure" - fi - fi - - # Add archive context for unexpected states - if [[ "$prod_status" == "empty" && "$archive_status" != "missing" && "$archive_status" != "empty" ]]; then - if [[ -n "$context" ]]; then - context="$context, archive-has-data" - else - context="archive-has-data" - fi - fi - - echo "${context:-none}" -} - -# Helper to convert category to human-readable status -cat_to_status() { - case "$1" in - cat1) echo "complete" ;; - cat2) echo "empty" ;; - cat3) echo "partial" ;; - cat4) echo "no-match" ;; - missing) echo "missing" ;; - *) echo "$1" ;; - esac -} - 
-LOOP_COUNT=0 -for key in "${!ALL_REPOS[@]}"; do - LOOP_COUNT=$((LOOP_COUNT + 1)) - [[ $((LOOP_COUNT % 100)) -eq 0 ]] && log_info "Processed $LOOP_COUNT repos..." - IFS='|' read -r repo npub <<< "$key" - - prod_cat="${PROD_CAT[$key]:-missing}" - archive_cat="${ARCHIVE_CAT[$key]:-missing}" - prod_status=$(cat_to_status "$prod_cat") - archive_status=$(cat_to_status "$archive_cat") - - # Decision tree implementation - - # 1. Is there a kind 5 deletion event? - if [[ -n "${DELETED[$key]:-}" ]]; then - context=$(get_context "$key" "$prod_status" "$archive_status") - READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | deleted by user") - COUNTS[ready_deleted]=$((COUNTS[ready_deleted] + 1)) - continue - fi - - # 2. What is the prod status? - case "$prod_cat" in - missing) - # Not in prod - if [[ "$archive_cat" != "missing" ]]; then - # In archive but not in prod -> no action (archive-only) - context=$(get_context "$key" "$prod_status" "$archive_status") - READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | archive-only (not in prod)") - COUNTS[ready_archive_only]=$((COUNTS[ready_archive_only] + 1)) - elif [[ -n "${PURGATORY[$key]:-}" ]]; then - # Purgatory only, not in prod -> no action - context="purgatory-expired" - READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | purgatory-only (not in prod)") - COUNTS[ready_not_in_prod]=$((COUNTS[ready_not_in_prod] + 1)) - fi - # Otherwise skip (not a real repo - no data anywhere) - ;; - - cat2) - # Empty in prod -> ALWAYS no action required - context=$(get_context "$key" "$prod_status" "$archive_status") - READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | empty in prod (user never pushed)") - COUNTS[ready_empty_prod]=$((COUNTS[ready_empty_prod] + 1)) - ;; - - cat1) - # Complete in prod - if [[ "$archive_cat" == "cat1" ]]; then - # Complete in both -> no action - context=$(get_context "$key" "$prod_status" "$archive_status") - 
READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete in both") - COUNTS[ready_complete_both]=$((COUNTS[ready_complete_both] + 1)) - else - # Complete in prod, missing/incomplete in archive - # Check for parse failure - if so, needs manual review - if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then - context=$(get_context "$key" "$prod_status" "$archive_status") - REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete in prod with parse failure") - COUNTS[review_parse_failure]=$((COUNTS[review_parse_failure] + 1)) - else - # Needs resync - include purgatory context - context=$(get_context "$key" "$prod_status" "$archive_status") - if [[ "$archive_cat" == "missing" ]]; then - RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync to archive") - COUNTS[resync_missing_archive]=$((COUNTS[resync_missing_archive] + 1)) - else - RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync (archive incomplete)") - COUNTS[resync_incomplete_archive]=$((COUNTS[resync_incomplete_archive] + 1)) - fi - fi - fi - ;; - - cat3) - # Partial in prod -> ALWAYS manual investigation - context=$(get_context "$key" "$prod_status" "$archive_status") - REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | partial in prod (investigate git data)") - COUNTS[review_partial_prod]=$((COUNTS[review_partial_prod] + 1)) - ;; - - cat4) - # No-match in prod -> ALWAYS manual investigation - context=$(get_context "$key" "$prod_status" "$archive_status") - REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | no-match in prod (git corruption)") - COUNTS[review_nomatch_prod]=$((COUNTS[review_nomatch_prod] + 1)) - ;; - esac -done - -# ============================================================================ -# Phase 4: Write output files -# ============================================================================ - -log_info "Writing output 
files..." - -TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S+00:00") - -# Write ready-for-migration.txt -{ - echo "# Ready for Migration - No action required" - echo "# Generated: $TIMESTAMP" - echo "# Format: repo | npub | prod_status | archive_status | context | reason" - echo "#" - for line in "${READY_LINES[@]}"; do - echo "$line" - done -} > "$READY_FILE" - -# Write needs-resync.txt -{ - echo "# Needs Re-sync - Action required" - echo "# Generated: $TIMESTAMP" - echo "# Format: repo | npub | prod_status | archive_status | context | action" - echo "#" - echo "# Context meanings:" - echo "# purgatory-expired = archive tried to sync but failed (30min timeout)" - echo "# none = archive never tried or announcement missing" - echo "#" - for line in "${RESYNC_LINES[@]}"; do - echo "$line" - done -} > "$RESYNC_FILE" - -# Write manual-review.txt -{ - echo "# Manual Review Required - Investigation needed" - echo "# Generated: $TIMESTAMP" - echo "# Format: repo | npub | prod_status | archive_status | context | reason" - echo "#" - for line in "${REVIEW_LINES[@]}"; do - echo "$line" - done -} > "$REVIEW_FILE" - -# ============================================================================ -# Phase 5: Generate summary -# ============================================================================ - -log_info "Generating summary..." 
- -TOTAL_READY=${#READY_LINES[@]} -TOTAL_RESYNC=${#RESYNC_LINES[@]} -TOTAL_REVIEW=${#REVIEW_LINES[@]} -TOTAL=$((TOTAL_READY + TOTAL_RESYNC + TOTAL_REVIEW)) - -# Calculate percentages -if [[ $TOTAL -gt 0 ]]; then - PCT_READY=$(awk "BEGIN {printf \"%.1f\", ($TOTAL_READY / $TOTAL) * 100}") - PCT_RESYNC=$(awk "BEGIN {printf \"%.1f\", ($TOTAL_RESYNC / $TOTAL) * 100}") - PCT_REVIEW=$(awk "BEGIN {printf \"%.1f\", ($TOTAL_REVIEW / $TOTAL) * 100}") -else - PCT_READY="0.0" - PCT_RESYNC="0.0" - PCT_REVIEW="0.0" -fi - -{ - echo "# Migration Classification Summary" - echo "Generated: $TIMESTAMP" - echo "Analysis Directory: $ANALYSIS_DIR" - echo "" - echo "## Overview" - echo "" - echo "| Category | Count | Percentage |" - echo "|----------|-------|------------|" - echo "| Ready for Migration | $TOTAL_READY | $PCT_READY% |" - echo "| Needs Re-sync | $TOTAL_RESYNC | $PCT_RESYNC% |" - echo "| Manual Review | $TOTAL_REVIEW | $PCT_REVIEW% |" - echo "| **Total** | **$TOTAL** | **100%** |" - echo "" - echo "## Tier 1: Ready for Migration ($TOTAL_READY repos)" - echo "" - echo "These repositories are ready for migration or don't need migration:" - echo "" - echo "| Reason | Count |" - echo "|--------|-------|" - echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |" - echo "| deleted by user | ${COUNTS[ready_deleted]} |" - echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |" - echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |" - echo "| purgatory-only (not in prod) | ${COUNTS[ready_not_in_prod]} |" - echo "" - echo "## Tier 2: Needs Re-sync ($TOTAL_RESYNC repos)" - echo "" - echo "These repositories need re-sync to archive before migration:" - echo "" - echo "| Reason | Count | Action |" - echo "|--------|-------|--------|" - echo "| complete in prod, missing from archive | ${COUNTS[resync_missing_archive]} | trigger re-sync |" - echo "| complete in prod, incomplete in archive | ${COUNTS[resync_incomplete_archive]} | 
trigger re-sync |" - echo "" - echo "### Purgatory Context" - echo "" - echo "Repos in needs-resync.txt include purgatory context:" - echo "- **purgatory-expired**: Archive tried to sync but failed (30min timeout)" - echo "- **none**: Archive never tried or announcement missing" - echo "" - echo "## Tier 3: Manual Review ($TOTAL_REVIEW repos)" - echo "" - echo "These repositories require human investigation:" - echo "" - echo "| Reason | Count |" - echo "|--------|-------|" - echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |" - echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |" - echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |" - echo "" - echo "## Input Data Summary" - echo "" - echo "### Prod Categories" - echo "- Category 1 (complete): $(wc -l < "$PROD_DIR/category1-complete-match.txt")" - echo "- Category 2 (empty): $(wc -l < "$PROD_DIR/category2-empty-blank.txt")" - echo "- Category 3 (partial): $(wc -l < "$PROD_DIR/category3-partial-match.txt")" - echo "- Category 4 (no match): $(wc -l < "$PROD_DIR/category4-no-match.txt")" - echo "" - echo "### Archive Categories" - echo "- Category 1 (complete): $(wc -l < "$ARCHIVE_DIR/category1-complete-match.txt")" - echo "- Category 2 (empty): $(wc -l < "$ARCHIVE_DIR/category2-empty-blank.txt")" - echo "- Category 3 (partial): $(wc -l < "$ARCHIVE_DIR/category3-partial-match.txt")" - echo "- Category 4 (no match): $(wc -l < "$ARCHIVE_DIR/category4-no-match.txt")" - echo "" - echo "### Logs" - echo "- Parse failures: $(grep -c -v '^#' "$LOGS_DIR/parse-failures.txt" 2>/dev/null || echo 0)" - echo "- Purgatory expired: $(grep -c -v '^#' "$LOGS_DIR/purgatory-expired.txt" 2>/dev/null || echo 0)" - echo "" - echo "## Output Files" - echo "" - echo "- \`results/ready-for-migration.txt\` - $TOTAL_READY repos ready for migration" - echo "- \`results/needs-resync.txt\` - $TOTAL_RESYNC repos needing re-sync" - echo "- \`results/manual-review.txt\` - $TOTAL_REVIEW repos 
needing investigation" - echo "- \`results/summary.txt\` - This summary file" - echo "" - echo "## Recommended Next Steps" - echo "" - echo "1. **Review needs-resync.txt** - Trigger re-sync for these repos" - echo "2. **Review manual-review.txt** - Investigate unusual states" - echo "3. **Verify ready-for-migration.txt** - Spot-check a few repos" - echo "4. **Plan migration window** - Schedule cutover when action items resolved" -} > "$SUMMARY_FILE" - -# ============================================================================ -# Phase 6: Print summary to console -# ============================================================================ - -echo "" -log_success "Classification complete!" -echo "" -echo "=== Summary ===" -echo "Ready for Migration: $TOTAL_READY ($PCT_READY%)" -echo " - Complete in both: ${COUNTS[ready_complete_both]}" -echo " - Deleted by user: ${COUNTS[ready_deleted]}" -echo " - Empty in prod: ${COUNTS[ready_empty_prod]}" -echo " - Archive-only: ${COUNTS[ready_archive_only]}" -echo " - Purgatory-only: ${COUNTS[ready_not_in_prod]}" -echo "" -echo "Needs Re-sync: $TOTAL_RESYNC ($PCT_RESYNC%)" -echo " - Missing from archive: ${COUNTS[resync_missing_archive]}" -echo " - Incomplete in archive: ${COUNTS[resync_incomplete_archive]}" -echo "" -echo "Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)" -echo " - Partial in prod: ${COUNTS[review_partial_prod]}" -echo " - No-match in prod: ${COUNTS[review_nomatch_prod]}" -echo " - Parse failures: ${COUNTS[review_parse_failure]}" -echo "" -echo "Total: $TOTAL repos" -echo "" -echo "Output files:" -echo " $READY_FILE" -echo " $RESYNC_FILE" -echo " $REVIEW_FILE" -echo " $SUMMARY_FILE" -- cgit v1.2.3 From 87bd544b4539fc17c7919a2185663fb9debae2d1 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Mon, 26 Jan 2026 13:55:43 +0000 Subject: fix classification script --- docs/how-to/migration-scripts/40-classify-actions.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh index 81559aa..b1348f8 100755 --- a/docs/how-to/migration-scripts/40-classify-actions.sh +++ b/docs/how-to/migration-scripts/40-classify-actions.sh @@ -165,7 +165,7 @@ if [[ -f "$LOGS_DIR/purgatory-expired.txt" ]]; then [[ "$repo" =~ ^# ]] && continue [[ -z "$repo" || -z "$npub" ]] && continue PURGATORY["$repo|$npub"]=1 - ((PURGATORY_COUNT++)) + PURGATORY_COUNT=$((PURGATORY_COUNT + 1)) done < "$LOGS_DIR/purgatory-expired.txt" fi log_info "Loaded $PURGATORY_COUNT purgatory entries" @@ -180,7 +180,7 @@ if [[ -f "$LOGS_DIR/parse-failures.txt" ]]; then [[ "$event_id" =~ ^# ]] && continue [[ -z "$repo" || -z "$npub" ]] && continue PARSE_FAIL["$repo|$npub"]=1 - ((PARSE_FAIL_COUNT++)) + PARSE_FAIL_COUNT=$((PARSE_FAIL_COUNT + 1)) done < "$LOGS_DIR/parse-failures.txt" fi log_info "Loaded $PARSE_FAIL_COUNT parse failure entries" -- cgit v1.2.3 From 1ae97cd85aec95f6270f853b28e48774cefc6bf6 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Mon, 26 Jan 2026 16:17:55 +0000 Subject: feat: add NGIT_LOG_LEVEL configuration option Add proper log level configuration following standard approach: - CLI flag: --log-level - Environment variable: NGIT_LOG_LEVEL - Default: info - Supports simple levels (error, warn, info, debug, trace) - Supports filter expressions (e.g., ngit_grasp=debug,actix_web=info) Configuration is now consistent across all four sources: 1. src/config.rs - Config struct with log_level field 2. docs/reference/configuration.md - Full documentation 3. nix/module.nix - NixOS module with logLevel option 4. .env.example - Example configuration file This replaces the previous RUST_LOG approach with proper integration into the ngit-grasp configuration system, enabling trace logging from CLI, environment variables, or NixOS configuration. 
--- .env.example | 7 +++++-- docs/reference/configuration.md | 37 +++++++++++++++++++++++++------------ nix/module.nix | 11 ++++++++--- src/config.rs | 5 +++++ src/main.rs | 16 ++++++++-------- 5 files changed, 51 insertions(+), 25 deletions(-) diff --git a/.env.example b/.env.example index e152b89..01854f4 100644 --- a/.env.example +++ b/.env.example @@ -101,9 +101,12 @@ # LOGGING # ============================================================================ -# Rust log level (not a ngit-grasp config, but useful for debugging) +# Log level for application logging +# CLI: --log-level +# Default: info # Options: error, warn, info, debug, trace -# RUST_LOG=info +# Can also use filter expressions: ngit_grasp=debug,actix_web=info +# NGIT_LOG_LEVEL=info # ============================================================================ # PROACTIVE SYNC (GRASP-02) diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index b24b498..b09b20f 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1041,10 +1041,10 @@ Per-connection limits (built-in to relay-builder, not configurable): ### Logging Configuration -#### `RUST_LOG` +#### `NGIT_LOG_LEVEL` -**Description:** Logging level and filters (standard Rust environment variable) -**Type:** String (log level or filter) +**Description:** Logging level and filters for application logging +**Type:** String (log level or filter expression) **Default:** `info` **Required:** No @@ -1052,17 +1052,17 @@ Per-connection limits (built-in to relay-builder, not configurable): ```bash # Simple levels -RUST_LOG=error # Errors only -RUST_LOG=warn # Warnings and errors -RUST_LOG=info # Info, warnings, errors -RUST_LOG=debug # Debug and above -RUST_LOG=trace # Everything +NGIT_LOG_LEVEL=error # Errors only +NGIT_LOG_LEVEL=warn # Warnings and errors +NGIT_LOG_LEVEL=info # Info, warnings, errors (default) +NGIT_LOG_LEVEL=debug # Debug and above +NGIT_LOG_LEVEL=trace # Everything (very verbose) 
-# Module-specific -RUST_LOG=ngit_grasp=debug,actix_web=info +# Module-specific filtering +NGIT_LOG_LEVEL=ngit_grasp=debug,actix_web=info # Complex filters -RUST_LOG=debug,hyper=info,tokio=warn +NGIT_LOG_LEVEL=debug,hyper=info,tokio=warn ``` **Log levels (most to least verbose):** @@ -1073,12 +1073,25 @@ RUST_LOG=debug,hyper=info,tokio=warn 4. `warn` - Warnings about potential issues 5. `error` - Errors only +**CLI flag:** + +```bash +ngit-grasp --log-level trace +``` + **Production recommendation:** ```bash -RUST_LOG=info,ngit_grasp=debug +NGIT_LOG_LEVEL=info ``` +**Notes:** + +- Uses Rust's `tracing` crate filter syntax +- Supports module-level filtering (e.g., `ngit_grasp=debug,hyper=info`) +- `trace` level can significantly impact performance +- For production, `info` or `warn` is recommended + --- ### Security Configuration (Planned) diff --git a/nix/module.nix b/nix/module.nix index 4a6fc94..89d58de 100644 --- a/nix/module.nix +++ b/nix/module.nix @@ -127,9 +127,14 @@ let }; logLevel = mkOption { - type = types.enum [ "trace" "debug" "info" "warn" "error" ]; + type = types.str; default = "info"; - description = "Logging level for RUST_LOG environment variable"; + example = "debug"; + description = '' + Logging level for application logging. + Can be a simple level (trace, debug, info, warn, error) or a filter expression. 
+ Examples: "info", "debug", "ngit_grasp=debug,actix_web=info" + ''; }; syncMaxBackoffSecs = mkOption { @@ -334,7 +339,7 @@ let NGIT_REPOSITORY_BLACKLIST = concatStringsSep "," cfg.repositoryBlacklist; NGIT_EVENT_BLACKLIST = concatStringsSep "," cfg.eventBlacklist; NGIT_MAX_CONNECTIONS = toString cfg.maxConnections; - RUST_LOG = cfg.logLevel; + NGIT_LOG_LEVEL = cfg.logLevel; } // optionalAttrs (cfg.relayName != null) { NGIT_RELAY_NAME = cfg.relayName; } // optionalAttrs (cfg.archiveReadOnly != null) { diff --git a/src/config.rs b/src/config.rs index 271a340..df7a7ef 100644 --- a/src/config.rs +++ b/src/config.rs @@ -500,6 +500,10 @@ pub struct Config { /// Prevents connection exhaustion DoS attacks #[arg(long, env = "NGIT_MAX_CONNECTIONS", default_value_t = 4096)] pub max_connections: usize, + + /// Log level for application logging + #[arg(long, env = "NGIT_LOG_LEVEL", default_value = "info")] + pub log_level: String, } impl Config { @@ -782,6 +786,7 @@ impl Config { repository_blacklist: String::new(), event_blacklist: String::new(), max_connections: 500, + log_level: "debug".to_string(), } } } diff --git a/src/main.rs b/src/main.rs index 5e5b83a..105b861 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,8 +3,8 @@ use std::{path::PathBuf, sync::Arc}; use anyhow::Result; use tokio::signal; -use tracing::{error, info, warn, Level}; -use tracing_subscriber::FmtSubscriber; +use tracing::{error, info, warn}; +use tracing_subscriber::{EnvFilter, FmtSubscriber}; use ngit_grasp::{ config::{Config, DatabaseBackend}, @@ -17,16 +17,16 @@ use ngit_grasp::{ #[tokio::main] async fn main() -> Result<()> { - // Initialize tracing + // Load configuration first (priority: CLI flags > env vars > .env file > defaults) + let config = Config::load()?; + + // Initialize tracing with configured log level let subscriber = FmtSubscriber::builder() - .with_max_level(Level::DEBUG) + .with_env_filter(EnvFilter::new(&config.log_level)) .finish(); 
tracing::subscriber::set_global_default(subscriber)?; - info!("Starting ngit-grasp with nostr-relay-builder..."); - - // Load configuration (priority: CLI flags > env vars > .env file > defaults) - let config = Config::load()?; + info!("Starting ngit-grasp with log level: {}", config.log_level); // Validate configuration and fail fast on fatal errors // Recoverable issues (e.g., malformed whitelist entries) are logged as warnings -- cgit v1.2.3 From 905ebd838a9ff8cc777cf3b3b6306066e8c177fc Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Mon, 26 Jan 2026 17:20:11 +0000 Subject: fix: load existing events from database on startup with two-pass queries Previously, SelfSubscriber only saw events returned by the WebSocket subscription to the local relay, which has limits on the number of events returned. This caused repos with announcements in the database to never get Layer 2/3 filters created, resulting in missing state events. Now, on startup, we query the database directly with two separate queries: 1. Query announcements (30617) to populate repo_sync_index 2. Query root events (1617/1618/1621) to create Layer 3 filters Both queries use .since(last_connected) if available for incremental loading on reconnect. Filters are created inline and made mutable to support the .since() clause, rather than using a shared create_event_filter() method. Fixes the issue where state events were missing for repos like cashbird and creative-space that had announcements in the database but weren't returned by the WebSocket subscription. 
--- src/sync/mod.rs | 1 + src/sync/self_subscriber.rs | 167 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 145 insertions(+), 23 deletions(-) diff --git a/src/sync/mod.rs b/src/sync/mod.rs index bc8c428..226e681 100644 --- a/src/sync/mod.rs +++ b/src/sync/mod.rs @@ -1442,6 +1442,7 @@ impl SyncManager { self.service_domain.clone(), Arc::clone(&self.repo_sync_index), action_tx, + self.database.clone(), ); let subscriber_shutdown = shutdown_tx.subscribe(); tokio::spawn(async move { self_subscriber.run(Some(subscriber_shutdown)).await }); diff --git a/src/sync/self_subscriber.rs b/src/sync/self_subscriber.rs index 3cc408d..e9505f1 100644 --- a/src/sync/self_subscriber.rs +++ b/src/sync/self_subscriber.rs @@ -16,6 +16,8 @@ use nostr_sdk::Timestamp; use tokio::sync::broadcast::error::RecvError; use tokio::sync::{broadcast, mpsc}; +use crate::nostr::builder::SharedDatabase; + use super::{AddFilters, RepoSyncIndex, RepoSyncNeeds}; // ============================================================================= @@ -98,6 +100,8 @@ pub struct SelfSubscriber { action_tx: mpsc::Sender, /// Last time we connected - used for since filter on reconnect last_connected: Option, + /// Database for querying existing events on startup + database: SharedDatabase, } impl SelfSubscriber { @@ -108,11 +112,13 @@ impl SelfSubscriber { /// * `relay_domain` - Our service domain (used for filtering relevant repos) /// * `repo_sync_index` - Shared index to update with discovered repos /// * `action_tx` - Channel to send AddFilters actions to the SyncManager + /// * `database` - Database for querying existing events on startup pub fn new( own_relay_url: String, relay_domain: String, repo_sync_index: RepoSyncIndex, action_tx: mpsc::Sender, + database: SharedDatabase, ) -> Self { Self { own_relay_url, @@ -120,6 +126,7 @@ impl SelfSubscriber { repo_sync_index, action_tx, last_connected: None, + database, } } @@ -135,6 +142,127 @@ impl SelfSubscriber { 
.unwrap_or(Duration::from_millis(5000)) } + /// Load existing events from database on startup + /// + /// Queries the database with two separate queries to build the initial + /// PendingUpdates state. This ensures all repos get Layer 2/3 filters + /// created, not just those returned by the WebSocket subscription + /// (which has limits on the number of events returned). + /// + /// Query order: + /// 1. First query: Get announcements (30617) to populate repo_sync_index + /// with repos and their relays + /// 2. Second query: Get root events (1617/1618/1621) for handle_root_event() + /// to add root event IDs for Layer 3 filter creation + /// + /// Both queries use `.since(last_connected)` if available for incremental + /// loading on reconnect. + /// + /// Returns a PendingUpdates containing all repos that need Layer 2/3 filters. + async fn load_existing_events(&self) -> PendingUpdates { + let mut pending = PendingUpdates::new(); + + // Log whether this is a full or incremental load + if let Some(since) = self.last_connected { + tracing::info!( + since = %since, + "Loading events incrementally from database (reconnect)" + ); + } else { + tracing::info!("Loading all events from database (first connection)"); + } + + // First query: Get announcements to populate repo_sync_index + let mut announcement_filter = Filter::new().kind(Kind::GitRepoAnnouncement); + if let Some(timestamp) = self.last_connected { + announcement_filter = announcement_filter.since(timestamp); + } + + let announcements = match self.database.query(announcement_filter).await { + Ok(events) => { + tracing::info!( + count = events.len(), + "Loaded announcements from database" + ); + events + } + Err(e) => { + tracing::error!( + error = %e, + "Failed to query announcements from database" + ); + return pending; + } + }; + + // Process announcements + let mut announcements_loaded = 0; + for event in announcements.iter() { + if let Some(repo_id) = Self::extract_repo_id(event) { + let relays = 
Self::extract_relay_urls(event); + pending.add_repo(repo_id, relays, HashSet::new()); + announcements_loaded += 1; + } + } + + // Update repo_sync_index with announcements BEFORE querying root events + { + let mut index = self.repo_sync_index.write().await; + for (repo_id, needs) in &pending.repos { + let entry = index + .entry(repo_id.clone()) + .or_insert_with(|| RepoSyncNeeds { + relays: HashSet::new(), + root_events: HashSet::new(), + }); + entry.relays.extend(needs.relays.clone()); + } + } + + // Second query: Get root events for handle_root_event() + let mut root_filter = Filter::new().kinds(vec![ + Kind::GitPatch, + Kind::GitIssue, + Kind::GitPullRequest, + ]); + if let Some(timestamp) = self.last_connected { + root_filter = root_filter.since(timestamp); + } + + let root_events = match self.database.query(root_filter).await { + Ok(events) => { + tracing::info!( + count = events.len(), + "Loaded root events from database" + ); + events + } + Err(e) => { + tracing::error!( + error = %e, + "Failed to query root events from database" + ); + // Continue with just announcements + return pending; + } + }; + + // Process root events + let mut root_events_processed = 0; + for event in root_events.iter() { + self.handle_root_event(event, &mut pending).await; + root_events_processed += 1; + } + + tracing::info!( + announcements_loaded = announcements_loaded, + root_events_processed = root_events_processed, + "Processed existing events from database" + ); + + pending + } + /// Process a relay pool notification /// /// Handles incoming events from the subscription, queueing 30617 announcements @@ -276,33 +404,22 @@ impl SelfSubscriber { // Subscribe to announcement and root event kinds // Per v4 spec: 30617, 1617, 1618, 1621 (NOT 30618) // Plus kind 10317 (User Grasp List) for GRASP discovery - // Check if we have a last_connected time for reconnect filtering - let filter = if let Some(last) = self.last_connected { + let mut filter = Filter::new().kinds(vec![ + 
Kind::GitRepoAnnouncement, + Kind::GitPatch, + Kind::GitIssue, + Kind::GitPullRequest, + Kind::GitUserGraspList, + ]); + if let Some(timestamp) = self.last_connected { // Quick reconnect - use since filter (15 min buffer) - let since = Timestamp::from(last.as_secs().saturating_sub(15 * 60)); + let since = Timestamp::from(timestamp.as_secs().saturating_sub(15 * 60)); tracing::debug!( since = %since, "Using since filter for reconnect" ); - Filter::new() - .kinds(vec![ - Kind::GitRepoAnnouncement, // Repository Announcements - Kind::GitPatch, // Patches - Kind::GitIssue, // Issues - Kind::GitPullRequest, // Pull Requests - Kind::GitUserGraspList, // User Grasp List - ]) - .since(since) - } else { - // First connection - no since filter - Filter::new().kinds(vec![ - Kind::GitRepoAnnouncement, // Repository Announcements - Kind::GitPatch, // Patches - Kind::GitIssue, // Issues - Kind::GitPullRequest, // Pull Requests - Kind::GitUserGraspList, // User Grasp List - ]) - }; + filter = filter.since(since); + } // Update last_connected AFTER creating filter but BEFORE subscribing self.last_connected = Some(Timestamp::now()); @@ -323,7 +440,11 @@ impl SelfSubscriber { let mut notifications = client.notifications(); let batch_window = Self::get_batch_window(); - let mut pending = PendingUpdates::new(); + + // Load existing events from database on startup + // This ensures all repos get Layer 2/3 filters created, not just those + // returned by the WebSocket subscription (which has limits) + let mut pending = self.load_existing_events().await; // Timer does NOT reset on new events - use interval let mut timer = tokio::time::interval(batch_window); -- cgit v1.2.3 From 04056a12110928e406d2aca456fc3169ae39f8ad Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 07:37:35 +0000 Subject: increase git throttle limits to 60/m --- src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index 105b861..6c9da05 100644 --- 
a/src/main.rs +++ b/src/main.rs @@ -187,8 +187,8 @@ async fn main() -> Result<()> { )); // Create throttle manager for rate limiting remote git servers - // Default: 5 concurrent requests per domain, 30 requests per minute per domain - let throttle_manager = Arc::new(ThrottleManager::new(5, 30)); + // Default: 5 concurrent requests per domain, 60 requests per minute per domain + let throttle_manager = Arc::new(ThrottleManager::new(5, 60)); throttle_manager.set_context(sync_ctx.clone()); throttle_manager.set_git_naughty_list(git_naughty_list.clone()); -- cgit v1.2.3 From 6e5b7eb84b3ca8a902ac4bcbab9c2a9f9ecdee51 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 09:16:41 +0000 Subject: fix(sync): Remove .since() filter from database queries in load_existing_events() Root cause: `last_connected` was set to Timestamp::now() BEFORE load_existing_events() was called (line 425), causing the database query to filter out all existing events with .since(current_time). The query became: SELECT * FROM events WHERE created_at >= <current_time> Result: 0 events returned (nothing has created_at in the future) Solution: Remove .since() filter from database queries entirely. The `last_connected` field is now only used for WebSocket subscription filters to avoid re-fetching events from remote relays on reconnect. Rationale for this approach over reordering operations: - Database queries are fast (indexed by kind and created_at) - Loading all events on startup ensures consistency - Eliminates subtle ordering dependency that could break in refactoring - Cleaner mental model: database = full load, WebSocket = incremental This fixes the issue where ~190 state events weren't being fetched after deploying the database query fix (commit 4162c90). Evidence: Production logs showed "Loaded announcements from database count=0" when there should have been hundreds of announcements. 
--- src/sync/self_subscriber.rs | 42 ++++++++---------------------------------- 1 file changed, 8 insertions(+), 34 deletions(-) diff --git a/src/sync/self_subscriber.rs b/src/sync/self_subscriber.rs index e9505f1..86e4583 100644 --- a/src/sync/self_subscriber.rs +++ b/src/sync/self_subscriber.rs @@ -155,35 +155,18 @@ impl SelfSubscriber { /// 2. Second query: Get root events (1617/1618/1621) for handle_root_event() /// to add root event IDs for Layer 3 filter creation /// - /// Both queries use `.since(last_connected)` if available for incremental - /// loading on reconnect. - /// /// Returns a PendingUpdates containing all repos that need Layer 2/3 filters. async fn load_existing_events(&self) -> PendingUpdates { let mut pending = PendingUpdates::new(); - // Log whether this is a full or incremental load - if let Some(since) = self.last_connected { - tracing::info!( - since = %since, - "Loading events incrementally from database (reconnect)" - ); - } else { - tracing::info!("Loading all events from database (first connection)"); - } + tracing::info!("Loading all events from database"); - // First query: Get announcements to populate repo_sync_index - let mut announcement_filter = Filter::new().kind(Kind::GitRepoAnnouncement); - if let Some(timestamp) = self.last_connected { - announcement_filter = announcement_filter.since(timestamp); - } + // First query: Get all announcements to populate repo_sync_index + let announcement_filter = Filter::new().kind(Kind::GitRepoAnnouncement); let announcements = match self.database.query(announcement_filter).await { Ok(events) => { - tracing::info!( - count = events.len(), - "Loaded announcements from database" - ); + tracing::info!(count = events.len(), "Loaded announcements from database"); events } Err(e) => { @@ -219,22 +202,13 @@ impl SelfSubscriber { } } - // Second query: Get root events for handle_root_event() - let mut root_filter = Filter::new().kinds(vec![ - Kind::GitPatch, - Kind::GitIssue, - Kind::GitPullRequest, 
- ]); - if let Some(timestamp) = self.last_connected { - root_filter = root_filter.since(timestamp); - } + // Second query: Get all root events for handle_root_event() + let root_filter = + Filter::new().kinds(vec![Kind::GitPatch, Kind::GitIssue, Kind::GitPullRequest]); let root_events = match self.database.query(root_filter).await { Ok(events) => { - tracing::info!( - count = events.len(), - "Loaded root events from database" - ); + tracing::info!(count = events.len(), "Loaded root events from database"); events } Err(e) => { -- cgit v1.2.3 From dd9b00c644853a8db0ec463a7e1eddabd6634e41 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 11:06:09 +0000 Subject: fix: improve logging to enable migration script to detect announcement parse failures --- src/sync/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sync/mod.rs b/src/sync/mod.rs index 226e681..a0dfa59 100644 --- a/src/sync/mod.rs +++ b/src/sync/mod.rs @@ -2812,6 +2812,7 @@ impl SyncManager { event_id = %event.id, kind = %event.kind.as_u16(), identifier = %identifier, + pubkey = %event.pubkey, "Added rejected announcement to two-tier index" ); } -- cgit v1.2.3 From ddcba2b350615e6d6ad7028b570206efb42f0338 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 11:15:58 +0000 Subject: fix: prevent false positives in naughty list classification Strip URLs (http://, https://, git://, ws://, wss://) from error messages before classification to prevent false positives from repository names, paths, or identifiers containing keywords like 'ssl', 'certificate', etc. 
- Add strip_urls() function to remove URLs before pattern matching - Add WebSocket protocol support (ws://, wss://) for relay errors - Filter remote warnings that don't indicate infrastructure problems - Use more specific SSL/TLS patterns to avoid npub substring matches - Reduce test suite from 40 to 13 tests, keeping only edge cases Fixes false positives seen in production: - git.shakespeare.diy: 'repository not found' with npub containing 'ssl' - relay.ngit.dev: HTTP 500 error with npub containing 'ssl' - gitnostr.com: remote permission warning misclassified as protocol error --- src/sync/naughty_list.rs | 428 +++++++++++++++++++++++++---------------------- 1 file changed, 232 insertions(+), 196 deletions(-) diff --git a/src/sync/naughty_list.rs b/src/sync/naughty_list.rs index 097affe..60ab949 100644 --- a/src/sync/naughty_list.rs +++ b/src/sync/naughty_list.rs @@ -101,6 +101,69 @@ impl NaughtyListTracker { Self::new(12) } + /// Strip URLs from an error message to prevent false positives from URL components. + /// + /// URLs can contain path components, repository names, or user identifiers that + /// accidentally match error patterns (e.g., "my-openssl-project", "ssl-team", + /// "certificate-manager"). By stripping URLs before classification, we ensure + /// only the actual error message text is analyzed. 
+ /// + /// Handles: http://, https://, git://, ws://, wss:// + fn strip_urls(error: &str) -> String { + let mut result = String::with_capacity(error.len()); + let mut chars = error.chars().peekable(); + + while let Some(c) = chars.next() { + // Check for URL start patterns + let potential_url = match c { + 'h' => { + // Check for http:// or https:// + let rest: String = chars.clone().take(7).collect(); + rest.starts_with("ttp://") || rest.starts_with("ttps://") + } + 'g' => { + // Check for git:// + let rest: String = chars.clone().take(5).collect(); + rest.starts_with("it://") + } + 'w' => { + // Check for ws:// or wss:// + let rest: String = chars.clone().take(5).collect(); + rest.starts_with("s://") || rest.starts_with("ss://") + } + _ => false, + }; + + if potential_url { + // Found URL start, consume until URL end + result.push_str("[URL]"); + + // Skip until we hit a URL terminator + loop { + match chars.peek() { + Some(&ch) if Self::is_url_char(ch) => { + chars.next(); + } + _ => break, + } + } + } else { + result.push(c); + } + } + + result + } + + /// Check if a character can be part of a URL + #[inline] + fn is_url_char(c: char) -> bool { + // URLs end at whitespace, quotes, or certain brackets + // This is conservative - real URLs can contain more, but git errors + // typically have URLs followed by these terminators + !matches!(c, ' ' | '\t' | '\n' | '\r' | '"' | '\'' | '>' | ']' | ')') + } + /// Classify an error string into a naughty category or return None for transient errors /// /// # Arguments @@ -112,10 +175,32 @@ impl NaughtyListTracker { /// - `Some(NaughtyCategory)` if the error indicates a persistent infrastructure issue /// - `None` if the error is a transient network issue (use HealthTracker backoff) pub fn classify_error(error: &str) -> Option { - let error_lower = error.to_lowercase(); + // Filter out remote warnings - these are informational messages from the remote + // server that don't indicate infrastructure problems with the domain 
itself. + // Example: "remote: warning: unable to access '/root/.config/git/attributes': Permission denied" + // These warnings are about the remote server's internal configuration, not connectivity. + let filtered_error: String = error + .lines() + .filter(|line| { + let line_lower = line.to_lowercase(); + // Keep lines that are NOT remote warnings + !(line_lower.starts_with("remote: warning:") + || line_lower.starts_with("warning: remote")) + }) + .collect::>() + .join("\n"); + + // If after filtering we have no content, this was just warnings - not a real error + if filtered_error.trim().is_empty() { + return None; + } + + // Strip URLs to prevent false positives from URL components + // (e.g., repository named "openssl-test" or path containing "certificate") + let url_stripped = Self::strip_urls(&filtered_error); + let error_lower = url_stripped.to_lowercase(); - // DNS lookup failures - use specific patterns to avoid false positives - // from URLs containing "dns" (e.g., npubs like "...cdns7..." 
or domains) + // DNS lookup failures if error_lower.contains("failed to lookup address") || error_lower.contains("name or service not known") || error_lower.contains("nodename nor servname provided") @@ -129,8 +214,17 @@ impl NaughtyListTracker { // TLS certificate errors if error_lower.contains("certificate") - || error_lower.contains("ssl") - || error_lower.contains("tls") + || error_lower.contains("ssl error") + || error_lower.contains("ssl certificate") + || error_lower.contains("ssl handshake") + || error_lower.contains("ssl_error") + || error_lower.contains("tls error") + || error_lower.contains("tls handshake") + || error_lower.contains("tls alert") + || error_lower.contains("tls_error") + || error_lower.contains("openssl") + || error_lower.contains("schannel") + || error_lower.contains("secure channel") { // Exclude timeout errors that mention TLS if !error_lower.contains("timeout") && !error_lower.contains("timed out") { @@ -294,211 +388,216 @@ impl NaughtyListTracker { mod tests { use super::*; + // ========================================================================= + // URL STRIPPING TESTS + // ========================================================================= + #[test] - fn test_classify_dns_errors() { - assert_eq!( - NaughtyListTracker::classify_error("failed to lookup address information"), - Some(NaughtyCategory::DnsLookupFailed) - ); + fn test_strip_urls_basic_protocols() { + // HTTP/HTTPS assert_eq!( - NaughtyListTracker::classify_error("Name or service not known"), - Some(NaughtyCategory::DnsLookupFailed) - ); - assert_eq!( - NaughtyListTracker::classify_error("nodename nor servname provided"), - Some(NaughtyCategory::DnsLookupFailed) + NaughtyListTracker::strip_urls("error: https://example.com/repo.git failed"), + "error: [URL] failed" ); assert_eq!( - NaughtyListTracker::classify_error("dns error: NXDOMAIN"), - Some(NaughtyCategory::DnsLookupFailed) + NaughtyListTracker::strip_urls("error: http://example.com/path failed"), + "error: 
[URL] failed" ); - } - #[test] - fn test_classify_tls_errors() { + // Git protocol assert_eq!( - NaughtyListTracker::classify_error("certificate not valid for 'example.com'"), - Some(NaughtyCategory::TlsCertificateInvalid) + NaughtyListTracker::strip_urls("fatal: git://github.com/user/repo.git not found"), + "fatal: [URL] not found" ); + + // WebSocket protocols (used for relay URLs) assert_eq!( - NaughtyListTracker::classify_error("SSL certificate problem"), - Some(NaughtyCategory::TlsCertificateInvalid) + NaughtyListTracker::strip_urls("error: wss://relay.example.com failed"), + "error: [URL] failed" ); assert_eq!( - NaughtyListTracker::classify_error("TLS handshake failed"), - Some(NaughtyCategory::TlsCertificateInvalid) + NaughtyListTracker::strip_urls("error: ws://localhost:8080 failed"), + "error: [URL] failed" ); + } - // TLS timeout should NOT be classified as naughty - assert_eq!( - NaughtyListTracker::classify_error("TLS connection timed out"), - None - ); + #[test] + fn test_strip_urls_multiple() { + let error = "failed to clone https://a.com/repo.git and wss://relay.com"; + let stripped = NaughtyListTracker::strip_urls(error); + assert_eq!(stripped, "failed to clone [URL] and [URL]"); } #[test] - fn test_classify_protocol_errors() { - assert_eq!( - NaughtyListTracker::classify_error("websocket protocol error"), - Some(NaughtyCategory::ProtocolError) - ); + fn test_strip_urls_preserves_error_text() { + let error = + "fatal: unable to access 'https://example.com/repo.git/': SSL certificate problem"; + let stripped = NaughtyListTracker::strip_urls(error); + assert!(stripped.contains("SSL certificate problem")); + assert!(!stripped.contains("example.com")); + } + + // ========================================================================= + // EDGE CASES: TIMEOUT/CONNECTION EXCEPTIONS + // These are the "unusual rules" where a pattern matches but should be excluded + // ========================================================================= + + #[test] 
+ fn test_tls_timeout_not_naughty() { + // TLS errors with timeout should NOT be classified as naughty + // (timeout is transient, not a certificate problem) assert_eq!( - NaughtyListTracker::classify_error("invalid frame header"), - Some(NaughtyCategory::ProtocolError) + NaughtyListTracker::classify_error("TLS connection timed out"), + None ); - - // WebSocket connection errors should NOT be classified as naughty assert_eq!( - NaughtyListTracker::classify_error("websocket connection refused"), + NaughtyListTracker::classify_error("SSL handshake timeout"), None ); } #[test] - fn test_classify_transient_errors() { - // Timeouts are transient + fn test_websocket_connection_errors_not_naughty() { + // WebSocket connection errors are transient, not protocol violations assert_eq!( - NaughtyListTracker::classify_error("connection timed out"), + NaughtyListTracker::classify_error("websocket connection refused"), None ); assert_eq!( - NaughtyListTracker::classify_error("operation timed out"), + NaughtyListTracker::classify_error("websocket connection timeout"), None ); + } - // Connection refused is transient + #[test] + fn test_remote_warnings_filtered() { + // Remote warnings should be filtered out before classification + let warning_only = + "remote: warning: unable to access '/root/.config/git/attributes': Permission denied"; + assert_eq!(NaughtyListTracker::classify_error(warning_only), None); + + // But real errors after warnings should still be classified + let warning_with_error = "remote: warning: something\nfatal: failed to lookup address"; assert_eq!( - NaughtyListTracker::classify_error("connection refused"), - None + NaughtyListTracker::classify_error(warning_with_error), + Some(NaughtyCategory::DnsLookupFailed) ); + } - // Generic network errors are transient - assert_eq!( - NaughtyListTracker::classify_error("network unreachable"), - None - ); + // ========================================================================= + // INTEGRATION: FULL CLASSIFICATION 
FLOW + // Verify URL stripping + classification work together correctly + // ========================================================================= + + #[test] + fn test_url_with_keywords_not_false_positive() { + // URLs containing keywords should NOT trigger classification + let cases = [ + ("https://example.com/my-openssl-project.git", "not found"), + ("https://example.com/ssl-team/repo.git", "not found"), + ("https://example.com/certificate-manager.git", "not found"), + ("https://example.com/dns-tools.git", "not found"), + ("wss://relay-tls-test.example.com", "connection refused"), + ]; + + for (url, suffix) in cases { + let error = format!("fatal: repository '{}/' {}", url, suffix); + assert_eq!( + NaughtyListTracker::classify_error(&error), + None, + "URL '{}' should not trigger false positive", + url + ); + } + } - // Repository not found is transient (not an infrastructure issue) + #[test] + fn test_real_errors_still_detected() { + // Real errors in the message text (not URL) should still be detected assert_eq!( NaughtyListTracker::classify_error( - "fatal: repository 'https://example.com/repo.git/' not found" + "fatal: 'https://example.com/repo.git': SSL certificate problem" ), - None + Some(NaughtyCategory::TlsCertificateInvalid) ); - } - - #[test] - fn test_classify_false_positive_npub_with_dns() { - // This npub contains "dns" in its encoding: npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cDNS7jezx0 - // A "not found" error with this npub should NOT be classified as DNS failure - let error = "fatal: repository 'https://git.shakespeare.diy/npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cdns7jezx0/kuboslopp%20by%20Shakespeare.git/' not found"; assert_eq!( - NaughtyListTracker::classify_error(error), - None, - "npub containing 'dns' should not trigger DNS failure classification" + NaughtyListTracker::classify_error( + "fatal: 'https://example.com/repo.git': failed to lookup address" + ), + Some(NaughtyCategory::DnsLookupFailed) ); - - // 
Same for relay.ngit.dev - let error2 = "fatal: repository 'https://relay.ngit.dev/npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cdns7jezx0/kuboslopp%20by%20Shakespeare.git/' not found"; assert_eq!( - NaughtyListTracker::classify_error(error2), - None, - "npub containing 'dns' should not trigger DNS failure classification" + NaughtyListTracker::classify_error("websocket protocol error"), + Some(NaughtyCategory::ProtocolError) ); } #[test] - fn test_record_new_entry() { - let tracker = NaughtyListTracker::with_defaults(); - let url = "wss://bad-relay.example.com"; - - let is_new = tracker.record( - url, - NaughtyCategory::DnsLookupFailed, - "failed to lookup address".to_string(), + fn test_url_with_keyword_and_real_error() { + // URL contains keyword AND there's a real error - should detect the error + let error = "fatal: 'https://example.com/ssl-tools/repo.git': SSL certificate problem"; + assert_eq!( + NaughtyListTracker::classify_error(error), + Some(NaughtyCategory::TlsCertificateInvalid) ); - - assert!(is_new); - assert!(tracker.is_naughty(url)); - - let entry = tracker.get_entry(url).unwrap(); - assert_eq!(entry.category, NaughtyCategory::DnsLookupFailed); - assert_eq!(entry.occurrence_count, 1); } + // ========================================================================= + // TRACKER FUNCTIONALITY + // ========================================================================= + #[test] - fn test_record_updates_existing() { + fn test_tracker_record_and_update() { let tracker = NaughtyListTracker::with_defaults(); let url = "wss://bad-relay.example.com"; // First occurrence - let is_new1 = tracker.record(url, NaughtyCategory::DnsLookupFailed, "error 1".to_string()); - assert!(is_new1); + let is_new = tracker.record(url, NaughtyCategory::DnsLookupFailed, "error 1".to_string()); + assert!(is_new); + assert!(tracker.is_naughty(url)); - // Second occurrence + // Second occurrence updates existing let is_new2 = tracker.record(url, 
NaughtyCategory::DnsLookupFailed, "error 2".to_string()); assert!(!is_new2); let entry = tracker.get_entry(url).unwrap(); assert_eq!(entry.occurrence_count, 2); - assert_eq!(entry.reason, "error 2"); // Updated to latest + assert_eq!(entry.reason, "error 2"); } #[test] - fn test_is_naughty() { - let tracker = NaughtyListTracker::with_defaults(); - let url = "wss://bad-relay.example.com"; - - assert!(!tracker.is_naughty(url)); + fn test_tracker_expiration() { + let tracker = NaughtyListTracker::new(0); // Expire immediately tracker.record( - url, - NaughtyCategory::TlsCertificateInvalid, - "cert error".to_string(), + "wss://relay.example.com", + NaughtyCategory::DnsLookupFailed, + "error".to_string(), ); - assert!(tracker.is_naughty(url)); - } - - #[test] - fn test_get_all() { - let tracker = NaughtyListTracker::with_defaults(); + // Entry exists but is expired + assert!(!tracker.is_naughty("wss://relay.example.com")); - tracker.record( - "wss://relay1.example.com", - NaughtyCategory::DnsLookupFailed, - "dns error".to_string(), - ); - tracker.record( - "wss://relay2.example.com", - NaughtyCategory::TlsCertificateInvalid, - "tls error".to_string(), - ); + std::thread::sleep(std::time::Duration::from_millis(10)); - let all = tracker.get_all(); - assert_eq!(all.len(), 2); + let expired = tracker.expire_old_entries(); + assert_eq!(expired.len(), 1); + assert_eq!(tracker.total_count(), 0); } #[test] - fn test_count_by_category() { + fn test_tracker_counts() { let tracker = NaughtyListTracker::with_defaults(); + tracker.record("wss://r1.com", NaughtyCategory::DnsLookupFailed, "e".into()); + tracker.record("wss://r2.com", NaughtyCategory::DnsLookupFailed, "e".into()); tracker.record( - "wss://relay1.example.com", - NaughtyCategory::DnsLookupFailed, - "error".to_string(), - ); - tracker.record( - "wss://relay2.example.com", - NaughtyCategory::DnsLookupFailed, - "error".to_string(), - ); - tracker.record( - "wss://relay3.example.com", + "wss://r3.com", 
NaughtyCategory::TlsCertificateInvalid, - "error".to_string(), + "e".into(), ); + assert_eq!(tracker.total_count(), 3); assert_eq!( tracker.count_by_category(NaughtyCategory::DnsLookupFailed), 2 @@ -507,74 +606,11 @@ mod tests { tracker.count_by_category(NaughtyCategory::TlsCertificateInvalid), 1 ); - assert_eq!(tracker.count_by_category(NaughtyCategory::ProtocolError), 0); - } - - #[test] - fn test_total_count() { - let tracker = NaughtyListTracker::with_defaults(); - assert_eq!(tracker.total_count(), 0); - - tracker.record( - "wss://relay1.example.com", - NaughtyCategory::DnsLookupFailed, - "error".to_string(), - ); - assert_eq!(tracker.total_count(), 1); - - tracker.record( - "wss://relay2.example.com", - NaughtyCategory::TlsCertificateInvalid, - "error".to_string(), - ); - assert_eq!(tracker.total_count(), 2); - } - - #[test] - fn test_expire_old_entries() { - // Use very short expiration for testing - let tracker = NaughtyListTracker::new(0); // Expire immediately (0 hours) - - tracker.record( - "wss://relay1.example.com", - NaughtyCategory::DnsLookupFailed, - "error".to_string(), - ); - - // Entry should exist in the map - assert_eq!(tracker.total_count(), 1); - - // But is_naughty should return false since it's already expired (0 hours) - assert!(!tracker.is_naughty("wss://relay1.example.com")); - - // Sleep to ensure time passes - std::thread::sleep(std::time::Duration::from_millis(10)); - - // Expire old entries (should remove the 0-hour expired entry) - let expired = tracker.expire_old_entries(); - assert_eq!(expired.len(), 1); - assert_eq!(expired[0], "wss://relay1.example.com"); - - // Entry should be gone - assert!(!tracker.is_naughty("wss://relay1.example.com")); - assert_eq!(tracker.total_count(), 0); + assert_eq!(tracker.get_all().len(), 3); } #[test] fn test_category_display() { - assert_eq!( - NaughtyCategory::DnsLookupFailed.to_string(), - "dns_lookup_failed" - ); - assert_eq!( - NaughtyCategory::TlsCertificateInvalid.to_string(), - 
"tls_certificate_invalid" - ); - assert_eq!(NaughtyCategory::ProtocolError.to_string(), "protocol_error"); - } - - #[test] - fn test_category_as_str() { assert_eq!( NaughtyCategory::DnsLookupFailed.as_str(), "dns_lookup_failed" -- cgit v1.2.3 From 49b0df788255848173c01db394a2df29b7c08576 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 11:17:59 +0000 Subject: refactor: simplify parse failure enrichment using log entries and nak Remove --analysis-root flag and external data file dependencies. The script now extracts repo/npub information directly from 'Added rejected announcement' log entries (which include pubkey and identifier fields) and uses `nak encode npub ` to convert hex pubkeys to npub format. This simplification was enabled by the recent logging improvement that added pubkey to the 'Added rejected announcement' log entries. --- .../migration-scripts/30-extract-parse-failures.sh | 332 ++++++--------------- 1 file changed, 94 insertions(+), 238 deletions(-) diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh index f86e9f8..d7f9706 100755 --- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh +++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh @@ -24,6 +24,12 @@ # --until End date for log extraction (default: now) # --dry-run Show what would be extracted without writing files # +# ENRICHMENT: +# The script automatically enriches parse failures with repo/npub information +# by extracting from "Added rejected announcement" log entries which include +# pubkey and identifier fields. Hex pubkeys are converted to npub format using +# `nak encode npub ` if the nak tool is available. +# # OUTPUT: # /parse-failures.txt # @@ -31,7 +37,7 @@ # event_idkindreasonreponpub # # EXPECTED LOG FORMATS: -# The script looks for two types of log entries: +# The script looks for three types of log entries: # # 1. 
Structured [PARSE_FAIL] entries: # 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... @@ -39,13 +45,17 @@ # 2. "Invalid announcement" rejections (write policy): # Event rejected by write policy event_id=abc123... relay=wss://... kind=30617 reason=Invalid announcement: multiple clone tags found... # +# 3. "Added rejected announcement" entries (for enrichment): +# Added rejected announcement to two-tier index event_id=abc123... kind=30617 identifier=myrepo pubkey=hex... +# These entries provide pubkey and identifier for enriching write policy rejections. +# # NOTE: Builder logs ("Rejected repository announcement note1xxx:") are NOT extracted # because they use bech32 (note1) IDs while write policy logs use hex IDs. Extracting # both would cause double-counting since deduplication only works within each format. # Write policy logs contain the same events, so we don't lose any data. # # Required fields: kind, event_id, reason -# Optional fields: repo, npub (may not be available for all entry types) +# Enrichment fields: repo (identifier), npub (converted from hex pubkey) # # DEPENDENCY: # This script requires logging improvements in ngit-grasp to emit structured @@ -127,23 +137,21 @@ usage() { echo "Options:" echo " --since Start date (default: 30 days ago)" echo " --until End date (default: now)" - echo " --analysis-root Filter to only missing announcements from analysis" echo " --dry-run Show what would be extracted without writing" echo "" echo "Examples:" echo " $0 ngit-grasp.service output/logs" echo " $0 ngit-grasp.service output/logs --since '2026-01-01'" echo " $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'" - echo " $0 ngit-grasp.service output/logs --analysis-root /tmp/migration-analysis-20260123" echo "" echo "Expected log formats:" echo " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." 
echo " Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ..." echo "" - echo "Filtering with --analysis-root:" - echo " When provided, only parse failures for announcements that are in production" - echo " but missing from the archive will be included. This filters out rejections" - echo " for events from other relays that don't affect the migration." + echo "Enrichment:" + echo " Parse failures are automatically enriched with repo/npub from" + echo " 'Added rejected announcement' log entries. Hex pubkeys are converted" + echo " to npub format using 'nak encode npub' if available." exit 1 } @@ -211,96 +219,52 @@ parse_write_policy_rejection_line() { # the same event to be counted twice. Write policy logs contain the same # events, so we don't lose any data by only extracting from that source. -# Enrich parse failures with repo/npub by looking up event_id in announcements.json +# Enrich parse failures with repo/npub by looking up event_id in "Added rejected announcement" log entries # This is critical because "Invalid announcement" rejections only log event_id and kind, # not the repo name or npub. Without enrichment, Phase 5 shows event_id|kind instead # of repo|npub in action-required.txt, making the output unusable. # # Arguments: # $1 - parse failures file to enrich (modified in place) -# $2 - analysis root directory containing prod/raw/announcements.json +# $2 - lookup file containing event_id -> identifier|pubkey mappings from logs # # The function: -# 1. Builds a lookup table from announcements.json: event_id -> repo|npub +# 1. Uses the lookup table built from "Added rejected announcement" log entries # 2. For each parse failure with empty repo/npub, looks up the event_id # 3. Populates repo and npub columns from the lookup +# 4. 
Converts hex pubkeys to npub format using `nak encode npub` if available enrich_with_repo_npub() { local parse_failures_file="$1" - local analysis_root="$2" - - local prod_announcements="$analysis_root/prod/raw/announcements.json" - - # Validate required file exists - if [[ ! -f "$prod_announcements" ]]; then - log_warn "Production announcements file not found: $prod_announcements" - log_warn "Skipping enrichment - repo/npub columns will remain empty" - return 0 - fi + local lookup_file="$2" - # Check if jq is available - if ! command -v jq &> /dev/null; then - log_warn "jq not found - cannot enrich parse failures with repo/npub" - log_warn "Install jq or run without --analysis-root" + # Validate lookup file exists and has content + if [[ ! -f "$lookup_file" ]] || [[ ! -s "$lookup_file" ]]; then + log_warn "No enrichment data available - repo/npub columns will remain empty" return 0 fi - log_info "Enriching parse failures with repo/npub from announcements..." - - # Step 1: Build lookup table from announcements.json - # Output format: event_idreponpub - local lookup_file - lookup_file=$(mktemp) - - # Extract id, d-tag (repo identifier), and pubkey from announcements - # Convert pubkey to npub using bech32 encoding - # Note: We use a simple hex-to-npub conversion via external tool if available, - # otherwise we'll use the hex pubkey (Phase 5 can still match on it) - log_info " Building event_id -> repo/npub lookup table..." - - # First, extract the raw data: id, d-tag, pubkey (hex) - jq -r 'select(.kind == 30617) | - .id as $id | - .pubkey as $pubkey | - ((.tags[] | select(.[0] == "d") | .[1]) // "") as $dtag | - "\($id)\t\($dtag)\t\($pubkey)"' "$prod_announcements" > "$lookup_file.raw" 2>/dev/null || { - log_warn "Failed to parse production announcements JSON" - rm -f "$lookup_file" "$lookup_file.raw" - return 0 - } + log_info "Enriching parse failures with repo/npub from log entries..." 
- # Convert hex pubkeys to npub format - # Check if we have a tool to do bech32 encoding (nak, nostr-tool, etc.) + # Check if we have nak for pubkey->npub conversion local can_convert_npub=false if command -v nak &> /dev/null; then can_convert_npub=true log_info " Using 'nak' for pubkey->npub conversion" + else + log_warn " 'nak' not found - will use hex pubkeys instead of npub" fi - # Process the lookup file, converting pubkeys to npubs if possible - while IFS=$'\t' read -r event_id repo pubkey_hex; do - local npub - if [[ "$can_convert_npub" == true && -n "$pubkey_hex" ]]; then - # Use nak to encode pubkey as npub - npub=$(nak encode npub "$pubkey_hex" 2>/dev/null || echo "") - fi - # Fall back to hex pubkey if conversion failed - [[ -z "$npub" ]] && npub="$pubkey_hex" - printf '%s\t%s\t%s\n' "$event_id" "$repo" "$npub" - done < "$lookup_file.raw" > "$lookup_file" - - rm -f "$lookup_file.raw" - local lookup_count lookup_count=$(wc -l < "$lookup_file") lookup_count="${lookup_count//[^0-9]/}" - log_info " Built lookup table with $lookup_count announcements" + log_info " Lookup table has $lookup_count entries" - # Step 2: Enrich parse failures + # Enrich parse failures local enriched_file enriched_file=$(mktemp) # Copy header lines - grep '^#' "$parse_failures_file" > "$enriched_file" + grep '^#' "$parse_failures_file" > "$enriched_file" 2>/dev/null || true # Process data lines local enriched_count=0 @@ -317,14 +281,21 @@ enrich_with_repo_npub() { continue fi - # Look up event_id in our table + # Look up event_id in our table (format: event_ididentifierpubkey_hex) local lookup_result lookup_result=$(grep "^${event_id}"$'\t' "$lookup_file" 2>/dev/null | head -1 || echo "") if [[ -n "$lookup_result" ]]; then - local looked_up_repo looked_up_npub + local looked_up_repo looked_up_pubkey_hex looked_up_npub looked_up_repo=$(echo "$lookup_result" | cut -f2) - looked_up_npub=$(echo "$lookup_result" | cut -f3) + looked_up_pubkey_hex=$(echo "$lookup_result" | cut -f3) + + # 
Convert hex pubkey to npub if nak is available + if [[ "$can_convert_npub" == true && -n "$looked_up_pubkey_hex" ]]; then + looked_up_npub=$(nak encode npub "$looked_up_pubkey_hex" 2>/dev/null || echo "$looked_up_pubkey_hex") + else + looked_up_npub="$looked_up_pubkey_hex" + fi # Use looked-up values if original was empty [[ -z "$repo" ]] && repo="$looked_up_repo" @@ -338,160 +309,31 @@ enrich_with_repo_npub() { # Replace original with enriched version mv "$enriched_file" "$parse_failures_file" - # Cleanup - rm -f "$lookup_file" - log_info " Enriched $enriched_count of $total_count parse failures with repo/npub" log_success "Enrichment complete" } -# Filter parse failures to only those for missing announcements -# This is used when --analysis-root is provided to scope results to the migration -# -# Arguments: -# $1 - parse failures file to filter (modified in place) -# $2 - analysis root directory containing comparison/ and prod/ subdirs -# -# The function: -# 1. Reads missing announcements from comparison/complete-prod-missing-archive.txt -# 2. Extracts pubkey/identifier pairs for those announcements -# 3. Reads production announcements from prod/raw/announcements.json -# 4. Gets event IDs for announcements matching the missing pubkey/identifier pairs -# 5. Filters parse failures to only those event IDs -filter_to_missing_announcements() { - local parse_failures_file="$1" - local analysis_root="$2" - - local missing_file="$analysis_root/comparison/complete-prod-missing-archive.txt" - local prod_announcements="$analysis_root/prod/raw/announcements.json" - - # Validate required files exist - if [[ ! -f "$missing_file" ]]; then - log_warn "Missing announcements file not found: $missing_file" - log_warn "Skipping filter - all parse failures will be included" - return 0 - fi - - if [[ ! 
-f "$prod_announcements" ]]; then - log_warn "Production announcements file not found: $prod_announcements" - log_warn "Skipping filter - all parse failures will be included" - return 0 - fi - - # Check if jq is available - if ! command -v jq &> /dev/null; then - log_warn "jq not found - cannot filter parse failures" - log_warn "Install jq or run without --analysis-root" - return 0 - fi - - log_info "Filtering parse failures to missing announcements only..." - - # Step 1: Extract pubkey/identifier pairs from missing announcements - # Format: identifier | npub | prod=complete | archive=missing - local missing_pairs_file - missing_pairs_file=$(mktemp) - - # Extract identifier and npub, convert npub to hex pubkey for matching - while IFS=' | ' read -r identifier npub rest; do - # Skip empty lines - [[ -z "$identifier" ]] && continue - # Trim whitespace - identifier=$(echo "$identifier" | xargs) - npub=$(echo "$npub" | xargs) - echo "${identifier}|${npub}" - done < "$missing_file" > "$missing_pairs_file" - - local missing_count - missing_count=$(wc -l < "$missing_pairs_file") - missing_count="${missing_count//[^0-9]/}" - log_info " Found $missing_count missing announcements to filter for" - - # Step 2: Get event IDs from production announcements for these pairs - # We need to match on 'd' tag (identifier) and pubkey - local missing_event_ids_file - missing_event_ids_file=$(mktemp) - - # Create a lookup of identifier|npub -> event_id from production announcements - # The JSON has: id, pubkey (hex), tags (array with ["d", identifier]) - log_info " Extracting event IDs from production announcements..." 
- - # Use jq to extract id, pubkey, and d-tag value, then filter - # Output format: event_id|identifier|pubkey_hex - # Note: The JSON file is NDJSON (newline-delimited), not an array - jq -r 'select(.kind == 30617) | - .id as $id | - .pubkey as $pubkey | - (.tags[] | select(.[0] == "d") | .[1]) as $dtag | - "\($id)|\($dtag)|\($pubkey)"' "$prod_announcements" > "$missing_event_ids_file.all" 2>/dev/null || { - log_warn "Failed to parse production announcements JSON" - rm -f "$missing_pairs_file" "$missing_event_ids_file" "$missing_event_ids_file.all" - return 0 - } - - # Now filter to only event IDs for missing announcements - # We need to convert npub to hex pubkey for comparison - # npub is bech32, pubkey in JSON is hex - # For simplicity, we'll match on identifier only (d-tag) since it should be unique per pubkey - # Actually, we need both because same identifier can exist for different pubkeys - - # Create a set of "identifier|pubkey_hex" to match against - # First, we need to convert npub to hex - but that requires a tool - # Alternative: match on identifier only and accept some false positives - # Better: use the comparison file which has npub, and match against announcements - - # Let's match on identifier only for now (simpler, may have minor false positives) - # Extract just the identifiers from missing announcements - local missing_identifiers_file - missing_identifiers_file=$(mktemp) - cut -d'|' -f1 "$missing_pairs_file" | sort -u > "$missing_identifiers_file" - - # Filter event IDs to only those with matching identifiers - while IFS='|' read -r event_id identifier pubkey_hex; do - if grep -qFx "$identifier" "$missing_identifiers_file"; then - echo "$event_id" - fi - done < "$missing_event_ids_file.all" | sort -u > "$missing_event_ids_file" - - local event_id_count - event_id_count=$(wc -l < "$missing_event_ids_file") - event_id_count="${event_id_count//[^0-9]/}" - log_info " Found $event_id_count event IDs for missing announcements" - - # Step 3: Filter 
parse failures to only those event IDs - local filtered_file - filtered_file=$(mktemp) - - # Copy header lines - grep '^#' "$parse_failures_file" > "$filtered_file" +# Parse "Added rejected announcement" log entries to build enrichment lookup table +# Input: log line containing "Added rejected announcement to two-tier index" +# Output: TSV line: event_ididentifierpubkey_hex +parse_rejected_announcement_line() { + local line="$1" - # Add a note about filtering - echo "# Filtered to missing announcements only (--analysis-root)" >> "$filtered_file" - echo "# Analysis root: $analysis_root" >> "$filtered_file" - echo "# Missing announcements: $missing_count" >> "$filtered_file" - echo "# Matching event IDs: $event_id_count" >> "$filtered_file" + local event_id identifier pubkey_hex - # Filter data lines - only include if event_id is in our list - local filtered_count=0 - while IFS=$'\t' read -r event_id kind reason repo npub; do - # Skip header lines (already copied) - [[ "$event_id" =~ ^# ]] && continue - - # Check if this event_id is in our missing list - if grep -qFx "$event_id" "$missing_event_ids_file"; then - printf '%s\t%s\t%s\t%s\t%s\n' "$event_id" "$kind" "$reason" "$repo" "$npub" >> "$filtered_file" - filtered_count=$((filtered_count + 1)) - fi - done < "$parse_failures_file" + # Extract event_id=VALUE (hex string) + event_id=$(echo "$line" | grep -oP 'event_id=\K[a-f0-9]+' || echo "") - # Replace original with filtered version - mv "$filtered_file" "$parse_failures_file" + # Extract identifier=VALUE (repo name) + identifier=$(echo "$line" | grep -oP 'identifier=\K[^ ]+' || echo "") - # Cleanup temp files - rm -f "$missing_pairs_file" "$missing_event_ids_file" "$missing_event_ids_file.all" "$missing_identifiers_file" + # Extract pubkey=VALUE (hex string) + pubkey_hex=$(echo "$line" | grep -oP 'pubkey=\K[a-f0-9]+' || echo "") - log_info " Filtered from $(grep -v '^#' "$parse_failures_file" | wc -l | xargs) to $filtered_count parse failures" - log_success 
"Filtered to parse failures for missing announcements only" + # Only output if we have all required fields + if [[ -n "$event_id" && -n "$identifier" && -n "$pubkey_hex" ]]; then + printf '%s\t%s\t%s\n' "$event_id" "$identifier" "$pubkey_hex" + fi } # Main @@ -509,7 +351,6 @@ main() { since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "") local until_date="" local dry_run=false - local analysis_root="" # Parse options while [[ $# -gt 0 ]]; do @@ -522,10 +363,6 @@ main() { until_date="$2" shift 2 ;; - --analysis-root) - analysis_root="$2" - shift 2 - ;; --dry-run) dry_run=true shift @@ -640,10 +477,11 @@ main() { log_info "Extracting log entries..." # Create temp files for intermediate results - local temp_stderr temp_parse_fail temp_write_policy_rejection + local temp_stderr temp_parse_fail temp_write_policy_rejection temp_rejected_announcement temp_stderr=$(mktemp) temp_parse_fail=$(mktemp) temp_write_policy_rejection=$(mktemp) + temp_rejected_announcement=$(mktemp) # Extract [PARSE_FAIL] entries directly to temp file (streaming) log_info " Searching for [PARSE_FAIL] entries..." @@ -661,17 +499,25 @@ main() { log_info " Searching for write policy rejections..." eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep 'Invalid announcement' > "$temp_write_policy_rejection" || true + # Extract "Added rejected announcement" entries for enrichment (streaming) + # These contain pubkey and identifier which we use to enrich write policy rejections + log_info " Searching for rejected announcement entries (for enrichment)..." 
+ eval "$journal_cmd" 2>/dev/null | grep 'Added rejected announcement to two-tier index' > "$temp_rejected_announcement" || true + rm -f "$temp_stderr" # Check if we found anything - local parse_fail_line_count write_policy_line_count + local parse_fail_line_count write_policy_line_count rejected_announcement_line_count parse_fail_line_count=$(wc -l < "$temp_parse_fail") parse_fail_line_count="${parse_fail_line_count//[^0-9]/}" write_policy_line_count=$(wc -l < "$temp_write_policy_rejection") write_policy_line_count="${write_policy_line_count//[^0-9]/}" + rejected_announcement_line_count=$(wc -l < "$temp_rejected_announcement") + rejected_announcement_line_count="${rejected_announcement_line_count//[^0-9]/}" log_info " Found $parse_fail_line_count [PARSE_FAIL] log lines" log_info " Found $write_policy_line_count write policy rejection log lines" + log_info " Found $rejected_announcement_line_count rejected announcement log lines (for enrichment)" local total_invalid_announcement_lines=$write_policy_line_count @@ -704,7 +550,7 @@ main() { echo "# This is expected if ngit-grasp logging improvements are not yet deployed." } > "$output_file" - rm -f "$temp_parse_fail" "$temp_write_policy_rejection" + rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_rejected_announcement" log_info "Created empty output file: $output_file" exit 0 fi @@ -753,7 +599,22 @@ main() { local invalid_announcement_count=$write_policy_count - rm -f "$temp_parse_fail" "$temp_write_policy_rejection" + # Build enrichment lookup table from "Added rejected announcement" entries + local enrichment_lookup_file + enrichment_lookup_file=$(mktemp) + + log_info " Building enrichment lookup table..." 
+ if [[ "$rejected_announcement_line_count" -gt 0 ]]; then + while IFS= read -r line; do + local parsed + parsed=$(parse_rejected_announcement_line "$line") + if [[ -n "$parsed" ]]; then + echo "$parsed" >> "$enrichment_lookup_file" + fi + done < "$temp_rejected_announcement" + fi + + rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_rejected_announcement" # Deduplicate by event_id (first column) - keep first occurrence log_info " Deduplicating entries..." @@ -764,17 +625,18 @@ main() { grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file" mv "$deduped_file" "$output_file" - # Enrich with repo/npub from announcements.json if analysis root provided + # Deduplicate enrichment lookup table by event_id + if [[ -s "$enrichment_lookup_file" ]]; then + sort -t$'\t' -k1,1 -u "$enrichment_lookup_file" > "$enrichment_lookup_file.deduped" + mv "$enrichment_lookup_file.deduped" "$enrichment_lookup_file" + fi + + # Enrich with repo/npub from "Added rejected announcement" log entries # This is critical for usability - without it, action-required.txt shows # event_id|kind instead of repo|npub, making parse failures unidentifiable - if [[ -n "$analysis_root" ]]; then - enrich_with_repo_npub "$output_file" "$analysis_root" - fi + enrich_with_repo_npub "$output_file" "$enrichment_lookup_file" - # Filter to missing announcements only if analysis root provided - if [[ -n "$analysis_root" ]]; then - filter_to_missing_announcements "$output_file" "$analysis_root" - fi + rm -f "$enrichment_lookup_file" # Count final entries (excluding header lines) local count @@ -789,15 +651,9 @@ main() { log_info "=== Extraction Summary ===" log_info "Service: $service" log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" - if [[ -n "$analysis_root" ]]; then - log_info "Filtered to: missing announcements only" - fi log_success "Extracted $count total entries" log_info " - [PARSE_FAIL] entries: $parse_fail_count" log_info " - Invalid announcement 
rejections: $invalid_announcement_count" - if [[ -n "$analysis_root" ]]; then - log_info " (filtered from original extraction)" - fi echo "" log_info "Output file: $output_file" -- cgit v1.2.3 From a7d0d574b9788f71e3add39699b3a409c0f2b492 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 12:46:05 +0000 Subject: fix migration script for invalid announcement detection --- .../migration-scripts/30-extract-parse-failures.sh | 319 +++++++++++++-------- 1 file changed, 199 insertions(+), 120 deletions(-) diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh index d7f9706..d762aae 100755 --- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh +++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh @@ -155,70 +155,107 @@ usage() { exit 1 } -# Parse a [PARSE_FAIL] log line and extract fields -# Input: log line containing [PARSE_FAIL] -# Output: TSV line: event_idkindreasonreponpub -parse_parse_fail_line() { - local line="$1" - - # Extract fields using grep -oP (Perl regex) or awk - # Fields: kind, event_id, reason, repo (optional), npub (optional) - - local kind event_id reason repo npub - - # Extract kind=VALUE - kind=$(echo "$line" | grep -oP 'kind=\K[0-9]+' || echo "") - - # Extract event_id=VALUE (hex string, possibly truncated with ...) - event_id=$(echo "$line" | grep -oP 'event_id=\K[a-f0-9]+' || echo "") - - # Extract reason="VALUE" (quoted string) - reason=$(echo "$line" | grep -oP 'reason="\K[^"]*' || echo "") - - # Extract repo=VALUE (optional, unquoted identifier) - repo=$(echo "$line" | grep -oP 'repo=\K[^ ]+' || echo "") - - # Extract npub=VALUE (optional, npub1... 
format) - npub=$(echo "$line" | grep -oP 'npub=\K[^ ]+' || echo "") - - # Only output if we have the required fields - if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then - printf '%s\t%s\t%s\t%s\t%s\n' "$event_id" "$kind" "$reason" "$repo" "$npub" - fi -} - -# Parse an "Invalid announcement" rejection log line from write policy -# Input: log line containing "Event rejected by write policy" with "Invalid announcement" -# Output: TSV line: event_idkindreasonreponpub -# Note: repo and npub are empty for these entries (not available in log format) -parse_write_policy_rejection_line() { - local line="$1" - - local kind event_id reason - - # Extract event_id=VALUE (hex string) - event_id=$(echo "$line" | grep -oP 'event_id=\K[a-f0-9]+' || echo "") - - # Extract kind=VALUE - kind=$(echo "$line" | grep -oP 'kind=\K[0-9]+' || echo "") - - # Extract reason=VALUE (everything after "reason=") - # The reason is unquoted and goes to end of line - reason=$(echo "$line" | grep -oP 'reason=\K.*$' || echo "") - - # Only output if we have the required fields - if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then - # repo and npub are empty for invalid announcement entries - printf '%s\t%s\t%s\t\t\n' "$event_id" "$kind" "$reason" - fi -} - +# ============================================================================= +# AWK-BASED BATCH PARSING FUNCTIONS +# ============================================================================= +# These functions use awk for efficient batch processing instead of per-line +# grep calls. This provides ~400x speedup for large log files. +# # NOTE: parse_builder_rejection_line() was removed to fix double-counting bug. # Builder logs use bech32 (note1) IDs while write policy logs use hex IDs. # Since deduplication only works within each format, extracting both caused # the same event to be counted twice. Write policy logs contain the same # events, so we don't lose any data by only extracting from that source. 
+# Parse [PARSE_FAIL] log lines in batch using awk +# Input: file containing log lines with [PARSE_FAIL] +# Output: TSV lines: event_idkindreasonreponpub +parse_parse_fail_batch() { + local input_file="$1" + awk ' + { + # Extract kind=VALUE + kind = "" + if (match($0, /kind=([0-9]+)/, m)) kind = m[1] + + # Extract event_id=VALUE (hex string) + event_id = "" + if (match($0, /event_id=([a-f0-9]+)/, m)) event_id = m[1] + + # Extract reason="VALUE" (quoted string) + reason = "" + if (match($0, /reason="([^"]*)"/, m)) reason = m[1] + + # Extract repo=VALUE (optional) + repo = "" + if (match($0, /repo=([^ ]+)/, m)) repo = m[1] + + # Extract npub=VALUE (optional) + npub = "" + if (match($0, /npub=([^ ]+)/, m)) npub = m[1] + + # Output if we have required fields + if (kind != "" && event_id != "" && reason != "") { + print event_id "\t" kind "\t" reason "\t" repo "\t" npub + } + } + ' "$input_file" +} + +# Parse "Invalid announcement" rejection log lines in batch using awk +# Input: file containing "Event rejected by write policy" log lines +# Output: TSV lines: event_idkindreason +parse_write_policy_rejection_batch() { + local input_file="$1" + awk ' + { + # Extract event_id=VALUE (hex string) + event_id = "" + if (match($0, /event_id=([a-f0-9]+)/, m)) event_id = m[1] + + # Extract kind=VALUE + kind = "" + if (match($0, /kind=([0-9]+)/, m)) kind = m[1] + + # Extract reason=VALUE (everything after "reason=") + reason = "" + if (match($0, /reason=(.*)$/, m)) reason = m[1] + + # Output if we have required fields (repo and npub are empty) + if (kind != "" && event_id != "" && reason != "") { + print event_id "\t" kind "\t" reason "\t\t" + } + } + ' "$input_file" +} + +# Parse "Added rejected announcement" log lines in batch using awk +# Input: file containing "Added rejected announcement to two-tier index" log lines +# Output: TSV lines: event_ididentifierpubkey_hex +parse_rejected_announcement_batch() { + local input_file="$1" + awk ' + { + # Extract event_id=VALUE (hex 
string) + event_id = "" + if (match($0, /event_id=([a-f0-9]+)/, m)) event_id = m[1] + + # Extract identifier=VALUE (repo name) + identifier = "" + if (match($0, /identifier=([^ ]+)/, m)) identifier = m[1] + + # Extract pubkey=VALUE (hex string) + pubkey = "" + if (match($0, /pubkey=([a-f0-9]+)/, m)) pubkey = m[1] + + # Output if we have all required fields + if (event_id != "" && identifier != "" && pubkey != "") { + print event_id "\t" identifier "\t" pubkey + } + } + ' "$input_file" +} + # Enrich parse failures with repo/npub by looking up event_id in "Added rejected announcement" log entries # This is critical because "Invalid announcement" rejections only log event_id and kind, # not the repo name or npub. Without enrichment, Phase 5 shows event_id|kind instead @@ -233,6 +270,11 @@ parse_write_policy_rejection_line() { # 2. For each parse failure with empty repo/npub, looks up the event_id # 3. Populates repo and npub columns from the lookup # 4. Converts hex pubkeys to npub format using `nak encode npub` if available +# +# OPTIMIZATION: This function uses batch processing for efficiency: +# - Uses awk for O(n) join instead of per-line grep (O(n*m)) +# - Batches all pubkey->npub conversions in a single nak call +# - This reduces runtime from minutes to seconds for large datasets enrich_with_repo_npub() { local parse_failures_file="$1" local lookup_file="$2" @@ -259,52 +301,98 @@ enrich_with_repo_npub() { lookup_count="${lookup_count//[^0-9]/}" log_info " Lookup table has $lookup_count entries" - # Enrich parse failures + # STEP 1: Extract unique pubkeys that need conversion + # Get pubkeys from lookup file (column 3), deduplicate + local unique_pubkeys_file npub_map_file + unique_pubkeys_file=$(mktemp) + npub_map_file=$(mktemp) + + cut -f3 "$lookup_file" | sort -u > "$unique_pubkeys_file" + local unique_pubkey_count + unique_pubkey_count=$(wc -l < "$unique_pubkeys_file") + unique_pubkey_count="${unique_pubkey_count//[^0-9]/}" + log_info " Converting 
$unique_pubkey_count unique pubkeys to npub format..." + + # STEP 2: Batch convert all pubkeys to npub in a single nak call + # nak reads hex pubkeys from stdin (one per line) and outputs npubs + if [[ "$can_convert_npub" == true && "$unique_pubkey_count" -gt 0 ]]; then + # Create mapping file: pubkey_hexnpub + # nak encode npub reads from stdin and outputs one npub per line + paste "$unique_pubkeys_file" <(nak encode npub < "$unique_pubkeys_file" 2>/dev/null) > "$npub_map_file" || { + # Fallback: if batch conversion fails, use hex pubkeys + log_warn " Batch npub conversion failed, using hex pubkeys" + awk '{print $1 "\t" $1}' "$unique_pubkeys_file" > "$npub_map_file" + } + else + # No nak available, use hex pubkeys as-is + awk '{print $1 "\t" $1}' "$unique_pubkeys_file" > "$npub_map_file" + fi + + rm -f "$unique_pubkeys_file" + + # STEP 3: Use awk for efficient join (O(n) instead of O(n*m) grep per line) + # This joins parse_failures with lookup_file on event_id, then with npub_map on pubkey local enriched_file enriched_file=$(mktemp) # Copy header lines grep '^#' "$parse_failures_file" > "$enriched_file" 2>/dev/null || true - # Process data lines - local enriched_count=0 - local total_count=0 - while IFS=$'\t' read -r event_id kind reason repo npub; do - # Skip header lines (already copied) - [[ "$event_id" =~ ^# ]] && continue - - total_count=$((total_count + 1)) - - # If repo and npub are already populated, keep them - if [[ -n "$repo" && -n "$npub" ]]; then - printf '%s\t%s\t%s\t%s\t%s\n' "$event_id" "$kind" "$reason" "$repo" "$npub" >> "$enriched_file" - continue - fi - - # Look up event_id in our table (format: event_ididentifierpubkey_hex) - local lookup_result - lookup_result=$(grep "^${event_id}"$'\t' "$lookup_file" 2>/dev/null | head -1 || echo "") + # Use awk to perform the join efficiently + # Input files (order matters for ARGIND): + # 1. npub_map_file: pubkey_hexnpub + # 2. lookup_file: event_ididentifierpubkey_hex + # 3. 
parse_failures_file: event_idkindreasonreponpub + awk -F'\t' -v OFS='\t' ' + # Track which file we are processing + FNR==1 { file_num++ } - if [[ -n "$lookup_result" ]]; then - local looked_up_repo looked_up_pubkey_hex looked_up_npub - looked_up_repo=$(echo "$lookup_result" | cut -f2) - looked_up_pubkey_hex=$(echo "$lookup_result" | cut -f3) + # First file: npub_map (pubkey_hex -> npub) + file_num==1 { + npub_map[$1] = $2 + next + } + # Second file: lookup (event_id -> identifier, pubkey_hex) + file_num==2 { + lookup_repo[$1] = $2 + lookup_pubkey[$1] = $3 + next + } + # Third file: parse_failures + /^#/ { next } # Skip headers (already copied) + { + event_id = $1 + kind = $2 + reason = $3 + repo = $4 + npub = $5 - # Convert hex pubkey to npub if nak is available - if [[ "$can_convert_npub" == true && -n "$looked_up_pubkey_hex" ]]; then - looked_up_npub=$(nak encode npub "$looked_up_pubkey_hex" 2>/dev/null || echo "$looked_up_pubkey_hex") - else - looked_up_npub="$looked_up_pubkey_hex" - fi + # If repo/npub empty, try to enrich from lookup + if (repo == "" && event_id in lookup_repo) { + repo = lookup_repo[event_id] + } + if (npub == "" && event_id in lookup_pubkey) { + pubkey = lookup_pubkey[event_id] + if (pubkey in npub_map) { + npub = npub_map[pubkey] + } else { + npub = pubkey # Fallback to hex + } + } - # Use looked-up values if original was empty - [[ -z "$repo" ]] && repo="$looked_up_repo" - [[ -z "$npub" ]] && npub="$looked_up_npub" - enriched_count=$((enriched_count + 1)) - fi - - printf '%s\t%s\t%s\t%s\t%s\n' "$event_id" "$kind" "$reason" "$repo" "$npub" >> "$enriched_file" - done < "$parse_failures_file" + print event_id, kind, reason, repo, npub + } + ' "$npub_map_file" "$lookup_file" "$parse_failures_file" >> "$enriched_file" + + rm -f "$npub_map_file" + + # Count enriched entries + local enriched_count total_count + total_count=$(grep -v '^#' "$parse_failures_file" | wc -l) + total_count="${total_count//[^0-9]/}" + # Count entries that have non-empty 
repo AND npub after enrichment + enriched_count=$(grep -v '^#' "$enriched_file" | awk -F'\t' '$4 != "" && $5 != ""' | wc -l) + enriched_count="${enriched_count//[^0-9]/}" # Replace original with enriched version mv "$enriched_file" "$parse_failures_file" @@ -569,32 +657,29 @@ main() { echo "# Note: repo and npub may be empty for some entries" } > "$output_file" - # Parse [PARSE_FAIL] entries + # Parse [PARSE_FAIL] entries using batch awk processing log_info " Parsing [PARSE_FAIL] entries..." local parse_fail_count=0 if [[ "$parse_fail_line_count" -gt 0 ]]; then - while IFS= read -r line; do - local parsed - parsed=$(parse_parse_fail_line "$line") - if [[ -n "$parsed" ]]; then - echo "$parsed" >> "$output_file" - parse_fail_count=$((parse_fail_count + 1)) - fi - done < "$temp_parse_fail" + parse_parse_fail_batch "$temp_parse_fail" >> "$output_file" + parse_fail_count=$(grep -v '^#' "$output_file" | wc -l) + parse_fail_count="${parse_fail_count//[^0-9]/}" fi - # Parse write policy rejection entries + # Parse write policy rejection entries using batch awk processing log_info " Parsing write policy rejection entries..." 
local write_policy_count=0 if [[ "$write_policy_line_count" -gt 0 ]]; then - while IFS= read -r line; do - local parsed - parsed=$(parse_write_policy_rejection_line "$line") - if [[ -n "$parsed" ]]; then - echo "$parsed" >> "$output_file" - write_policy_count=$((write_policy_count + 1)) - fi - done < "$temp_write_policy_rejection" + local before_count + before_count=$(grep -v '^#' "$output_file" 2>/dev/null | wc -l || echo "0") + before_count="${before_count//[^0-9]/}" + before_count="${before_count:-0}" + parse_write_policy_rejection_batch "$temp_write_policy_rejection" >> "$output_file" + local after_count + after_count=$(grep -v '^#' "$output_file" 2>/dev/null | wc -l || echo "0") + after_count="${after_count//[^0-9]/}" + after_count="${after_count:-0}" + write_policy_count=$((after_count - before_count)) fi local invalid_announcement_count=$write_policy_count @@ -605,13 +690,7 @@ main() { log_info " Building enrichment lookup table..." if [[ "$rejected_announcement_line_count" -gt 0 ]]; then - while IFS= read -r line; do - local parsed - parsed=$(parse_rejected_announcement_line "$line") - if [[ -n "$parsed" ]]; then - echo "$parsed" >> "$enrichment_lookup_file" - fi - done < "$temp_rejected_announcement" + parse_rejected_announcement_batch "$temp_rejected_announcement" > "$enrichment_lookup_file" fi rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_rejected_announcement" -- cgit v1.2.3 From c9ab6aef228f0a77b2997cfc6bf83d5761ab7e08 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 12:46:26 +0000 Subject: migration analysis add since to limit log download --- .../migration-scripts/run-migration-analysis.sh | 25 ++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/docs/how-to/migration-scripts/run-migration-analysis.sh b/docs/how-to/migration-scripts/run-migration-analysis.sh index b2ca142..089b553 100755 --- a/docs/how-to/migration-scripts/run-migration-analysis.sh +++ 
b/docs/how-to/migration-scripts/run-migration-analysis.sh @@ -28,6 +28,8 @@ # --archive-git Git base directory for archive (enables Phase 2) # --service Systemd service name for log extraction (enables Phase 4) # --output Output directory (default: work/migration-analysis-YYYYMMDD-HHMM) +# --since Start date for log extraction (default: 30 days ago) +# --until End date for log extraction (default: now) # # PHASE CONTROL: # --skip-phase-1 Skip event fetching (use existing data) @@ -141,6 +143,8 @@ SERVICE_NAME="" OUTPUT_DIR="" DRY_RUN=false CONTINUE_ON_ERROR=false +LOG_SINCE="" +LOG_UNTIL="" # Phase control SKIP_PHASE_1=false @@ -223,6 +227,14 @@ parse_args() { CONTINUE_ON_ERROR=true shift ;; + --since) + LOG_SINCE="$2" + shift 2 + ;; + --until) + LOG_UNTIL="$2" + shift 2 + ;; --help|-h) usage ;; @@ -578,8 +590,17 @@ run_phase_4() { local cmds=() - cmds+=("'$SCRIPT_DIR/30-extract-parse-failures.sh' '$SERVICE_NAME' '$OUTPUT_DIR/logs'") - cmds+=("'$SCRIPT_DIR/31-extract-purgatory-expiry.sh' '$SERVICE_NAME' '$OUTPUT_DIR/logs'") + # Build log extraction options + local log_opts="" + if [[ -n "$LOG_SINCE" ]]; then + log_opts="$log_opts --since '$LOG_SINCE'" + fi + if [[ -n "$LOG_UNTIL" ]]; then + log_opts="$log_opts --until '$LOG_UNTIL'" + fi + + cmds+=("'$SCRIPT_DIR/30-extract-parse-failures.sh' '$SERVICE_NAME' '$OUTPUT_DIR/logs' $log_opts") + cmds+=("'$SCRIPT_DIR/31-extract-purgatory-expiry.sh' '$SERVICE_NAME' '$OUTPUT_DIR/logs' $log_opts") run_phase 4 "Extract Logs (VPS required)" "${cmds[@]}" } -- cgit v1.2.3 From a1573c6018c2e81795dc87d36011604dfed80936 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 13:56:45 +0000 Subject: fix: prevent git protocol errors from triggering naughty list Change protocol error detection to only match WebSocket-specific errors (websocket, invalid frame) instead of generic 'protocol' keyword which was incorrectly catching transient git protocol errors. 
Git protocol errors like 'fatal: protocol error: bad line length' are transient network issues that should use backoff/retry, not permanent naughty list blocking. Only WebSocket/Nostr protocol violations indicate persistent infrastructure problems. Fixes production false positive: - relay.ngit.dev: git protocol error + remote warning misclassified Add production test cases for git protocol errors and warning combinations. --- src/sync/naughty_list.rs | 77 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 6 deletions(-) diff --git a/src/sync/naughty_list.rs b/src/sync/naughty_list.rs index 60ab949..0abb986 100644 --- a/src/sync/naughty_list.rs +++ b/src/sync/naughty_list.rs @@ -232,12 +232,12 @@ impl NaughtyListTracker { } } - // Protocol errors - if error_lower.contains("websocket") - || error_lower.contains("protocol") - || error_lower.contains("invalid frame") - { - // Exclude connection errors + // Protocol errors - specifically WebSocket/Nostr protocol violations + // Note: We check for "websocket" specifically, NOT generic "protocol" keyword + // because git errors often contain "protocol error" (e.g., "fatal: protocol error: bad line length") + // which are transient network issues, not persistent infrastructure problems. 
+ if error_lower.contains("websocket") || error_lower.contains("invalid frame") { + // Exclude connection errors (transient) if !error_lower.contains("connection") && !error_lower.contains("timeout") && !error_lower.contains("refused") @@ -622,3 +622,68 @@ mod tests { assert_eq!(NaughtyCategory::ProtocolError.as_str(), "protocol_error"); } } + +#[cfg(test)] +mod production_tests { + use super::*; + + /// Production case from relay.ngit.dev - remote warning should not be classified + #[test] + fn test_classify_production_relay_ngit_dev_warning() { + let error = + "remote: warning: unable to access '/root/.config/git/attributes': Permission denied"; + assert_eq!(NaughtyListTracker::classify_error(error), None); + } + + /// Git protocol errors are transient, not persistent infrastructure issues + #[test] + fn test_git_protocol_errors_not_naughty() { + // These are common git protocol errors that should NOT be classified as naughty + let git_protocol_errors = [ + "fatal: protocol error: bad line length character: remo", + "fatal: protocol error: expected old/new/ref, got 'shallow", + "fatal: git upload-pack: protocol error", + "error: protocol error: bad pack header", + "fatal: protocol error: bad band #3", + ]; + + for error in git_protocol_errors { + assert_eq!( + NaughtyListTracker::classify_error(error), + None, + "Git protocol error should not be classified as naughty: {}", + error + ); + } + } + + /// Remote warning followed by git protocol error - both should be filtered/ignored + #[test] + fn test_warning_with_git_protocol_error() { + let error = "remote: warning: unable to access '/root/.config/git/attributes': Permission denied\nfatal: protocol error: bad line length character: remo"; + assert_eq!( + NaughtyListTracker::classify_error(error), + None, + "Warning + git protocol error should not be classified as naughty" + ); + } + + /// WebSocket protocol errors ARE naughty (persistent infrastructure issues) + #[test] + fn test_websocket_errors_still_naughty() 
{ + let websocket_errors = [ + "websocket protocol error", + "websocket handshake failed", + "invalid frame received", + ]; + + for error in websocket_errors { + assert_eq!( + NaughtyListTracker::classify_error(error), + Some(NaughtyCategory::ProtocolError), + "WebSocket error should be classified as protocol_error: {}", + error + ); + } + } +} -- cgit v1.2.3 From 10eab82164bb91236f2afa6b7919d0710609ba7f Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 19:14:43 +0000 Subject: docs: add ngit-relay troubleshooting guide for permission and corruption issues --- docs/how-to/migrate-to-ngit-grasp.md | 314 +++++++++++++++++++++++++++++++++++ 1 file changed, 314 insertions(+) diff --git a/docs/how-to/migrate-to-ngit-grasp.md b/docs/how-to/migrate-to-ngit-grasp.md index 62cad87..abe2191 100644 --- a/docs/how-to/migrate-to-ngit-grasp.md +++ b/docs/how-to/migrate-to-ngit-grasp.md @@ -714,3 +714,317 @@ This section documents the specific configuration and lessons learned from migra 2. **Investigate 5 edge cases**: Manual review of unusual states 3. **Monitor purgatory**: 382 expired entries indicate sync issues to investigate 4. **Plan cutover**: Once re-sync complete, switch DNS/proxy to ngit-grasp + +## ngit-relay Troubleshooting + +This section covers common issues encountered when running ngit-relay in production, including git permission errors and repository corruption. These issues were discovered during the relay.ngit.dev migration and may affect other deployments. + +### Git Permission Denied Errors + +#### Symptoms + +When cloning repositories, you see: + +```bash +$ git clone https://relay.ngit.dev/npub.../repo.git +Cloning into 'repo'... +remote: warning: unable to access '/root/.config/git/attributes': Permission denied +``` + +Or in container logs: + +``` +warning: unable to access '/root/.config/git/attributes': Permission denied +``` + +#### Explanation + +This occurs when: +1. 
Git operations run as a non-root user (typically `nginx` user, UID 101) +2. Git tries to access `/root/.config/git/attributes` for global git configuration +3. The `/root` directory has permissions `0700` (drwx------), preventing non-root users from traversing into it +4. Even though the `attributes` file itself may be world-readable, the nginx user cannot reach it due to parent directory permissions + +**Root cause:** The container runs git commands via fcgiwrap as the nginx user, but `/root` is only accessible by root. + +#### Quick Fix (Temporary - Does Not Survive Container Restart) + +This fix resolves the issue immediately but will be lost when containers restart: + +```bash +# For each ngit-relay container, exec in and create the git config directory +sudo podman exec <container-name> sh -c "mkdir -p /root/.config/git && touch /root/.config/git/attributes && chmod 644 /root/.config/git/attributes" + +# Example for specific containers: +sudo podman exec gitnostr-com-ngit-relay sh -c "mkdir -p /root/.config/git && touch /root/.config/git/attributes && chmod 644 /root/.config/git/attributes" + +sudo podman exec relay-ngit-dev-ngit-relay sh -c "mkdir -p /root/.config/git && touch /root/.config/git/attributes && chmod 644 /root/.config/git/attributes" +``` + +**Important:** This fix is temporary and will be lost when the container restarts. For a permanent solution, see the NixOS configuration below. 
+ +#### Permanent Fix (NixOS Configuration) + +For NixOS deployments, add systemd services that automatically fix `/root` permissions after each container start: + +```nix +# In your ngit-relay service configuration (e.g., services/relay-ngit-dev-ngit-relay.nix) + +systemd.services.relay-ngit-dev-fix-root-perms = { + description = "Fix /root permissions in relay.ngit.dev container for git access"; + after = [ "podman-relay-ngit-dev-ngit-relay.service" ]; + requires = [ "podman-relay-ngit-dev-ngit-relay.service" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + ExecStart = "${pkgs.bash}/bin/bash -c 'sleep 5 && ${pkgs.podman}/bin/podman exec relay-ngit-dev-ngit-relay chmod 711 /root'"; + Restart = "on-failure"; + RestartSec = "10s"; + }; +}; +``` + +This changes `/root` permissions from `0700` to `0711`, allowing the nginx user to traverse through `/root` to reach `/root/.config/git/`. + +**Why 711?** +- `7` (owner/root): Full read/write/execute +- `1` (group): Execute only (traverse) +- `1` (other): Execute only (traverse) + +This allows non-root users to traverse through `/root` to access subdirectories, while still protecting `/root` contents from being listed or read. + +#### Verification + +After applying the fix: + +```bash +# Test that cloning works without permission warnings +git clone https://relay.ngit.dev/npub.../repo.git + +# Should clone successfully with no "Permission denied" warnings + +# Verify /root permissions inside container +sudo podman exec relay-ngit-dev-ngit-relay ls -ld /root +# Should show: drwx--x--x (711) + +# Verify nginx user can access git config +sudo podman exec relay-ngit-dev-ngit-relay su -s /bin/sh nginx -c "cat /root/.config/git/attributes" +# Should succeed without "Permission denied" +``` + +### Git Repository Corruption + +#### Symptoms + +When cloning repositories, you see: + +```bash +$ git clone https://relay.ngit.dev/npub.../repo.git +Cloning into 'repo'... 
+remote: fatal: bad tree object 8b765235809eb27159657eb4c97fb37d21c29bf0 +remote: aborting due to possible repository corruption on the remote side. +fatal: early EOF +fatal: fetch-pack: invalid index-pack output +``` + +Or when running `git fsck` on the server: + +``` +broken link from tree 7d60270e1904c30ae6cef7b465ef842a9f9f63c3 + to tree 8b765235809eb27159657eb4c97fb37d21c29bf0 +missing tree 8b765235809eb27159657eb4c97fb37d21c29bf0 +``` + +#### Explanation + +Repository corruption typically occurs due to: + +1. **Incomplete push operations**: A git push was interrupted mid-transfer, creating a commit that references objects that were never written to disk +2. **Permission issues during push**: The git-receive-pack process couldn't write objects due to permission problems (e.g., files owned by wrong user) +3. **Disk/filesystem issues**: Rare cases of disk errors or filesystem corruption + +**Common pattern:** A commit exists with references to tree objects, but those tree objects are missing from the repository. Sometimes individual blobs (files) exist as "dangling" objects but were never properly linked into the tree structure. 
+ +**Warning signs:** +- HEAD file or objects owned by root when they should be owned by the service user (UID 101) +- Dangling blobs in `git fsck` output +- Recent permission denied errors in logs + +#### How to Fix + +**Step 1: Locate the corrupted repository** + +```bash +# SSH to the server +ssh dc@ngit.dev + +# Find the repository path +# For relay.ngit.dev: /persistent/relay-ngit-dev-ngit-relay/data/repos/npub.../repo.git +# For gitnostr.com: /persistent/gitnostr-com-ngit-relay/data/repos/npub.../repo.git + +cd /persistent/relay-ngit-dev-ngit-relay/data/repos/npub1c03rad0r6q833vh57kyd3ndu2jry30nkr0wepqfpsm05vq7he25slryrnw/axepool.git +``` + +**Step 2: Diagnose the corruption** + +```bash +# Run git fsck to identify missing/corrupted objects +git fsck --full + +# Example output: +# broken link from tree 7d60270e1904c30ae6cef7b465ef842a9f9f63c3 +# to tree 8b765235809eb27159657eb4c97fb37d21c29bf0 +# missing tree 8b765235809eb27159657eb4c97fb37d21c29bf0 +# dangling blob 94490b902c9bceb6f901cd0c7c25b685e3685d87 + +# Check which commit references the missing object +git log --all --oneline | head -10 + +# Inspect the broken commit +git cat-file -p <commit-hash> +# This will show which tree is missing +``` + +**Step 3: Attempt automatic repair** + +Try these in order: + +```bash +# Option A: Repack and garbage collect +git gc --aggressive --prune=now + +# Then check if corruption is fixed +git fsck --full + +# Option B: If that doesn't work, try recovering from pack files +git unpack-objects < .git/objects/pack/*.pack +git fsck --full +``` + +**Step 4: Manual reconstruction (if automatic repair fails)** + +If the missing tree object can be reconstructed from dangling blobs: + +```bash +# 1. Identify what should be in the missing tree +# Look at the commit message and nearby commits to understand the structure + +# 2. Find dangling blobs that might belong to the tree +git fsck --full | grep "dangling blob" + +# 3. 
Examine each dangling blob to identify files +git cat-file -p 94490b902c9bceb6f901cd0c7c25b685e3685d87 + +# 4. Reconstruct the tree manually +# This requires creating a new tree object with the correct structure +# Example (advanced): +git mktree <<EOF +100644 blob <blob-hash-1>	filename1.rs +100644 blob <blob-hash-2>	filename2.rs +EOF +# This outputs a new tree hash + +# 5. Create a new commit with the fixed tree +git commit-tree <new-tree-hash> -p <parent-commit-hash> -m "Reconstructed commit message" +# This outputs a new commit hash + +# 6. Update the branch reference +git update-ref refs/heads/<branch-name> <new-commit-hash> + +# 7. Clean up +git gc --prune=now +``` + +**Step 5: Verify the fix** + +```bash +# Run fsck again - should show no errors +git fsck --full + +# Test clone locally +git clone /path/to/repo.git /tmp/test-clone + +# Test clone via HTTP +git clone https://relay.ngit.dev/npub.../repo.git /tmp/test-clone-http +``` + +**Step 6: Fix ownership and permissions** + +Ensure all repository files are owned by the correct user: + +```bash +# For ngit-relay containers, files should be owned by UID 101 (nginx user) +sudo chown -R 101:101 /persistent/relay-ngit-dev-ngit-relay/data/repos/npub.../repo.git + +# Verify +ls -la /persistent/relay-ngit-dev-ngit-relay/data/repos/npub.../repo.git +``` + +**Step 7: Replicate fix to other instances (if applicable)** + +If you have multiple relay instances (e.g., gitnostr.com and relay.ngit.dev), replicate the fix: + +```bash +# Copy the repaired pack files +sudo cp /persistent/relay-ngit-dev-ngit-relay/data/repos/npub.../repo.git/objects/pack/* \ + /persistent/gitnostr-com-ngit-relay/data/repos/npub.../repo.git/objects/pack/ + +# Update the branch reference +cd /persistent/gitnostr-com-ngit-relay/data/repos/npub.../repo.git +git update-ref refs/heads/<branch-name> <new-commit-hash> + +# Fix ownership +sudo chown -R 101:101 /persistent/gitnostr-com-ngit-relay/data/repos/npub.../repo.git + +# Clean up +git gc --prune=now +``` + +#### Prevention + +To prevent future corruption: + +1. 
**Fix permission issues first**: Ensure the permission denied errors are resolved (see previous section) +2. **Monitor for root-owned files**: Files in git repositories should be owned by UID 101, not root +3. **Check disk health**: Run `df -h` and `smartctl` to ensure disk is healthy +4. **Enable git fsck in monitoring**: Periodically run `git fsck` on repositories to catch corruption early + +```bash +# Add to monitoring/cron (example) +find /persistent/*/data/repos -name "*.git" -type d | while read repo; do + echo "Checking $repo" + git -C "$repo" fsck --full 2>&1 | grep -v "^Checking\|^dangling" +done +``` + +#### Real-World Example: axepool.git Corruption + +During the relay.ngit.dev migration, the `axepool.git` repository was corrupted: + +**Problem:** +- Commit `e84518b` referenced tree `8b765235...` (the `src` directory) +- Tree `8b765235...` was missing from the repository +- Blob `94490b90...` (mint_client.rs) existed as a dangling object but wasn't linked + +**Root cause:** +- An incomplete push operation +- Permission issues (HEAD file was owned by root) +- The commit was created but the tree object was never written + +**Solution:** +1. Identified the missing tree should contain: `lib.rs`, `main.rs`, `mint_client.rs` +2. Found the dangling blob `94490b90...` was `mint_client.rs` +3. Reconstructed the `src` tree with all three files +4. Created new commit `e12bc3cf...` with the fixed tree +5. Updated `refs/heads/add-missing-hooks` to point to the new commit +6. Ran `git gc --prune=now` to clean up +7. Replicated fix to gitnostr.com instance + +**Result:** Both relays now clone successfully with all files intact. 
+ +### Additional Resources + +- **ngit-relay repository**: https://github.com/danconwaydev/ngit-relay +- **Git internals documentation**: https://git-scm.com/book/en/v2/Git-Internals-Plumbing-and-Porcelain +- **Podman documentation**: https://docs.podman.io/ -- cgit v1.2.3 From 3a7fa1d1288c28eae0ee58b4c448c672ec3b69c2 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 19:43:41 +0000 Subject: fix: return HTTP 200 with ERR pkt-line for git protocol errors Previously, all git upload-pack/receive-pack failures returned HTTP 500, but the git smart HTTP protocol requires protocol-level errors (like "not our ref") to be returned as HTTP 200 OK with an ERR pkt-line in the response body. Changes: - Add build_git_protocol_error_response() to create HTTP 200 responses with properly formatted ERR pkt-line ("ERR \n") - Add is_git_protocol_error() to detect protocol errors (exit code 128 with stderr content) vs transport errors - Update handle_upload_pack() and handle_receive_pack() to return protocol errors as HTTP 200 with ERR pkt-line - Keep HTTP 500 for actual transport errors (spawn failures, I/O errors, signals) This allows git clients to properly parse and display protocol error messages instead of seeing generic HTTP 500 errors. --- src/git/handlers.rs | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/git/handlers.rs b/src/git/handlers.rs index 017eee4..e3a6ad4 100644 --- a/src/git/handlers.rs +++ b/src/git/handlers.rs @@ -99,6 +99,42 @@ pub async fn handle_info_refs( .unwrap()) } +/// Build an HTTP 200 OK response with an ERR pkt-line for git protocol errors. +/// +/// Per the git smart HTTP protocol spec, protocol-level errors (like "not our ref") +/// should be returned as HTTP 200 OK with the error message in pkt-line format: +/// `PKT-LINE("ERR" SP explanation-text)` +/// +/// This allows git clients to properly parse and display the error message. 
+fn build_git_protocol_error_response( + service: GitService, + error_message: &str, +) -> Response> { + // Format: "ERR \n" + let err_content = format!("ERR {}\n", error_message.trim()); + let err_pktline = PktLine::data(err_content.as_bytes()).encode(); + + Response::builder() + .status(StatusCode::OK) + .header("content-type", service.result_content_type()) + .header("cache-control", "no-cache") + .body(Full::new(Bytes::from(err_pktline))) + .unwrap() +} + +/// Check if a git process failure is a protocol error (vs transport error). +/// +/// Protocol errors are communicated via stderr when git exits with code 128. +/// These should be returned to the client as HTTP 200 with ERR pkt-line. +/// +/// Transport errors (process spawn failures, I/O errors, signals) should +/// remain as HTTP 500 errors. +fn is_git_protocol_error(exit_code: Option, stderr: &[u8]) -> bool { + // Git uses exit code 128 for protocol/usage errors + // If there's stderr content, it's a protocol error message + exit_code == Some(128) && !stderr.is_empty() +} + /// Handle POST /git-upload-pack (clone/fetch) pub async fn handle_upload_pack( repo_path: PathBuf, @@ -150,6 +186,21 @@ pub async fn handle_upload_pack( if !status.success() { let stderr_str = String::from_utf8_lossy(&stderr_output); + + // Check if this is a git protocol error (exit code 128 with stderr) + // Protocol errors should be returned as HTTP 200 with ERR pkt-line + if is_git_protocol_error(status.code(), &stderr_output) { + warn!( + "Git upload-pack protocol error (returning ERR pkt-line): {}", + stderr_str + ); + return Ok(build_git_protocol_error_response( + GitService::UploadPack, + &stderr_str, + )); + } + + // Transport errors (spawn failures, signals, etc.) 
remain as HTTP 500 error!("Git upload-pack failed: {}", stderr_str); return Err(GitError::GitFailed(status.code())); } @@ -277,6 +328,21 @@ pub async fn handle_receive_pack( if !status.success() { let stderr_str = String::from_utf8_lossy(&stderr_output); + + // Check if this is a git protocol error (exit code 128 with stderr) + // Protocol errors should be returned as HTTP 200 with ERR pkt-line + if is_git_protocol_error(status.code(), &stderr_output) { + warn!( + "Git receive-pack protocol error (returning ERR pkt-line): {}", + stderr_str + ); + return Ok(build_git_protocol_error_response( + GitService::ReceivePack, + &stderr_str, + )); + } + + // Transport errors (spawn failures, signals, etc.) remain as HTTP 500 error!("Git receive-pack failed: {}", stderr_str); return Err(GitError::GitFailed(status.code())); } -- cgit v1.2.3 From 51c331f26ad3c8c422b41267e3695c8f2295510e Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 21:05:50 +0000 Subject: feat: implement OID retry logic for 'not our ref' errors Add retry loop in fetch_oids that handles git's behavior of stopping at the first missing OID. When a 'not our ref' error occurs: - Parse the missing OID from stderr - Remove it from the fetch list and track it as missing - Retry with remaining OIDs until success or all OIDs exhausted This ensures we fetch all available OIDs even when some are missing from the remote, rather than failing the entire batch. 
Also improves error reporting: - Include URL in all error messages for easier debugging - Log stderr even when domain is already on naughty list --- src/purgatory/sync/context.rs | 163 +++++++++++++++++++++++++----------------- 1 file changed, 96 insertions(+), 67 deletions(-) diff --git a/src/purgatory/sync/context.rs b/src/purgatory/sync/context.rs index 33c2d12..0df8be0 100644 --- a/src/purgatory/sync/context.rs +++ b/src/purgatory/sync/context.rs @@ -361,59 +361,60 @@ impl SyncContext for RealSyncContext { let naughty_list = self.git_naughty_list.clone(); tokio::task::spawn_blocking(move || -> Result> { - // git fetch ... - fetch all OIDs with full history - let mut args = vec!["fetch", &url]; - args.extend(missing_oids.iter().map(|s| s.as_str())); - - let output = Command::new("git") - .args(&args) - .current_dir(&repo_path) - .output(); - - match output { - Ok(result) if result.status.success() => { - // Count how many OIDs we now have - let fetched: Vec = missing_oids - .iter() - .filter(|oid| crate::git::oid_exists(&repo_path, oid)) - .cloned() - .collect(); - - debug!(fetched_count = fetched.len(), "Successfully fetched OIDs"); - - Ok(fetched) + let mut remaining_oids = missing_oids.clone(); + let mut missing_from_remote: Vec = Vec::new(); + + // Retry loop: keep fetching until success or no OIDs left + loop { + if remaining_oids.is_empty() { + // All OIDs were missing from remote + debug!( + url = %url, + missing_count = missing_from_remote.len(), + "All requested OIDs missing from remote" + ); + return Ok(vec![]); } - Ok(result) => { - let stderr = String::from_utf8_lossy(&result.stderr); - - // Extract domain and classify error for naughty list - if let Some(domain) = extract_domain(&url) { - if let Some(category) = NaughtyListTracker::classify_error(&stderr) { - let is_new = naughty_list.record(&domain, category, stderr.to_string()); - - if is_new { - tracing::warn!( - domain = %domain, - category = %category, - error = %stderr, - "Git remote domain 
added to naughty list" - ); - } else { - debug!( - domain = %domain, - category = %category, - "Git remote domain still on naughty list" - ); - } + + // git fetch ... - fetch all OIDs with full history + let mut args = vec!["fetch".to_string(), url.clone()]; + args.extend(remaining_oids.iter().cloned()); + + let output = Command::new("git") + .args(&args) + .current_dir(&repo_path) + .output(); + + match output { + Ok(result) if result.status.success() => { + // Fetch succeeded - count how many OIDs we now have + let fetched: Vec = missing_oids + .iter() + .filter(|oid| crate::git::oid_exists(&repo_path, oid)) + .cloned() + .collect(); + + if !missing_from_remote.is_empty() { + debug!( + url = %url, + fetched_count = fetched.len(), + missing_count = missing_from_remote.len(), + missing_oids = ?missing_from_remote, + "Fetch completed after retries - some OIDs were missing from remote" + ); + } else { + debug!(fetched_count = fetched.len(), "Successfully fetched OIDs"); } + + return Ok(fetched); } + Ok(result) => { + let stderr = String::from_utf8_lossy(&result.stderr); - // Check for "not our ref" errors and provide a clearer error message - let error_msg = if stderr.contains("upload-pack: not our ref") { - // Parse out the missing OID from stderr (git only reports one at a time) - let missing_oid = stderr - .lines() - .find_map(|line| { + // Check for "not our ref" error - this is retryable + if stderr.contains("upload-pack: not our ref") { + // Parse out the missing OID from stderr + let missing_oid = stderr.lines().find_map(|line| { if line.contains("not our ref") { // Extract the OID from lines like: // "fatal: remote error: upload-pack: not our ref " @@ -423,32 +424,60 @@ impl SyncContext for RealSyncContext { } }); - let total_requested = missing_oids.len(); + if let Some(ref oid) = missing_oid { + // Remove the missing OID and retry with remaining + remaining_oids.retain(|o| o != oid); + missing_from_remote.push(oid.clone()); - if let Some(oid) = missing_oid 
{ - if total_requested > 1 { - // BUG: Git stops at first missing OID, so we don't know if the others exist - // We need retry logic to fetch remaining OIDs individually - tracing::warn!( + debug!( url = %url, missing_oid = %oid, - total_requested = total_requested, - "Git fetch failed on first missing OID - other requested OIDs may exist but were not fetched. Retry logic needed." + remaining_count = remaining_oids.len(), + "OID not found on remote, retrying with remaining OIDs" ); - format!("remote missing oid {} (BUG: {} other oids not attempted)", oid, total_requested - 1) - } else { - format!("remote missing only oid requested: {}", oid) + + continue; // Retry with remaining OIDs + } + } + + // Non-retryable error - record to naughty list and return error + if let Some(domain) = extract_domain(&url) { + if let Some(category) = NaughtyListTracker::classify_error(&stderr) { + let is_new = + naughty_list.record(&domain, category, stderr.to_string()); + + if is_new { + tracing::warn!( + domain = %domain, + category = %category, + error = %stderr, + "Git remote domain added to naughty list" + ); + } else { + debug!( + domain = %domain, + category = %category, + error = %stderr, + "Git fetch failed (domain on naughty list)" + ); + } } - } else { - format!("git fetch failed: {}", stderr) } - } else { - format!("git fetch failed: {}", stderr) - }; - Err(anyhow::anyhow!("{}", error_msg)) + return Err(anyhow::anyhow!( + "git fetch failed for {}: {}", + url, + stderr + )); + } + Err(e) => { + return Err(anyhow::anyhow!( + "git fetch command error for {}: {}", + url, + e + )) + } } - Err(e) => Err(anyhow::anyhow!("git fetch command error: {}", e)), } }) .await -- cgit v1.2.3 From 847acdecb9c28a5307123b9ee685b769a598cfc1 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 21:40:46 +0000 Subject: fix: distinguish 0 OIDs fetched from successful fetch in logging When fetch_oids returns Ok(vec![]) (all requested OIDs missing from remote), the log message now says 
'Fetch returned no OIDs (not available on remote)' instead of the misleading 'Fetch succeeded' with oids_fetched=0. --- src/purgatory/sync/functions.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs index 65d29af..2b7e71f 100644 --- a/src/purgatory/sync/functions.rs +++ b/src/purgatory/sync/functions.rs @@ -369,7 +369,7 @@ pub async fn sync_identifier_from_url( throttle_manager.complete_request(&domain); let oids_fetched = match fetch_result { - Ok(fetched) => { + Ok(fetched) if !fetched.is_empty() => { debug!( identifier = %identifier, url = %url, @@ -378,6 +378,14 @@ pub async fn sync_identifier_from_url( ); fetched.len() } + Ok(_) => { + debug!( + identifier = %identifier, + url = %url, + "Fetch returned no OIDs (not available on remote)" + ); + 0 + } Err(e) => { debug!( identifier = %identifier, -- cgit v1.2.3 From 6d920cae2704016869500889a92b358d845b69e1 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 21:42:25 +0000 Subject: improve logging --- src/purgatory/sync/context.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/purgatory/sync/context.rs b/src/purgatory/sync/context.rs index 0df8be0..904f8af 100644 --- a/src/purgatory/sync/context.rs +++ b/src/purgatory/sync/context.rs @@ -403,7 +403,7 @@ impl SyncContext for RealSyncContext { "Fetch completed after retries - some OIDs were missing from remote" ); } else { - debug!(fetched_count = fetched.len(), "Successfully fetched OIDs"); + debug!(url = %url, fetched_count = fetched.len(), "Successfully fetched OIDs"); } return Ok(fetched); @@ -418,7 +418,9 @@ impl SyncContext for RealSyncContext { if line.contains("not our ref") { // Extract the OID from lines like: // "fatal: remote error: upload-pack: not our ref " - line.split("not our ref").nth(1).map(|s| s.trim().to_string()) + line.split("not our ref") + .nth(1) + .map(|s| s.trim().to_string()) } else { None } 
@@ -464,11 +466,7 @@ impl SyncContext for RealSyncContext { } } - return Err(anyhow::anyhow!( - "git fetch failed for {}: {}", - url, - stderr - )); + return Err(anyhow::anyhow!("git fetch failed for {}: {}", url, stderr)); } Err(e) => { return Err(anyhow::anyhow!( -- cgit v1.2.3 From efc3da477d4edb9d1334718e3e20d197ba711468 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 21:55:34 +0000 Subject: fix: pass actually fetched OIDs to process_newly_available_git_data Previously, sync_identifier_from_url passed all needed OIDs to process_newly_available_git_data, not just the OIDs that were successfully fetched. This caused incorrect logging (new_oids_count would show all needed OIDs, not just fetched ones). While this didn't break functionality (the actual processing uses can_apply_state which checks the repository on disk), it made debugging confusing. Changes: - Rename oids_fetched to fetched_oids and change type from usize to Vec - Return Vec from match arms instead of counts - Pass fetched_oids (not needed_oids) to process_newly_available_git_data - Return fetched_oids.len() at the end This ensures logging accurately reflects which OIDs were actually fetched from the remote. 
--- src/purgatory/sync/functions.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs index 2b7e71f..9207d58 100644 --- a/src/purgatory/sync/functions.rs +++ b/src/purgatory/sync/functions.rs @@ -368,7 +368,7 @@ pub async fn sync_identifier_from_url( let fetch_result = ctx.fetch_oids(&target_repo, url, &needed_oids).await; throttle_manager.complete_request(&domain); - let oids_fetched = match fetch_result { + let fetched_oids = match fetch_result { Ok(fetched) if !fetched.is_empty() => { debug!( identifier = %identifier, @@ -376,7 +376,7 @@ pub async fn sync_identifier_from_url( oids_fetched = fetched.len(), "Fetch succeeded" ); - fetched.len() + fetched } Ok(_) => { debug!( @@ -384,7 +384,7 @@ pub async fn sync_identifier_from_url( url = %url, "Fetch returned no OIDs (not available on remote)" ); - 0 + vec![] } Err(e) => { debug!( @@ -393,13 +393,13 @@ pub async fn sync_identifier_from_url( error = %e, "Fetch failed" ); - 0 + vec![] } }; // Try to process any events that can now be satisfied - if oids_fetched > 0 { - let new_oids: HashSet = needed_oids.into_iter().collect(); + if !fetched_oids.is_empty() { + let new_oids: HashSet = fetched_oids.iter().cloned().collect(); if let Err(e) = ctx .process_newly_available_git_data(&target_repo, &new_oids) .await @@ -412,7 +412,7 @@ pub async fn sync_identifier_from_url( } } - oids_fetched + fetched_oids.len() } /// Sync git data for an identifier. -- cgit v1.2.3 From 3c1eda5fc9e660d40cadcdef8903aea986fe3242 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Wed, 28 Jan 2026 14:17:30 +0000 Subject: feat(migration): detect when archive git data is ahead of prod Add git ancestry comparison (22-compare-git-data.sh) to determine commit relationships between prod and archive repos. Repos where archive is ahead are now correctly classified as ready-for-migration since ngit-grasp only accepts git data authorized by state events. 
Previously, repos with different git data were flagged as needs-resync even when archive had newer/better data than prod. --- .../migration-scripts/22-compare-git-data.sh | 390 +++++++++++++++++++++ .../migration-scripts/40-classify-actions.sh | 84 ++++- .../migration-scripts/run-migration-analysis.sh | 16 +- 3 files changed, 481 insertions(+), 9 deletions(-) create mode 100755 docs/how-to/migration-scripts/22-compare-git-data.sh diff --git a/docs/how-to/migration-scripts/22-compare-git-data.sh b/docs/how-to/migration-scripts/22-compare-git-data.sh new file mode 100755 index 0000000..76521d4 --- /dev/null +++ b/docs/how-to/migration-scripts/22-compare-git-data.sh @@ -0,0 +1,390 @@ +#!/usr/bin/env bash +# +# 22-compare-git-data.sh - Compare actual git data between prod and archive relays +# +# PHASE 3c of the GRASP relay to ngit-grasp migration analysis pipeline. +# Compares actual git commits between prod and archive to determine which is ahead. +# +# KEY INSIGHT: +# Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event. 
+# If archive has different/newer data than prod, it means: +# - A state event authorized those commits at some point +# - Archive is actually MORE up-to-date than prod +# - Migration should use archive data (it's already correct) +# +# USAGE: +# ./22-compare-git-data.sh <prod-git-base> <archive-git-base> <repo-list> <output-dir> +# +# EXAMPLES: +# ./22-compare-git-data.sh /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \ +# output/comparison/complete-prod-incomplete-archive.txt output/comparison +# +# INPUT: +# prod-git-base Base directory for prod git repos (e.g., /var/lib/grasp-relay/git) +# archive-git-base Base directory for archive git repos (e.g., /var/lib/ngit-grasp/git) +# repo-list File with repos to compare (format: "repo | npub | ...") +# +# OUTPUT: +# <output-dir>/git-ancestry.tsv - Tab-separated values: +# repo<TAB>npub<TAB>relationship<TAB>details +# +# Relationship values: +# archive-ahead - Archive has all prod commits plus more (GOOD - use archive) +# in-sync - Both have identical commits +# prod-ahead - Prod has commits archive is missing (needs re-sync) +# diverged - Both have unique commits (manual review) +# archive-only - Only archive has git data +# prod-only - Only prod has git data +# both-empty - Neither has git data +# +# PREREQUISITES: +# - git (for ref comparison) +# - Read access to both git directories (may need sudo) +# +# RUNTIME: Depends on number of repos to compare +# +# SEE ALSO: +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide +# 21-compare-relays.sh - Phase 3b script that identifies repos to compare +# + +set -euo pipefail + +# Colors for output (disabled if not a terminal) +if [[ -t 1 ]]; then + RED='\033[0;31m' + GREEN='\033[0;32m' + YELLOW='\033[0;33m' + BLUE='\033[0;34m' + NC='\033[0m' +else + RED='' + GREEN='' + YELLOW='' + BLUE='' + NC='' +fi + +log_info() { + echo -e "${BLUE}[INFO]${NC} $*" >&2 +} + +log_success() { + echo -e "${GREEN}[OK]${NC} $*" >&2 +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $*" >&2 +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $*" >&2 +} + +log_progress() { 
+ echo -ne "\r${BLUE}[PROGRESS]${NC} $*" >&2 +} + +usage() { + echo "Usage: $0 " + echo "" + echo "Arguments:" + echo " prod-git-base Base directory for prod git repos" + echo " archive-git-base Base directory for archive git repos" + echo " repo-list File with repos to compare (format: 'repo | npub | ...')" + echo " output-dir Directory to store output files" + echo "" + echo "Examples:" + echo " $0 /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \\" + echo " output/comparison/complete-prod-incomplete-archive.txt output/comparison" + echo "" + echo "Output:" + echo " git-ancestry.tsv - TSV with: repo, npub, relationship, details" + exit 1 +} + +# Get all branch refs from a git directory +# Args: $1=git_dir +# Returns: sorted list of "ref_name commit_hash" lines +get_git_refs() { + local git_dir="$1" + + if [[ ! -d "$git_dir" ]]; then + return + fi + + git --git-dir="$git_dir" show-ref --heads 2>/dev/null | sort || true +} + +# Check if commit A is ancestor of commit B +# Args: $1=git_dir, $2=commit_a, $3=commit_b +# Returns: 0 if A is ancestor of B, 1 otherwise +is_ancestor() { + local git_dir="$1" + local commit_a="$2" + local commit_b="$3" + + git --git-dir="$git_dir" merge-base --is-ancestor "$commit_a" "$commit_b" 2>/dev/null +} + +# Compare git data between prod and archive for a single repo +# Args: $1=prod_git_dir, $2=archive_git_dir +# Returns: relationship string +compare_repo_git() { + local prod_git="$1" + local archive_git="$2" + + local prod_exists=false + local archive_exists=false + + [[ -d "$prod_git" ]] && prod_exists=true + [[ -d "$archive_git" ]] && archive_exists=true + + # Handle cases where one or both don't exist + if [[ "$prod_exists" == "false" && "$archive_exists" == "false" ]]; then + echo "both-empty" + return + fi + + if [[ "$prod_exists" == "false" ]]; then + echo "archive-only" + return + fi + + if [[ "$archive_exists" == "false" ]]; then + echo "prod-only" + return + fi + + # Both exist - get refs + local prod_refs archive_refs + 
prod_refs=$(get_git_refs "$prod_git") + archive_refs=$(get_git_refs "$archive_git") + + # Handle empty refs + if [[ -z "$prod_refs" && -z "$archive_refs" ]]; then + echo "both-empty" + return + fi + + if [[ -z "$prod_refs" ]]; then + echo "archive-only" + return + fi + + if [[ -z "$archive_refs" ]]; then + echo "prod-only" + return + fi + + # Compare refs - check if they're identical + if [[ "$prod_refs" == "$archive_refs" ]]; then + echo "in-sync" + return + fi + + # Refs differ - need to check ancestry + # Strategy: For each branch, check if one is ancestor of the other + # If all archive branches are ahead of or equal to prod branches, archive is ahead + # If all prod branches are ahead of or equal to archive branches, prod is ahead + # Otherwise, they've diverged + + local archive_ahead=true + local prod_ahead=true + local has_common_branch=false + + # Create temporary file to use archive as reference repo for ancestry checks + # We need a repo that has both sets of commits to check ancestry + # Use archive since it's the target and should have the superset + + # Check each prod branch against archive + while read -r prod_hash prod_ref; do + [[ -z "$prod_hash" ]] && continue + + # Get the same branch from archive + local archive_hash + archive_hash=$(echo "$archive_refs" | grep " $prod_ref$" | awk '{print $1}' || echo "") + + if [[ -z "$archive_hash" ]]; then + # Branch exists in prod but not archive - prod has something archive doesn't + # But this could be a deleted branch, so don't immediately say prod is ahead + continue + fi + + has_common_branch=true + + if [[ "$prod_hash" == "$archive_hash" ]]; then + # Same commit - neither ahead for this branch + continue + fi + + # Different commits - check ancestry + # First, try to check if prod is ancestor of archive (archive ahead) + if is_ancestor "$archive_git" "$prod_hash" "$archive_hash" 2>/dev/null; then + # Prod commit is ancestor of archive commit - archive is ahead for this branch + prod_ahead=false + elif 
is_ancestor "$archive_git" "$archive_hash" "$prod_hash" 2>/dev/null; then + # Archive commit is ancestor of prod commit - prod is ahead for this branch + archive_ahead=false + else + # Neither is ancestor - diverged + archive_ahead=false + prod_ahead=false + fi + done <<< "$prod_refs" + + # Also check for branches only in archive (archive has extra branches) + while read -r archive_hash archive_ref; do + [[ -z "$archive_hash" ]] && continue + + local prod_hash + prod_hash=$(echo "$prod_refs" | grep " $archive_ref$" | awk '{print $1}' || echo "") + + if [[ -z "$prod_hash" ]]; then + # Branch exists in archive but not prod - archive has something prod doesn't + # This means archive is ahead (has extra branches) + prod_ahead=false + fi + done <<< "$archive_refs" + + # Determine final relationship + if [[ "$has_common_branch" == "false" ]]; then + # No common branches - completely different + echo "diverged" + return + fi + + if [[ "$archive_ahead" == "true" && "$prod_ahead" == "false" ]]; then + echo "archive-ahead" + elif [[ "$prod_ahead" == "true" && "$archive_ahead" == "false" ]]; then + echo "prod-ahead" + elif [[ "$archive_ahead" == "true" && "$prod_ahead" == "true" ]]; then + # Both true means all common branches are identical + # But one might have extra branches + echo "in-sync" + else + echo "diverged" + fi +} + +# Main +main() { + if [[ $# -ne 4 ]]; then + usage + fi + + local prod_git_base="$1" + local archive_git_base="$2" + local repo_list="$3" + local output_dir="$4" + + # Validate inputs + if [[ ! -d "$prod_git_base" ]]; then + log_error "Prod git base directory not found: $prod_git_base" + exit 1 + fi + + if [[ ! -d "$archive_git_base" ]]; then + log_error "Archive git base directory not found: $archive_git_base" + exit 1 + fi + + if [[ ! 
-f "$repo_list" ]]; then + log_error "Repo list file not found: $repo_list" + exit 1 + fi + + log_info "=== Git Data Comparison ===" + log_info "Prod git base: $prod_git_base" + log_info "Archive git base: $archive_git_base" + log_info "Repo list: $repo_list" + log_info "Output: $output_dir" + log_info "Started: $(date)" + echo "" + + # Create output directory + mkdir -p "$output_dir" + + # Output file + local tsv_file="$output_dir/git-ancestry.tsv" + + # Initialize TSV with header + echo -e "repo\tnpub\trelationship\tdetails" > "$tsv_file" + + # Count repos + local total_repos + total_repos=$(grep -c -v '^#' "$repo_list" 2>/dev/null || echo "0") + log_info "Processing $total_repos repos..." + echo "" + + # Counters + local count=0 + local count_archive_ahead=0 + local count_in_sync=0 + local count_prod_ahead=0 + local count_diverged=0 + local count_archive_only=0 + local count_prod_only=0 + local count_both_empty=0 + + # Process each repo + while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do + # Skip comments and empty lines + [[ "$repo" =~ ^# ]] && continue + [[ -z "$repo" ]] && continue + + # Clean up whitespace + repo="${repo// /}" + npub="${npub// /}" + + [[ -z "$repo" || -z "$npub" ]] && continue + + count=$((count + 1)) + + # Build git paths + local prod_git="$prod_git_base/${npub}/${repo}.git" + local archive_git="$archive_git_base/${npub}/${repo}.git" + + # Compare + local relationship details="" + relationship=$(compare_repo_git "$prod_git" "$archive_git") + + # Count by relationship + case "$relationship" in + archive-ahead) count_archive_ahead=$((count_archive_ahead + 1)) ;; + in-sync) count_in_sync=$((count_in_sync + 1)) ;; + prod-ahead) count_prod_ahead=$((count_prod_ahead + 1)) ;; + diverged) count_diverged=$((count_diverged + 1)) ;; + archive-only) count_archive_only=$((count_archive_only + 1)) ;; + prod-only) count_prod_only=$((count_prod_only + 1)) ;; + both-empty) count_both_empty=$((count_both_empty + 1)) ;; + esac + + # Output TSV line 
+ printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$relationship" "$details" >> "$tsv_file" + + # Progress indicator every 10 repos + if [[ $((count % 10)) -eq 0 ]]; then + log_progress "Processed $count/$total_repos repos..." + fi + done < "$repo_list" + + # Clear progress line + echo "" >&2 + + # Summary + echo "" + log_info "=== Comparison Summary ===" + log_success "Archive ahead (use archive data): $count_archive_ahead" + log_success "In sync: $count_in_sync" + log_warn "Prod ahead (needs re-sync): $count_prod_ahead" + log_error "Diverged (manual review): $count_diverged" + log_info "Archive only: $count_archive_only" + log_info "Prod only: $count_prod_only" + log_info "Both empty: $count_both_empty" + echo "" + log_info "Total: $count repos" + log_info "Output: $tsv_file" +} + +main "$@" diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh index b1348f8..07ae7c9 100755 --- a/docs/how-to/migration-scripts/40-classify-actions.sh +++ b/docs/how-to/migration-scripts/40-classify-actions.sh @@ -10,16 +10,25 @@ # - Empty in prod (prod=cat2, any archive status) # - Archive-only (archive=any, prod=missing) # - Not in prod (purgatory-only, prod=missing) +# - Archive ahead (archive has newer git data than prod - GRASP enforced) # # Tier 2: Action Required (needs-resync.txt) # - Complete in prod, missing from archive (with purgatory context) -# - Complete in prod, incomplete in archive (with purgatory context) +# - Complete in prod, incomplete in archive AND prod is ahead (with purgatory context) # # Tier 3: Manual Investigation (manual-review.txt) # - Partial in prod (prod=cat3) # - No-match in prod (prod=cat4) # - Parse failures # - Conflicting states +# - Diverged git history (both have unique commits) +# +# KEY INSIGHT: +# Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event. 
+# If archive has different/newer data than prod, it means: +# - A state event authorized those commits at some point +# - Archive is actually MORE up-to-date than prod +# - Migration should use archive data (it's already correct) # # Usage: ./40-classify-actions.sh # @@ -231,6 +240,25 @@ DELETED_COUNT=0 [[ ${#DELETED[@]} -gt 0 ]] && DELETED_COUNT=${#DELETED[@]} log_info "Loaded $DELETED_COUNT deletion entries" +# Build git ancestry lookup: repo|npub -> relationship (archive-ahead, prod-ahead, diverged, etc.) +# This data comes from 22-compare-git-data.sh which compares actual git commits +declare -A GIT_ANCESTRY +GIT_ANCESTRY_COUNT=0 +if [[ -f "$COMPARISON_DIR/git-ancestry.tsv" ]]; then + while IFS=$'\t' read -r repo npub relationship details || [[ -n "$repo" ]]; do + # Skip header and comments + [[ "$repo" == "repo" ]] && continue + [[ "$repo" =~ ^# ]] && continue + [[ -z "$repo" || -z "$npub" ]] && continue + GIT_ANCESTRY["$repo|$npub"]="$relationship" + GIT_ANCESTRY_COUNT=$((GIT_ANCESTRY_COUNT + 1)) + done < "$COMPARISON_DIR/git-ancestry.tsv" + log_info "Loaded $GIT_ANCESTRY_COUNT git ancestry entries" +else + log_warn "No git-ancestry.tsv found - will not check if archive is ahead of prod" + log_warn "Run 22-compare-git-data.sh to enable archive-ahead detection" +fi + # ============================================================================ # Phase 2: Build unique repo list from all sources # ============================================================================ @@ -263,12 +291,14 @@ COUNTS[ready_deleted]=0 COUNTS[ready_empty_prod]=0 COUNTS[ready_archive_only]=0 COUNTS[ready_not_in_prod]=0 +COUNTS[ready_archive_ahead]=0 COUNTS[resync_missing_archive]=0 COUNTS[resync_incomplete_archive]=0 COUNTS[review_partial_prod]=0 COUNTS[review_nomatch_prod]=0 COUNTS[review_parse_failure]=0 COUNTS[review_conflicting]=0 +COUNTS[review_diverged]=0 # Output arrays declare -a READY_LINES @@ -381,14 +411,48 @@ for key in "${!ALL_REPOS[@]}"; do REVIEW_LINES+=("$repo | 
$npub | $prod_status | $archive_status | $context | complete in prod with parse failure") COUNTS[review_parse_failure]=$((COUNTS[review_parse_failure] + 1)) else - # Needs resync - include purgatory context - context=$(get_context "$key" "$prod_status" "$archive_status") - if [[ "$archive_cat" == "missing" ]]; then - RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync to archive") - COUNTS[resync_missing_archive]=$((COUNTS[resync_missing_archive] + 1)) + # Check git ancestry to see if archive is actually ahead + local git_relationship="${GIT_ANCESTRY[$key]:-unknown}" + + if [[ "$git_relationship" == "archive-ahead" || "$git_relationship" == "in-sync" ]]; then + # Archive has newer/same git data - this is GOOD + # Archive's git data was authorized by a state event (GRASP enforced) + context=$(get_context "$key" "$prod_status" "$archive_status") + if [[ -n "$context" && "$context" != "none" ]]; then + context="$context, git=$git_relationship" + else + context="git=$git_relationship" + fi + READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | archive ahead (use archive data)") + COUNTS[ready_archive_ahead]=$((COUNTS[ready_archive_ahead] + 1)) + elif [[ "$git_relationship" == "diverged" ]]; then + # Git histories diverged - needs manual review + context=$(get_context "$key" "$prod_status" "$archive_status") + if [[ -n "$context" && "$context" != "none" ]]; then + context="$context, git=diverged" + else + context="git=diverged" + fi + REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | git histories diverged (manual review)") + COUNTS[review_diverged]=$((COUNTS[review_diverged] + 1)) else - RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync (archive incomplete)") - COUNTS[resync_incomplete_archive]=$((COUNTS[resync_incomplete_archive] + 1)) + # prod-ahead, archive-only, prod-only, both-empty, or unknown + # These need resync - include purgatory 
context + context=$(get_context "$key" "$prod_status" "$archive_status") + if [[ "$git_relationship" != "unknown" ]]; then + if [[ -n "$context" && "$context" != "none" ]]; then + context="$context, git=$git_relationship" + else + context="git=$git_relationship" + fi + fi + if [[ "$archive_cat" == "missing" ]]; then + RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync to archive") + COUNTS[resync_missing_archive]=$((COUNTS[resync_missing_archive] + 1)) + else + RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync (archive incomplete)") + COUNTS[resync_incomplete_archive]=$((COUNTS[resync_incomplete_archive] + 1)) + fi fi fi fi @@ -498,6 +562,7 @@ fi echo "| Reason | Count |" echo "|--------|-------|" echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |" + echo "| archive ahead (has newer git data) | ${COUNTS[ready_archive_ahead]} |" echo "| deleted by user | ${COUNTS[ready_deleted]} |" echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |" echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |" @@ -527,6 +592,7 @@ fi echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |" echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |" echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |" + echo "| git histories diverged | ${COUNTS[review_diverged]} |" echo "" echo "## Input Data Summary" echo "" @@ -571,6 +637,7 @@ echo "" echo "=== Summary ===" echo "Ready for Migration: $TOTAL_READY ($PCT_READY%)" echo " - Complete in both: ${COUNTS[ready_complete_both]}" +echo " - Archive ahead: ${COUNTS[ready_archive_ahead]}" echo " - Deleted by user: ${COUNTS[ready_deleted]}" echo " - Empty in prod: ${COUNTS[ready_empty_prod]}" echo " - Archive-only: ${COUNTS[ready_archive_only]}" @@ -584,6 +651,7 @@ echo "Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)" echo " - Partial in prod: 
${COUNTS[review_partial_prod]}" echo " - No-match in prod: ${COUNTS[review_nomatch_prod]}" echo " - Parse failures: ${COUNTS[review_parse_failure]}" +echo " - Git diverged: ${COUNTS[review_diverged]}" echo "" echo "Total: $TOTAL repos" echo "" diff --git a/docs/how-to/migration-scripts/run-migration-analysis.sh b/docs/how-to/migration-scripts/run-migration-analysis.sh index 089b553..acc5e44 100755 --- a/docs/how-to/migration-scripts/run-migration-analysis.sh +++ b/docs/how-to/migration-scripts/run-migration-analysis.sh @@ -320,7 +320,7 @@ check_prerequisites() { fi # Check scripts exist - for script in 01-fetch-events.sh 10-check-git-sync.sh 20-categorize.sh 21-compare-relays.sh 30-extract-parse-failures.sh 31-extract-purgatory-expiry.sh 40-classify-actions.sh; do + for script in 01-fetch-events.sh 10-check-git-sync.sh 20-categorize.sh 21-compare-relays.sh 22-compare-git-data.sh 30-extract-parse-failures.sh 31-extract-purgatory-expiry.sh 40-classify-actions.sh; do if [[ ! -x "$SCRIPT_DIR/$script" ]]; then log_error "Script not found or not executable: $SCRIPT_DIR/$script" missing=1 @@ -551,6 +551,20 @@ run_phase_3() { fi run_phase 3 "Categorize & Compare (fast)" "${cmds[@]}" + + # Phase 3c: Compare git data between relays (requires git paths) + # This determines if archive is ahead of prod for repos with mismatched state + if [[ -n "$PROD_GIT" && -n "$ARCHIVE_GIT" ]]; then + # Build list of repos to compare: those where prod=complete but archive is not + local repos_to_compare="$OUTPUT_DIR/comparison/complete-prod-incomplete-archive.txt" + if [[ -f "$repos_to_compare" ]] && [[ ! -f "$OUTPUT_DIR/comparison/git-ancestry.tsv" ]]; then + log_info "Running git ancestry comparison (Phase 3c)..." 
+ run_phase 3 "Git Ancestry Comparison" "'$SCRIPT_DIR/22-compare-git-data.sh' '$PROD_GIT' '$ARCHIVE_GIT' '$repos_to_compare' '$OUTPUT_DIR/comparison'" + fi + else + log_warn "Git paths not provided - skipping git ancestry comparison" + log_warn "Without git comparison, repos where archive is ahead will be incorrectly flagged as needing re-sync" + fi } # Phase 4: Extract logs -- cgit v1.2.3 From e9daa340ce1bd215e71d2dc86a81207b7d61df02 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Wed, 28 Jan 2026 15:10:49 +0000 Subject: fix(migration): remove local keyword outside function in classify script --- docs/how-to/migration-scripts/40-classify-actions.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh index 07ae7c9..8b61636 100755 --- a/docs/how-to/migration-scripts/40-classify-actions.sh +++ b/docs/how-to/migration-scripts/40-classify-actions.sh @@ -412,7 +412,7 @@ for key in "${!ALL_REPOS[@]}"; do COUNTS[review_parse_failure]=$((COUNTS[review_parse_failure] + 1)) else # Check git ancestry to see if archive is actually ahead - local git_relationship="${GIT_ANCESTRY[$key]:-unknown}" + git_relationship="${GIT_ANCESTRY[$key]:-unknown}" if [[ "$git_relationship" == "archive-ahead" || "$git_relationship" == "in-sync" ]]; then # Archive has newer/same git data - this is GOOD -- cgit v1.2.3 From f148b3a0e4b032c0acf835cda6d2935e19b9f67e Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Wed, 28 Jan 2026 21:00:14 +0000 Subject: feat(purgatory): track event source for filtered expiry logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add EventSource enum (Direct/Sync) to purgatory entries to distinguish between user-submitted events and sync-fetched events. 
This enables: - WARN-level logging for direct submissions that expire (user should know) - DEBUG-level logging for sync-fetched expirations (expected behavior) - Source upgrade from Sync→Direct if user submits after sync - Expiry timer reset on source upgrade (fresh 30-min window for user) The source is included in [PURGATORY_EXPIRED] logs as source=direct or source=sync for easy filtering. --- src/nostr/builder.rs | 2 +- src/nostr/policy/state.rs | 2 +- src/purgatory/mod.rs | 206 ++++++++++++++++++++++++++++++++++------------ src/purgatory/types.rs | 30 +++++++ 4 files changed, 187 insertions(+), 53 deletions(-) diff --git a/src/nostr/builder.rs b/src/nostr/builder.rs index 629c111..9211972 100644 --- a/src/nostr/builder.rs +++ b/src/nostr/builder.rs @@ -399,7 +399,7 @@ impl Nip34WritePolicy { // Add to purgatory self.ctx .purgatory - .add_pr(event.clone(), event.id.to_hex(), commit.clone()); + .add_pr(event.clone(), event.id.to_hex(), commit.clone(), is_synced); WritePolicyResult::Reject { status: true, // Client sees OK diff --git a/src/nostr/policy/state.rs b/src/nostr/policy/state.rs index f94f004..52f0483 100644 --- a/src/nostr/policy/state.rs +++ b/src/nostr/policy/state.rs @@ -207,7 +207,7 @@ impl StatePolicy { // (add_state automatically enqueues for background sync) self.ctx .purgatory - .add_state(event.clone(), state.identifier.clone(), event.pubkey); + .add_state(event.clone(), state.identifier.clone(), event.pubkey, is_synced); tracing::info!( "state event added to purgatory: eventid: {}, identifier: {}", diff --git a/src/purgatory/mod.rs b/src/purgatory/mod.rs index 8b75351..d442ad8 100644 --- a/src/purgatory/mod.rs +++ b/src/purgatory/mod.rs @@ -17,7 +17,7 @@ pub mod sync; mod types; pub use helpers::{can_apply_state, can_satisfy_state, extract_refs_from_state, get_unpushed_refs}; -pub use types::{PrPurgatoryEntry, RefPair, RefUpdate, StatePurgatoryEntry}; +pub use types::{EventSource, PrPurgatoryEntry, RefPair, RefUpdate, StatePurgatoryEntry}; use 
dashmap::DashMap; use nostr_sdk::prelude::*; @@ -58,6 +58,9 @@ struct SerializableStatePurgatoryEntry { created_at_offset_secs: u64, /// Duration offset from saved_at for expires_at expires_at_offset_secs: u64, + /// Source of this event (direct submission vs sync) + #[serde(default)] + source: types::EventSource, } /// Serializable wrapper for `PrPurgatoryEntry` with time offsets. @@ -75,6 +78,9 @@ struct SerializablePrPurgatoryEntry { created_at_offset_secs: u64, /// Duration offset from saved_at for expires_at expires_at_offset_secs: u64, + /// Source of this event (direct submission vs sync) + #[serde(default)] + source: types::EventSource, } /// Serializable purgatory state for disk persistence. @@ -271,11 +277,38 @@ impl Purgatory { /// For sync-triggered events, the SyncManager calls `enqueue_sync_immediate` separately /// to override this delay. /// + /// If an event already exists in purgatory with `Sync` source and the new submission + /// is direct (`!from_sync`), the source is upgraded to `Direct` without extending expiry. 
+ /// /// # Arguments /// * `event` - The state event (kind 30618) to hold /// * `identifier` - The repository identifier from the 'd' tag /// * `author` - The event author's public key - pub fn add_state(&self, event: Event, identifier: String, author: PublicKey) { + /// * `from_sync` - True if this event came from proactive sync (vs user-submitted) + pub fn add_state(&self, event: Event, identifier: String, author: PublicKey, from_sync: bool) { + let source = if from_sync { + types::EventSource::Sync + } else { + types::EventSource::Direct + }; + + // Check if event already exists - if so, potentially upgrade source + if let Some(mut entries) = self.state_events.get_mut(&identifier) { + if let Some(existing) = entries.iter_mut().find(|e| e.event.id == event.id) { + // Upgrade source from Sync to Direct if new submission is direct + if existing.source == types::EventSource::Sync && !from_sync { + existing.source = types::EventSource::Direct; + existing.expires_at = Instant::now() + DEFAULT_EXPIRY; + tracing::debug!( + event_id = %event.id, + identifier = %identifier, + "Upgraded purgatory entry source from Sync to Direct, reset expiry" + ); + } + return; // Event already exists, don't add duplicate + } + } + let now = Instant::now(); let entry = StatePurgatoryEntry { event, @@ -283,6 +316,7 @@ impl Purgatory { author, created_at: now, expires_at: now + DEFAULT_EXPIRY, + source, }; self.state_events @@ -302,11 +336,35 @@ impl Purgatory { /// Automatically enqueues the referenced repository identifier for background sync /// with the default delay (3 minutes), giving time for a git push to arrive. /// + /// If an event already exists in purgatory with `Sync` source and the new submission + /// is direct (`!from_sync`), the source is upgraded to `Direct` without extending expiry. 
+ /// /// # Arguments /// * `event` - The PR event (kind 1617/1618) to hold /// * `event_id` - The event ID (hex string) from the 'e' tag /// * `commit` - The commit SHA from the 'c' tag - pub fn add_pr(&self, event: Event, event_id: String, commit: String) { + /// * `from_sync` - True if this event came from proactive sync (vs user-submitted) + pub fn add_pr(&self, event: Event, event_id: String, commit: String, from_sync: bool) { + let source = if from_sync { + types::EventSource::Sync + } else { + types::EventSource::Direct + }; + + // Check if event already exists - if so, potentially upgrade source + if let Some(mut existing) = self.pr_events.get_mut(&event_id) { + // Upgrade source from Sync to Direct if new submission is direct + if existing.source == types::EventSource::Sync && !from_sync { + existing.source = types::EventSource::Direct; + existing.expires_at = Instant::now() + DEFAULT_EXPIRY; + tracing::debug!( + event_id = %event_id, + "Upgraded PR purgatory entry source from Sync to Direct, reset expiry" + ); + } + return; // Event already exists, don't add duplicate + } + // Extract identifier from the event's `a` tag for sync enqueueing let identifier = crate::git::sync::extract_identifier_from_pr_event(&event); @@ -316,6 +374,7 @@ impl Purgatory { commit, created_at: now, expires_at: now + DEFAULT_EXPIRY, + source, }; self.pr_events.insert(event_id, entry); @@ -329,6 +388,8 @@ impl Purgatory { /// Add a PR placeholder (git data arrived before PR event). /// /// Creates a placeholder entry waiting for the corresponding PR event. + /// Placeholders are always marked as `Direct` source since they originate + /// from git pushes (direct user action). 
/// /// # Arguments /// * `event_id` - The expected event ID (from git ref name) @@ -340,6 +401,7 @@ impl Purgatory { commit, created_at: now, expires_at: now + DEFAULT_EXPIRY, + source: types::EventSource::Direct, // Git pushes are direct user actions }; self.pr_events.insert(event_id, entry); @@ -626,15 +688,29 @@ impl Purgatory { for entry in entries.iter().filter(|e| e.expires_at <= now) { let npub = entry.author.to_bech32().unwrap_or_else(|_| entry.author.to_hex()); let event_id_short = &entry.event.id.to_hex()[..12]; + let source_str = if entry.source.is_direct() { "direct" } else { "sync" }; // Structured log for migration scripts - tracing::warn!( - "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} reason=\"git data not received within 30 minutes\"", - identifier, - npub, - event_id_short, - entry.event.kind.as_u16() - ); + // Direct submissions log at WARN, synced events at DEBUG + if entry.source.is_direct() { + tracing::warn!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} source={} reason=\"git data not received within 30 minutes\"", + identifier, + npub, + event_id_short, + entry.event.kind.as_u16(), + source_str + ); + } else { + tracing::debug!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... 
kind={} source={} reason=\"git data not received within 30 minutes\"", + identifier, + npub, + event_id_short, + entry.event.kind.as_u16(), + source_str + ); + } self.mark_expired(entry.event.id); } @@ -655,16 +731,18 @@ impl Purgatory { let event_id_str = entry.key().clone(); let event_opt = pr_entry.event.clone(); let commit = pr_entry.commit.clone(); - (event_id_str, event_opt, commit) + let source = pr_entry.source; + (event_id_str, event_opt, commit, source) }) .collect(); let pr_removed = expired_prs.len(); - for (event_id_str, event_opt, commit) in expired_prs { + for (event_id_str, event_opt, commit, source) in expired_prs { // Log structured entry for PR events (not placeholders) if let Some(ref event) = event_opt { let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); let event_id_short = &event.id.to_hex()[..12]; + let source_str = if source.is_direct() { "direct" } else { "sync" }; // Extract ALL repo identifiers from 'a' tags // (PR events can reference multiple repos when there are multiple maintainers) @@ -701,22 +779,37 @@ impl Purgatory { }; // Structured log for migration scripts - log once per repo + // Direct submissions log at WARN, synced events at DEBUG for repo in &repos_to_log { - tracing::warn!( - "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} commit={} reason=\"git data not received within 30 minutes\"", - repo, - npub, - event_id_short, - event.kind.as_u16(), - &commit[..commit.len().min(12)] - ); + if source.is_direct() { + tracing::warn!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} commit={} source={} reason=\"git data not received within 30 minutes\"", + repo, + npub, + event_id_short, + event.kind.as_u16(), + &commit[..commit.len().min(12)], + source_str + ); + } else { + tracing::debug!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... 
kind={} commit={} source={} reason=\"git data not received within 30 minutes\"", + repo, + npub, + event_id_short, + event.kind.as_u16(), + &commit[..commit.len().min(12)], + source_str + ); + } } self.mark_expired(event.id); } else { // Placeholder (git data arrived first, but PR event never came) + // Placeholders are always Direct source (from git push) tracing::debug!( - "[PURGATORY_EXPIRED] placeholder event_id={} commit={} reason=\"PR event not received within 30 minutes\"", + "[PURGATORY_EXPIRED] placeholder event_id={} commit={} source=direct reason=\"PR event not received within 30 minutes\"", &event_id_str[..event_id_str.len().min(12)], &commit[..commit.len().min(12)] ); @@ -869,6 +962,7 @@ impl Purgatory { author: e.author, created_at_offset_secs: created_offset.as_secs(), expires_at_offset_secs: expires_offset.as_secs(), + source: e.source, } }) .collect(); @@ -891,6 +985,7 @@ impl Purgatory { commit: e.commit.clone(), created_at_offset_secs: created_offset.as_secs(), expires_at_offset_secs: expires_offset.as_secs(), + source: e.source, }; pr_events.insert(event_id, serializable); } @@ -992,6 +1087,7 @@ impl Purgatory { author: e.author, created_at, expires_at, + source: e.source, } }) .collect(); @@ -1017,6 +1113,7 @@ impl Purgatory { commit: e.commit, created_at, expires_at, + source: e.source, }; self.pr_events.insert(event_id, entry); @@ -1074,8 +1171,8 @@ mod tests { .sign_with_keys(&keys) .unwrap(); - purgatory.add_state(event.clone(), "test-repo".to_string(), keys.public_key()); - purgatory.add_pr(event, "test-event-id".to_string(), "abc123".to_string()); + purgatory.add_state(event.clone(), "test-repo".to_string(), keys.public_key(), false); + purgatory.add_pr(event, "test-event-id".to_string(), "abc123".to_string(), false); let (state_count, pr_count) = purgatory.count(); assert_eq!(state_count, 1); @@ -1126,7 +1223,7 @@ mod tests { let event = EventBuilder::text_note("state") .sign_with_keys(&keys) .unwrap(); - purgatory.add_state(event, 
"test-repo".to_string(), keys.public_key()); + purgatory.add_state(event, "test-repo".to_string(), keys.public_key(), false); // Now should have pending events assert!(purgatory.has_pending_events("test-repo")); @@ -1156,7 +1253,7 @@ mod tests { .sign_with_keys(&keys) .unwrap(); - purgatory.add_pr(event, "pr-event-id".to_string(), "commit123".to_string()); + purgatory.add_pr(event, "pr-event-id".to_string(), "commit123".to_string(), false); // Now should have pending events for test-repo assert!(purgatory.has_pending_events("test-repo")); @@ -1221,6 +1318,7 @@ fn test_pr_event_vs_placeholder() { event.clone(), "event-id-1".to_string(), "commit-abc".to_string(), + false, ); // Add a placeholder (no event) @@ -1277,8 +1375,9 @@ fn test_cleanup_removes_expired_entries() { state_event.clone(), "test-repo".to_string(), keys.public_key(), + false, ); - purgatory.add_pr(pr_event, "pr-123".to_string(), "commit-abc".to_string()); + purgatory.add_pr(pr_event, "pr-123".to_string(), "commit-abc".to_string(), false); purgatory.add_pr_placeholder("pr-456".to_string(), "commit-def".to_string()); // Verify entries are there @@ -1325,8 +1424,8 @@ fn test_cleanup_preserves_non_expired_entries() { .unwrap(); // Add fresh entries - purgatory.add_state(state_event, "test-repo".to_string(), keys.public_key()); - purgatory.add_pr(pr_event, "pr-123".to_string(), "commit-abc".to_string()); + purgatory.add_state(state_event, "test-repo".to_string(), keys.public_key(), false); + purgatory.add_pr(pr_event, "pr-123".to_string(), "commit-abc".to_string(), false); // Run cleanup let (state_removed, pr_removed) = purgatory.cleanup(); @@ -1356,8 +1455,8 @@ fn test_cleanup_mixed_expired_and_fresh() { .sign_with_keys(&keys) .unwrap(); - purgatory.add_state(event1, "test-repo".to_string(), keys.public_key()); - purgatory.add_state(event2, "test-repo".to_string(), keys.public_key()); + purgatory.add_state(event1, "test-repo".to_string(), keys.public_key(), false); + purgatory.add_state(event2, 
"test-repo".to_string(), keys.public_key(), false); // Expire only the first one if let Some(mut entries) = purgatory.state_events.get_mut("test-repo") { @@ -1374,8 +1473,8 @@ fn test_cleanup_mixed_expired_and_fresh() { .sign_with_keys(&keys) .unwrap(); - purgatory.add_pr(pr1, "pr-1".to_string(), "commit-1".to_string()); - purgatory.add_pr(pr2, "pr-2".to_string(), "commit-2".to_string()); + purgatory.add_pr(pr1, "pr-1".to_string(), "commit-1".to_string(), false); + purgatory.add_pr(pr2, "pr-2".to_string(), "commit-2".to_string(), false); // Expire only first PR if let Some(mut entry) = purgatory.pr_events.get_mut("pr-1") { @@ -1407,8 +1506,8 @@ fn test_remove_expired_legacy_method() { .unwrap(); let pr_event = EventBuilder::text_note("pr").sign_with_keys(&keys).unwrap(); - purgatory.add_state(state_event, "repo".to_string(), keys.public_key()); - purgatory.add_pr(pr_event, "pr-id".to_string(), "commit".to_string()); + purgatory.add_state(state_event, "repo".to_string(), keys.public_key(), false); + purgatory.add_pr(pr_event, "pr-id".to_string(), "commit".to_string(), false); // Expire both if let Some(mut entries) = purgatory.state_events.get_mut("repo") { @@ -1442,8 +1541,8 @@ fn test_expired_event_tracking() { let pr_event_id = pr_event.id; // Add events to purgatory - purgatory.add_state(state_event, "repo".to_string(), keys.public_key()); - purgatory.add_pr(pr_event, "pr-id".to_string(), "commit".to_string()); + purgatory.add_state(state_event, "repo".to_string(), keys.public_key(), false); + purgatory.add_pr(pr_event, "pr-id".to_string(), "commit".to_string(), false); // Events should not be marked as expired yet assert!(!purgatory.is_expired(&state_event_id)); @@ -1495,7 +1594,7 @@ fn test_cleanup_expired_events() { let event2_id = event2.id; // Add and immediately expire event1 - purgatory.add_state(event1, "repo1".to_string(), keys.public_key()); + purgatory.add_state(event1, "repo1".to_string(), keys.public_key(), false); if let Some(mut entries) = 
purgatory.state_events.get_mut("repo1") { for entry in entries.iter_mut() { entry.expires_at = Instant::now() - Duration::from_secs(1); @@ -1504,7 +1603,7 @@ fn test_cleanup_expired_events() { purgatory.cleanup(); // Add and expire event2 (will be more recent) - purgatory.add_state(event2, "repo2".to_string(), keys.public_key()); + purgatory.add_state(event2, "repo2".to_string(), keys.public_key(), false); if let Some(mut entries) = purgatory.state_events.get_mut("repo2") { for entry in entries.iter_mut() { entry.expires_at = Instant::now() - Duration::from_secs(1); @@ -1546,7 +1645,7 @@ fn test_expired_events_prevent_readdition() { let event_id = event.id; // Add event to purgatory - purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key()); + purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key(), false); // Expire it if let Some(mut entries) = purgatory.state_events.get_mut("repo") { @@ -1566,7 +1665,7 @@ fn test_expired_events_prevent_readdition() { // This simulates what negentropy/REQ+EOSE should do: // Check if event is in event_ids() before adding if !ids.contains(&event_id) { - purgatory.add_state(event, "repo".to_string(), keys.public_key()); + purgatory.add_state(event, "repo".to_string(), keys.public_key(), false); } // Event should NOT be re-added @@ -1609,7 +1708,7 @@ fn test_user_can_resubmit_expired_event() { let event_id = event.id; // Add event to purgatory - purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key()); + purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key(), false); // Expire it if let Some(mut entries) = purgatory.state_events.get_mut("repo") { @@ -1658,8 +1757,8 @@ async fn test_save_and_restore_state_events() { let event1_id = event1.id; let event2_id = event2.id; - purgatory.add_state(event1.clone(), "test-repo".to_string(), keys.public_key()); - purgatory.add_state(event2.clone(), "test-repo".to_string(), keys.public_key()); + 
purgatory.add_state(event1.clone(), "test-repo".to_string(), keys.public_key(), false); + purgatory.add_state(event2.clone(), "test-repo".to_string(), keys.public_key(), false); // Save to disk purgatory.save_to_disk(&state_file).unwrap(); @@ -1721,6 +1820,7 @@ async fn test_save_and_restore_pr_events() { pr_event.clone(), "pr-event-id".to_string(), "commit-abc".to_string(), + false, ); // Save to disk @@ -1790,7 +1890,7 @@ async fn test_save_and_restore_expired_events() { let event_id = event.id; // Add and expire event - purgatory.add_state(event, "repo".to_string(), keys.public_key()); + purgatory.add_state(event, "repo".to_string(), keys.public_key(), false); if let Some(mut entries) = purgatory.state_events.get_mut("repo") { for entry in entries.iter_mut() { entry.expires_at = Instant::now() - Duration::from_secs(1); @@ -1929,7 +2029,7 @@ async fn test_downtime_calculation() { .sign_with_keys(&keys) .unwrap(); - purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key()); + purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key(), false); // Get original expiry time let original_entries = purgatory.find_state("repo"); @@ -1985,7 +2085,7 @@ async fn test_expiry_times_preserved() { .sign_with_keys(&keys) .unwrap(); - purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key()); + purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key(), false); // Manually set expiry to a specific time in the future let custom_expiry = Instant::now() + Duration::from_secs(600); // 10 minutes @@ -2044,16 +2144,19 @@ async fn test_multiple_state_events_same_identifier() { event1.clone(), "shared-repo".to_string(), keys1.public_key(), + false, ); purgatory.add_state( event2.clone(), "shared-repo".to_string(), keys2.public_key(), + false, ); purgatory.add_state( event3.clone(), "shared-repo".to_string(), keys3.public_key(), + false, ); // Save to disk @@ -2100,6 +2203,7 @@ async fn test_mixed_pr_events_and_placeholders() { 
pr_event.clone(), "pr-with-event".to_string(), "commit-abc".to_string(), + false, ); // Add PR placeholder @@ -2145,7 +2249,7 @@ async fn test_file_cleanup_after_successful_restore() { let event = EventBuilder::text_note("test") .sign_with_keys(&keys) .unwrap(); - purgatory.add_state(event, "repo".to_string(), keys.public_key()); + purgatory.add_state(event, "repo".to_string(), keys.public_key(), false); // Save to disk purgatory.save_to_disk(&state_file).unwrap(); @@ -2179,8 +2283,8 @@ async fn test_comprehensive_roundtrip() { .sign_with_keys(&keys2) .unwrap(); - purgatory.add_state(state1.clone(), "repo1".to_string(), keys1.public_key()); - purgatory.add_state(state2.clone(), "repo2".to_string(), keys2.public_key()); + purgatory.add_state(state1.clone(), "repo1".to_string(), keys1.public_key(), false); + purgatory.add_state(state2.clone(), "repo2".to_string(), keys2.public_key(), false); // Add PR event let tags = vec![Tag::custom( @@ -2191,7 +2295,7 @@ async fn test_comprehensive_roundtrip() { .tags(tags) .sign_with_keys(&keys1) .unwrap(); - purgatory.add_pr(pr_event.clone(), "pr-1".to_string(), "commit-1".to_string()); + purgatory.add_pr(pr_event.clone(), "pr-1".to_string(), "commit-1".to_string(), false); // Add PR placeholder purgatory.add_pr_placeholder("pr-2".to_string(), "commit-2".to_string()); @@ -2201,7 +2305,7 @@ async fn test_comprehensive_roundtrip() { .sign_with_keys(&keys1) .unwrap(); let expired_id = expired_event.id; - purgatory.add_state(expired_event, "repo3".to_string(), keys1.public_key()); + purgatory.add_state(expired_event, "repo3".to_string(), keys1.public_key(), false); if let Some(mut entries) = purgatory.state_events.get_mut("repo3") { for entry in entries.iter_mut() { entry.expires_at = Instant::now() - Duration::from_secs(1); diff --git a/src/purgatory/types.rs b/src/purgatory/types.rs index 919504b..e37a3e1 100644 --- a/src/purgatory/types.rs +++ b/src/purgatory/types.rs @@ -8,6 +8,28 @@ use nostr_sdk::prelude::*; use 
serde::{Deserialize, Serialize}; use std::time::Instant; +/// Source of an event entering purgatory. +/// +/// Tracks whether an event was submitted directly by a user or fetched via +/// proactive sync from another relay. This distinction is used for: +/// - Filtered logging: Direct submissions log at WARN level, synced at DEBUG +/// - Operational monitoring: Helps identify user-facing issues vs sync noise +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +pub enum EventSource { + /// Event was published directly to this relay by a user + #[default] + Direct, + /// Event was fetched via proactive sync from another relay + Sync, +} + +impl EventSource { + /// Returns true if this is a direct submission (not synced) + pub fn is_direct(&self) -> bool { + matches!(self, EventSource::Direct) + } +} + /// Default value for Instant fields during deserialization fn instant_now() -> Instant { Instant::now() @@ -86,6 +108,10 @@ pub struct StatePurgatoryEntry { /// Expiry deadline (30 min from creation, may be extended) #[serde(skip, default = "instant_now")] pub expires_at: Instant, + + /// Source of this event (direct submission vs sync) + #[serde(default)] + pub source: EventSource, } /// Entry for a PR event (kind 1617/1618) or placeholder waiting in purgatory. 
@@ -112,4 +138,8 @@ pub struct PrPurgatoryEntry { /// Expiry deadline (30 min from creation, may be extended) #[serde(skip, default = "instant_now")] pub expires_at: Instant, + + /// Source of this event (direct submission vs sync) + #[serde(default)] + pub source: EventSource, } -- cgit v1.2.3 From 92a9a3bfe0bc522e8ae411991a366a3a6310d525 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 3 Feb 2026 14:41:46 +0000 Subject: docs: archive relay.ngit.dev migration materials for reference Move migration guide and scripts to docs/archive/2026-01-relay-ngit-dev-migration/ with clear warnings that these are reference-only materials from a specific migration context, not general-purpose tools. These materials document the relay.ngit.dev migration from ngit-relay to ngit-grasp in January 2026. The scripts were developed iteratively during the migration and are specific to that context. They are preserved for: - Historical reference - Context for production fixes in this branch - Inspiration for future migrations (not direct reuse) The migration uncovered critical bugs now fixed in this branch: - Git protocol error handling - Naughty list false positives - Purgatory event tracking - Sync startup issues - Configuration management --- .../2026-01-relay-ngit-dev-migration/README.md | 61 ++ .../migration-guide.md | 1030 ++++++++++++++++++++ .../scripts/01-fetch-events.sh | 206 ++++ .../scripts/10-check-git-sync.sh | 564 +++++++++++ .../scripts/20-categorize.sh | 212 ++++ .../scripts/21-compare-relays.sh | 294 ++++++ .../scripts/22-compare-git-data.sh | 390 ++++++++ .../scripts/30-extract-parse-failures.sh | 774 +++++++++++++++ .../scripts/31-extract-purgatory-expiry.sh | 408 ++++++++ .../scripts/40-classify-actions.sh | 662 +++++++++++++ .../scripts/run-migration-analysis.sh | 779 +++++++++++++++ .../scripts/validate-service.sh | 151 +++ docs/how-to/README.md | 12 - docs/how-to/migrate-to-ngit-grasp.md | 1030 -------------------- 
docs/how-to/migration-scripts/01-fetch-events.sh | 206 ---- docs/how-to/migration-scripts/10-check-git-sync.sh | 564 ----------- docs/how-to/migration-scripts/20-categorize.sh | 212 ---- docs/how-to/migration-scripts/21-compare-relays.sh | 294 ------ .../migration-scripts/22-compare-git-data.sh | 390 -------- .../migration-scripts/30-extract-parse-failures.sh | 774 --------------- .../31-extract-purgatory-expiry.sh | 408 -------- .../migration-scripts/40-classify-actions.sh | 662 ------------- .../migration-scripts/run-migration-analysis.sh | 779 --------------- docs/how-to/migration-scripts/validate-service.sh | 151 --- 24 files changed, 5531 insertions(+), 5482 deletions(-) create mode 100644 docs/archive/2026-01-relay-ngit-dev-migration/README.md create mode 100644 docs/archive/2026-01-relay-ngit-dev-migration/migration-guide.md create mode 100755 docs/archive/2026-01-relay-ngit-dev-migration/scripts/01-fetch-events.sh create mode 100755 docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh create mode 100755 docs/archive/2026-01-relay-ngit-dev-migration/scripts/20-categorize.sh create mode 100755 docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh create mode 100755 docs/archive/2026-01-relay-ngit-dev-migration/scripts/22-compare-git-data.sh create mode 100755 docs/archive/2026-01-relay-ngit-dev-migration/scripts/30-extract-parse-failures.sh create mode 100755 docs/archive/2026-01-relay-ngit-dev-migration/scripts/31-extract-purgatory-expiry.sh create mode 100755 docs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh create mode 100755 docs/archive/2026-01-relay-ngit-dev-migration/scripts/run-migration-analysis.sh create mode 100755 docs/archive/2026-01-relay-ngit-dev-migration/scripts/validate-service.sh delete mode 100644 docs/how-to/migrate-to-ngit-grasp.md delete mode 100755 docs/how-to/migration-scripts/01-fetch-events.sh delete mode 100755 docs/how-to/migration-scripts/10-check-git-sync.sh 
delete mode 100755 docs/how-to/migration-scripts/20-categorize.sh delete mode 100755 docs/how-to/migration-scripts/21-compare-relays.sh delete mode 100755 docs/how-to/migration-scripts/22-compare-git-data.sh delete mode 100755 docs/how-to/migration-scripts/30-extract-parse-failures.sh delete mode 100755 docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh delete mode 100755 docs/how-to/migration-scripts/40-classify-actions.sh delete mode 100755 docs/how-to/migration-scripts/run-migration-analysis.sh delete mode 100755 docs/how-to/migration-scripts/validate-service.sh diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/README.md b/docs/archive/2026-01-relay-ngit-dev-migration/README.md new file mode 100644 index 0000000..424067c --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/README.md @@ -0,0 +1,61 @@ +# relay.ngit.dev Migration Archive (January 2026) + +**Status:** Reference only - not maintained + +This directory contains the migration guide and scripts used during the +relay.ngit.dev migration from ngit-relay to ngit-grasp in January 2026. + +## ⚠️ Important + +These materials are **archived for reference only**: + +- **Scripts are specific to the relay.ngit.dev migration context** +- **Not designed for general use or other migrations** +- **May not work without modification** +- **Not maintained or supported** + +Do not expect these scripts to work out of the box for your migration. + +## What's Here + +- `migration-guide.md` - Lessons learned, approach, and context from the actual migration +- `scripts/` - Analysis and validation scripts used during the migration process + +## Why Archive This? + +The relay.ngit.dev migration uncovered numerous bugs and edge cases that resulted +in critical production fixes. See commits in the `4bc5-relay-ngit-dev-migration-v2` +branch for details. 
+ +These materials document: + +- Real-world migration challenges encountered +- Debugging approaches that worked in practice +- Context for production fixes merged from this branch +- Iterative script development during active migration + +## Using This as Reference + +If you're planning a migration to ngit-grasp: + +1. **Read the migration guide** for conceptual approach and lessons learned +2. **Review the scripts** to understand what kinds of analysis were needed +3. **Expect to write your own scripts** tailored to your specific context +4. **Test extensively** in a non-production environment first + +These materials show what was needed for one specific migration, not a +general-purpose migration toolkit. + +## Context + +This migration was completed in January 2026 and resulted in relay.ngit.dev +running ngit-grasp in production. The branch containing these materials also +includes critical fixes for: + +- Git protocol error handling +- Naughty list false positives +- Purgatory event tracking +- Sync startup issues +- Configuration management + +Those fixes are now part of the main codebase. diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/migration-guide.md b/docs/archive/2026-01-relay-ngit-dev-migration/migration-guide.md new file mode 100644 index 0000000..abe2191 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/migration-guide.md @@ -0,0 +1,1030 @@ +# Migrate to ngit-grasp from another GRASP implementation + +This guide walks you through migrating a production GRASP relay to ngit-grasp. The process involves analyzing your existing data to identify repositories that need attention before switching over. 
+ +## Compatibility + +This migration process works with any GRASP implementation that: + +- Stores git data in the `<npub>/<identifier>.git` directory structure +- Uses standard GRASP events (kind 30617 announcements, kind 30618 state, kind 5 deletions) +- Exposes a Nostr relay WebSocket endpoint + +**Known compatible implementations:** +- ngit-relay (reference implementation) +- ngit-grasp (when migrating between instances or from archive mode) +- Other GRASP-compliant relays following the specification + +The migration scripts analyze Nostr events and git data directly, making them implementation-agnostic. + +## Quick Start + +Run the migration analysis with a single command: + +```bash +# Basic analysis (fetches events, compares relays) +./docs/archive/2026-01-relay-ngit-dev-migration/scripts/run-migration-analysis.sh \ + --prod-relay wss://source-relay.example.com \ + --archive-relay wss://target-relay.example.com + +# Full analysis (includes git sync check - run on VPS) +./docs/archive/2026-01-relay-ngit-dev-migration/scripts/run-migration-analysis.sh \ + --prod-relay wss://source-relay.example.com \ + --archive-relay wss://target-relay.example.com \ + --prod-git /var/lib/grasp-relay/git \ + --archive-git /var/lib/ngit-grasp/git \ + --service ngit-grasp.service +``` + +The script produces three output files: +- `results/no-action-required.txt` - Repos ready for migration +- `results/action-required.txt` - Repos needing intervention +- `results/manual-investigation.txt` - Repos needing human review + +See [Running the Analysis](#running-the-analysis) for detailed options. 
+ +## Prerequisites + +### Required Tools + +- **nak** - Nostr Army Knife for fetching events ([install](https://github.com/fiatjaf/nak)) +- **jq** - JSON processing (install via package manager) + +### For Full Analysis (VPS) + +- SSH access to the VPS running your source relay +- Read access to git data directories +- Access to systemd journal (for log extraction) + +### Verify Installation + +```bash +# Check required tools +nak --version +jq --version +git --version + +# Check optional tools (for VPS phases) +journalctl --version +``` + +## Gotchas and Common Issues + +Before running the analysis, be aware of these common issues discovered during real migrations: + +### Git Must Be Installed + +The analysis scripts require `git` to be installed and in PATH. This may not be present on minimal VPS installations. + +```bash +# Check if git is available +which git || echo "Git not found - install it first" + +# Install on Debian/Ubuntu +apt install git + +# Install on NixOS (add to configuration.nix) +environment.systemPackages = [ pkgs.git ]; +``` + +### Archive Relay May Only Be Accessible Locally + +If your archive relay is configured to listen only on localhost (e.g., `ws://localhost:7443`), you must run the analysis **on the VPS itself**, not from a remote machine. + +```bash +# Check if archive relay is accessible +# This will fail if run remotely against a localhost-only relay +nak req -k 30618 --limit 1 ws://localhost:7443 + +# Solution: SSH into the VPS and run analysis there +ssh user@your-vps +cd /path/to/scripts +./run-migration-analysis.sh --archive-relay ws://localhost:7443 ... +``` + +### Git Data Paths May Differ from Defaults + +Different deployments store git data in different locations. 
**Always verify paths before running the analysis.** + +```bash +# Find actual git data paths from service configuration +systemctl cat ngit-relay.service | grep -E 'ExecStart|WorkingDirectory|Environment' +systemctl cat ngit-grasp-*.service | grep -E 'ExecStart|WorkingDirectory|Environment' + +# Common locations: +# - /var/lib/ngit-relay/git (default) +# - /var/lib/ngit-grasp/git (default) +# - /persistent/*/data/repos (custom deployments) + +# Verify the path exists and contains expected structure +ls /path/to/git/npub1*/ # Should show *.git directories +``` + +### Phase 4 Needs the Correct Service Name + +> **CRITICAL:** Phase 4 extracts structured logs (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`, `Invalid announcement` rejections) from journald. These logs **ONLY exist in ngit-grasp services**, NOT in ngit-relay services. + +If you specify an ngit-relay service (like `ngit-relay.service`), Phase 4 will find **zero logs** and produce empty results. This is a common mistake that wastes time and produces misleading analysis. + +**Correct service names (ngit-grasp):** +- `ngit-grasp.service` +- `ngit-grasp-relay-ngit-dev.service` (NixOS multi-instance) +- `ngit-grasp-archive.service` + +**Incorrect service names (ngit-relay - NO structured logging):** +- `ngit-relay.service` +- `relay-ngit-dev.service` + +```bash +# Find all ngit-related services +systemctl list-units 'ngit-*' --all + +# Check which service has structured logging (should be ngit-grasp) +journalctl -u ngit-grasp-*.service | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]|Invalid announcement' | head -5 + +# Verify ngit-relay does NOT have structured logging +journalctl -u ngit-relay.service | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]|Invalid announcement' | head -5 +# ^ This should return nothing + +# Use the archive service name for Phase 4 +./run-migration-analysis.sh ... 
--service ngit-grasp-relay-ngit-dev.service +``` + +The migration scripts now validate the service name and will **error** if you specify an ngit-relay service, preventing this common mistake. + +### Permission Issues with Service-Owned Directories + +Git data directories are typically owned by the service user and may require elevated permissions to read. + +```bash +# Check directory permissions +ls -la /var/lib/ngit-grasp/git + +# Options: +# 1. Run as root/sudo +sudo ./run-migration-analysis.sh ... + +# 2. Run as the service user +sudo -u ngit-grasp ./run-migration-analysis.sh ... + +# 3. Add your user to the service group +sudo usermod -aG ngit-grasp $USER +# (logout/login required) +``` + +### Service Names Vary by Deployment + +NixOS multi-instance deployments use service names like `ngit-grasp-<instance>.service`. Always check actual service names. + +```bash +# List all ngit services +systemctl list-units 'ngit-*' --all --no-pager + +# Example output: +# ngit-relay.service loaded active running ngit-relay +# ngit-grasp-relay-ngit-dev.service loaded active running ngit-grasp (relay-ngit-dev) +``` + +## Migration Overview + +The migration process has three stages: + +### Stage 1: Deploy Archive Instance + +Deploy ngit-grasp alongside your production relay: + +1. Configure ngit-grasp with: + - `domain` set to `<production-domain>.internal` (temporary) + - `archiveService` set to your production domain + - Running on a different port + +2. Let it sync for ~1 hour to gather all events and git data + +### Stage 2: Analyze Data + +Run the migration analysis to identify: +- Repositories successfully migrated (no action needed) +- Repositories with incomplete data (need investigation) +- Repositories with parse failures (may need re-announcement) + +### Stage 3: Switch Over + +Once all issues are resolved: +1. Set `domain` to your production URL +2. Disable archive mode +3. 
Update your reverse proxy to point to ngit-grasp + +## Running the Analysis + +### Before You Start + +**Verify paths and service names** before running the analysis. Incorrect paths are the most common source of errors. + +```bash +# 1. Find actual git data paths +systemctl cat ngit-relay.service | grep -E 'ExecStart|data|git' +systemctl cat ngit-grasp-*.service | grep -E 'ExecStart|data|git' + +# 2. Find service names +systemctl list-units 'ngit-*' --all --no-pager + +# 3. Verify git data exists at the paths +ls /path/to/prod/git/npub1*/ | head -5 +ls /path/to/archive/git/npub1*/ | head -5 + +# 4. Check if archive relay is accessible +nak req -k 30618 --limit 1 ws://localhost:7443 # or your archive URL +``` + +### Basic Usage + +```bash +# Preview what will happen (dry run) +./run-migration-analysis.sh \ + --prod-relay wss://source-relay.example.com \ + --archive-relay wss://target-relay.example.com \ + --dry-run + +# Run the analysis +./run-migration-analysis.sh \ + --prod-relay wss://source-relay.example.com \ + --archive-relay wss://target-relay.example.com +``` + +### Full Analysis on VPS + +**Important:** If your archive relay is localhost-only, you must run this on the VPS. + +```bash +# First, discover your actual paths (see "Before You Start" above) +# Then run with the correct values: + +./run-migration-analysis.sh \ + --prod-relay wss://source-relay.example.com \ + --archive-relay ws://localhost:7443 \ + --prod-git /path/to/prod/git \ + --archive-git /path/to/archive/git \ + --service ngit-grasp-your-instance.service +``` + +### Phase Control + +Skip or run specific phases: + +```bash +# Skip Phase 2 (use cached git sync data) +./run-migration-analysis.sh ... --skip-phase-2 + +# Run only Phase 1 (fetch events) +./run-migration-analysis.sh ... --only-phase-1 + +# Resume from Phase 3 (using existing data) +./run-migration-analysis.sh ... 
--from-phase-3 --output work/migration-analysis-20260122-1430 +``` + +### All Options + +| Option | Description | +|--------|-------------| +| `--prod-relay <url>` | Source relay WebSocket URL (required) | +| `--archive-relay <url>` | Target relay WebSocket URL (required) | +| `--prod-git <path>` | Git base directory for prod (enables Phase 2) | +| `--archive-git <path>` | Git base directory for archive (enables Phase 2) | +| `--service <name>` | Systemd service name for Phase 4 log extraction. **MUST be an ngit-grasp service** (not ngit-relay). Structured logging only exists in ngit-grasp. | +| `--output <dir>` | Output directory (default: auto-generated) | +| `--skip-phase-N` | Skip phase N (1-5) | +| `--only-phase-N` | Run only phase N | +| `--from-phase-N` | Start from phase N | +| `--dry-run` | Show what would be executed | +| `--continue-on-error` | Continue even if a phase fails | + +## Understanding Results + +### Summary File + +The `results/summary.txt` file provides an overview: + +``` +## Overview + +| Category | Count | Percentage | +|----------|-------|------------| +| No Action Required | 450 | 85.7% | +| Action Required | 52 | 9.9% | +| Manual Investigation | 23 | 4.4% | +``` + +### No Action Required + +Repositories in `no-action-required.txt` are ready for migration: + +``` +myrepo | npub1abc... | complete in both prod and archive +oldrepo | npub1def... | deleted by user +testrepo | npub1ghi... | empty/blank in both (user never pushed) +``` + +**Common reasons:** +- `complete in both prod and archive` - Successfully migrated +- `deleted by user` - User requested deletion (kind 5 event) +- `empty/blank in both` - No git data was ever pushed +- `purgatory expired` - System already handled the timeout + +### Action Required + +Repositories in `action-required.txt` need intervention: + +``` +myrepo | npub1abc... | complete in prod, missing from archive | trigger re-sync or investigate +otherrepo | npub1def... 
| incomplete in both (prod=cat3, archive=cat2) | investigate git data source +``` + +**Common actions:** +- **Re-sync needed**: Trigger the archive to re-fetch from the source +- **Wait for sync**: Archive sync may still be in progress +- **Investigate git source**: Original git data may be incomplete +- **Fix parse failure**: Event format issue, may need re-announcement + +### Manual Investigation + +Repositories in `manual-investigation.txt` have unusual states: + +``` +weirdrepo | npub1abc... | in archive (cat1) but not in prod | may be new announcement or deleted from prod +conflictrepo | npub1def... | complete in prod, missing from archive, parse failure logged | investigate parse failure +``` + +These require human judgment to determine the correct action. + +## Troubleshooting + +### "nak not found" + +Install nak from https://github.com/fiatjaf/nak: + +```bash +# Using Go +go install github.com/fiatjaf/nak@latest + +# Or download binary from releases +``` + +### "git not found" + +Git must be installed and in PATH: + +```bash +# Check if git is available +which git + +# Install on Debian/Ubuntu +sudo apt install git + +# Install on NixOS (add to configuration.nix) +environment.systemPackages = [ pkgs.git ]; +``` + +### "Permission denied" on git directories + +Run with sudo or ensure your user has read access: + +```bash +# Check permissions +ls -la /var/lib/grasp-relay/git + +# Option 1: Run with sudo +sudo ./run-migration-analysis.sh ... + +# Option 2: Run as service user +sudo -u ngit-grasp ./run-migration-analysis.sh ... +``` + +### Archive relay connection failed + +If you get connection errors to the archive relay: + +```bash +# Check if relay is running +systemctl status ngit-grasp-*.service + +# Check if it's localhost-only +# If archive is ws://localhost:7443, you MUST run on the VPS +ssh user@your-vps +./run-migration-analysis.sh --archive-relay ws://localhost:7443 ... 
+``` + +### Wrong git paths / "No such file or directory" + +Git data paths vary by deployment. Discover the actual paths: + +```bash +# Find paths from service configuration +systemctl cat ngit-relay.service | grep -E 'ExecStart|WorkingDirectory|Environment' +systemctl cat ngit-grasp-*.service | grep -E 'ExecStart|WorkingDirectory|Environment' + +# Verify the path contains git repos +ls /discovered/path/npub1*/ +``` + +### Phase 2 takes too long + +The git sync check processes each repository individually (~20 minutes total). To speed up iteration: + +1. Run Phase 2 once and save the output +2. Use `--skip-phase-2` for subsequent runs +3. Use `--from-phase-3` to re-run classification with existing data + +### No parse failures found + +This is expected if: +- ngit-grasp logging improvements aren't deployed yet +- No events actually failed to parse + +The analysis will continue without log data. + +### Phase 4 finds no structured logs + +**Symptom:** Phase 4 completes but `parse-failures.txt` and `purgatory-expired.txt` are empty or contain only header comments. + +**Most common cause:** You're querying the wrong service (ngit-relay instead of ngit-grasp). + +Structured logging (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`, `Invalid announcement` rejections) **only exists in ngit-grasp services**. If you specify an ngit-relay service, Phase 4 will find zero logs. + +**How to diagnose:** + +```bash +# 1. Check what service you configured +cat /path/to/output/config.txt | grep SERVICE_NAME + +# 2. If it contains "ngit-relay", that's the problem! +# ngit-relay does NOT have structured logging + +# 3. Find the correct ngit-grasp service +systemctl list-units 'ngit-grasp*' --all + +# 4. 
Verify the ngit-grasp service has structured logs +journalctl -u ngit-grasp-relay-ngit-dev.service --since "7 days ago" | \ + grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]|Invalid announcement' | head -5 +``` + +**How to fix:** + +```bash +# Update SERVICE_NAME to the ngit-grasp archive service and re-run +./run-migration-analysis.sh \ + --prod-relay wss://relay.ngit.dev \ + --archive-relay ws://localhost:7443 \ + --service ngit-grasp-relay-ngit-dev.service \ + --from-phase-4 # Skip phases 1-3, just re-run phase 4 +``` + +**Other possible causes:** + +1. **Structured logging not deployed:** If the ngit-grasp instance doesn't have the logging improvements deployed, no structured logs will exist. Check the ngit-grasp version. + +2. **No events in time window:** If there genuinely were no parse failures, purgatory expiry events, or invalid announcement rejections, the files will be empty. This is valid - it means everything parsed successfully. + +3. **Wrong time range:** The default is 30 days. If your archive has been running longer, you may need `--since` to extend the range. + +**Prevention:** The migration scripts now validate the service name and will error if you specify an ngit-relay service. + +**Note on "Invalid announcement" rejections:** These are announcements (kind 30617) that were rejected by the write policy due to format violations. The most common reason is "multiple clone tags found" - the NIP-34 spec requires a single clone tag with multiple values, not multiple clone tags. These rejections are logged as `Event rejected by write policy ... reason=Invalid announcement: ...`. + +### Event counts are multiples of 250 + +This suggests pagination may have failed. The scripts use `--paginate` by default, but if you see exactly 250, 500, 750 events, verify the relay is responding correctly. 
+ +## Architecture + +### Analysis Phases + +The analysis is split into 5 modular phases: + +| Phase | Name | Time | Location | Description | +|-------|------|------|----------|-------------| +| 1 | Fetch Events | ~30s each | Local | Fetch events from both relays | +| 2 | Git Sync Check | ~20 min each | VPS | Compare state events to git data | +| 3 | Categorize & Compare | <1s | Local | Categorize and compare results | +| 4 | Extract Logs | <30s | VPS | Extract parse failures and purgatory expiry | +| 5 | Final Classification | <5s | Local | Combine all data into actionable results | + +### Phase Flow Diagram + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 1: Fetch Events (~30s, local) │ +│ Fetches kind 30618 (state), 30617 (announcements), 5 (deletion) │ +│ Run twice: once for prod, once for archive │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 2: Git Sync Check (~20 mins, VPS required) │ +│ Compares state event refs to actual git data on disk │ +│ Categorizes into: complete, empty, partial, no-match │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 3: Categorize & Compare (fast, local) │ +│ Compares prod vs archive categories │ +│ Identifies gaps and sync issues │ +└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 4: Log-Based Categories (VPS required) │ +│ Extracts structured logs from the archive service: │ +│ - [PARSE_FAIL] - Events that failed to parse │ +│ - [PURGATORY_EXPIRED] - Repos where git data never arrived │ +│ - "Invalid announcement" - Announcements rejected for format │ +│ violations (e.g., multiple clone tags) │ +│ Provides context for why repos failed to sync │ 
+└─────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────┐ +│ PHASE 5: Final Classification (fast, local) │ +│ Combines all data sources │ +│ Outputs: no-action, action-required, manual-investigation │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Git Sync Categories + +Phase 2 categorizes repositories into 4 categories: + +| Category | Description | Meaning | +|----------|-------------|---------| +| 1 | Complete Match | All refs in state event match git data | +| 2 | Empty/Blank | No git data available | +| 3 | Partial Match | Some refs match, some don't | +| 4 | No Match | Git data exists but refs don't match | + +### Output Directory Structure + +``` +work/migration-analysis-YYYYMMDD-HHMM/ +├── prod/ +│ ├── raw/ +│ │ ├── state-events.json # Phase 1 +│ │ ├── announcements.json # Phase 1 +│ │ └── deletions.json # Phase 1 +│ ├── git-sync-status.tsv # Phase 2 +│ └── category*.txt # Phase 2/3 +├── archive/ +│ └── (same structure as prod) +├── comparison/ +│ ├── complete-in-both.txt # Phase 3 +│ ├── complete-prod-missing-archive.txt +│ ├── complete-prod-incomplete-archive.txt +│ ├── incomplete-in-both.txt +│ ├── in-archive-not-prod.txt +│ └── summary.txt +├── logs/ +│ ├── parse-failures.txt # Phase 4 +│ └── purgatory-expired.txt # Phase 4 +└── results/ + ├── no-action-required.txt # Phase 5 + ├── action-required.txt # Phase 5 + ├── manual-investigation.txt # Phase 5 + └── summary.txt # Phase 5 +``` + +## Why Migration May Require Attention + +Different GRASP implementations may handle edge cases differently. 
ngit-grasp has stricter validation and better observability, which can surface issues that were previously hidden: + +| Aspect | Typical Source Relay | ngit-grasp | +|--------|---------------------|------------| +| Git data validation | May accept partial data | Requires all git data to reproduce state | +| PR refs cleanup | May not clear `refs/nostr/` | Properly manages PR refs | +| Parse failures | May silently ignore | Logs structured `[PARSE_FAIL]` entries | +| Sync timeout | May have no timeout | Purgatory expires after configurable period | + +These differences explain why some repositories may need attention during migration - ngit-grasp's stricter validation catches issues that other implementations may have silently accepted. + +## Next Steps + +After running the analysis: + +1. **Review the summary** - Check `results/summary.txt` for the overview +2. **Address action items** - Work through `results/action-required.txt` +3. **Investigate edge cases** - Review `results/manual-investigation.txt` +4. **Re-run analysis** - After fixing issues, re-run to verify +5. **Plan cutover** - Schedule the switch when all issues are resolved + +### When to Re-run + +Re-run the analysis when: +- Archive sync has had time to complete +- You've fixed parse failures or re-announced events +- You want to verify fixes before cutover + +```bash +# Re-run with existing Phase 2 data (faster) +./run-migration-analysis.sh ... 
--skip-phase-2 --output work/migration-analysis-20260122-1430 +``` + +## Individual Scripts + +For advanced usage, you can run individual phase scripts: + +```bash +# Phase 1: Fetch events +./migration-scripts/01-fetch-events.sh wss://source-relay.example.com output/prod + +# Phase 2: Git sync check +./migration-scripts/10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod --categorize + +# Phase 3a: Categorize +./migration-scripts/20-categorize.sh output/prod/git-sync-status.tsv output/prod + +# Phase 3b: Compare relays +./migration-scripts/21-compare-relays.sh output/prod output/archive output/comparison + +# Phase 4a: Extract parse failures +./migration-scripts/30-extract-parse-failures.sh ngit-grasp.service output/logs + +# Phase 4b: Extract purgatory expiry +./migration-scripts/31-extract-purgatory-expiry.sh ngit-grasp.service output/logs + +# Phase 5: Final classification +./migration-scripts/40-classify-actions.sh work/migration-analysis-20260122-1430 +``` + +Each script has detailed help available with `--help` or by reading the script header. + +## relay.ngit.dev Migration Notes + +This section documents the specific configuration and lessons learned from migrating relay.ngit.dev from ngit-relay to ngit-grasp. Use this as a reference for similar deployments. + +### Deployment Configuration + +| Component | Value | +|-----------|-------| +| **Production relay** | `wss://relay.ngit.dev` | +| **Production service** | `ngit-relay.service` | +| **Production git path** | `/persistent/relay-ngit-dev-ngit-relay/data/repos` | +| **Archive relay** | `ws://localhost:7443` (localhost only) | +| **Archive service** | `ngit-grasp-relay-ngit-dev.service` | +| **Archive git path** | `/persistent/grasp/relay-ngit-dev/git` | + +### Key Differences from Defaults + +1. **Git paths are non-standard**: The production relay uses `/persistent/relay-ngit-dev-ngit-relay/data/repos` instead of `/var/lib/ngit-relay/git` + +2. 
**Archive is localhost-only**: The archive relay listens on `ws://localhost:7443`, not a public URL. All analysis must run on the VPS. + +3. **Service names include instance**: NixOS multi-instance deployment uses `ngit-grasp-relay-ngit-dev.service`, not `ngit-grasp.service` + +### Analysis Command + +```bash +# Run on VPS (archive is localhost-only) +./docs/how-to/migration-scripts/run-migration-analysis.sh \ + --prod-relay wss://relay.ngit.dev \ + --archive-relay ws://localhost:7443 \ + --prod-git /persistent/relay-ngit-dev-ngit-relay/data/repos \ + --archive-git /persistent/grasp/relay-ngit-dev/git \ + --service ngit-grasp-relay-ngit-dev.service +``` + +### Analysis Results (January 2026) + +| Category | Count | Notes | +|----------|-------|-------| +| Complete in both | ~400 | Ready for migration | +| Complete in prod, missing from archive | 315 | Need re-sync | +| Empty in both | 100 | Users never pushed git data | +| Manual investigation | 5 | Unusual states | +| Purgatory expired | 382 | Structured logging working | + +### Lessons Learned + +1. **Always verify paths first**: The default paths in examples didn't match the actual deployment. Use `systemctl cat ` to find real paths. + +2. **Check archive accessibility**: We initially tried to run analysis remotely, but the archive relay was localhost-only. Had to SSH to VPS. + +3. **Use archive service for Phase 4 (CRITICAL)**: Structured logging (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`) is **ONLY** in the ngit-grasp archive service, NOT the ngit-relay production service. Running Phase 4 against `ngit-relay.service` produces zero results because ngit-relay doesn't emit structured logs. The scripts now validate this and error if you specify an ngit-relay service. + +4. **Install git on VPS**: Git wasn't installed on the minimal VPS. The scripts now check for this in prerequisites. + +5. **Permissions matter**: Some directories required `sudo` to access. Running as root or the service user resolved this. 
+ +### Next Steps for relay.ngit.dev + +1. **Re-sync 315 repos**: Trigger archive to re-fetch from production +2. **Investigate 5 edge cases**: Manual review of unusual states +3. **Monitor purgatory**: 382 expired entries indicate sync issues to investigate +4. **Plan cutover**: Once re-sync complete, switch DNS/proxy to ngit-grasp + +## ngit-relay Troubleshooting + +This section covers common issues encountered when running ngit-relay in production, including git permission errors and repository corruption. These issues were discovered during the relay.ngit.dev migration and may affect other deployments. + +### Git Permission Denied Errors + +#### Symptoms + +When cloning repositories, you see: + +```bash +$ git clone https://relay.ngit.dev/npub.../repo.git +Cloning into 'repo'... +remote: warning: unable to access '/root/.config/git/attributes': Permission denied +``` + +Or in container logs: + +``` +warning: unable to access '/root/.config/git/attributes': Permission denied +``` + +#### Explanation + +This occurs when: +1. Git operations run as a non-root user (typically `nginx` user, UID 101) +2. Git tries to access `/root/.config/git/attributes` for global git configuration +3. The `/root` directory has permissions `0700` (drwx------), preventing non-root users from traversing into it +4. Even though the `attributes` file itself may be world-readable, the nginx user cannot reach it due to parent directory permissions + +**Root cause:** The container runs git commands via fcgiwrap as the nginx user, but `/root` is only accessible by root. 
+ +#### Quick Fix (Temporary - Does Not Survive Container Restart) + +This fix resolves the issue immediately but will be lost when containers restart: + +```bash +# For each ngit-relay container, exec in and create the git config directory +sudo podman exec <container-name> sh -c "mkdir -p /root/.config/git && touch /root/.config/git/attributes && chmod 644 /root/.config/git/attributes" + +# Example for specific containers: +sudo podman exec gitnostr-com-ngit-relay sh -c "mkdir -p /root/.config/git && touch /root/.config/git/attributes && chmod 644 /root/.config/git/attributes" + +sudo podman exec relay-ngit-dev-ngit-relay sh -c "mkdir -p /root/.config/git && touch /root/.config/git/attributes && chmod 644 /root/.config/git/attributes" +``` + +**Important:** This fix is temporary and will be lost when the container restarts. For a permanent solution, see the NixOS configuration below. + +#### Permanent Fix (NixOS Configuration) + +For NixOS deployments, add systemd services that automatically fix `/root` permissions after each container start: + +```nix +# In your ngit-relay service configuration (e.g., services/relay-ngit-dev-ngit-relay.nix) + +systemd.services.relay-ngit-dev-fix-root-perms = { + description = "Fix /root permissions in relay.ngit.dev container for git access"; + after = [ "podman-relay-ngit-dev-ngit-relay.service" ]; + requires = [ "podman-relay-ngit-dev-ngit-relay.service" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + ExecStart = "${pkgs.bash}/bin/bash -c 'sleep 5 && ${pkgs.podman}/bin/podman exec relay-ngit-dev-ngit-relay chmod 711 /root'"; + Restart = "on-failure"; + RestartSec = "10s"; + }; +}; +``` + +This changes `/root` permissions from `0700` to `0711`, allowing the nginx user to traverse through `/root` to reach `/root/.config/git/`. 
+ +**Why 711?** +- `7` (owner/root): Full read/write/execute +- `1` (group): Execute only (traverse) +- `1` (other): Execute only (traverse) + +This allows non-root users to traverse through `/root` to access subdirectories, while still protecting `/root` contents from being listed or read. + +#### Verification + +After applying the fix: + +```bash +# Test that cloning works without permission warnings +git clone https://relay.ngit.dev/npub.../repo.git + +# Should clone successfully with no "Permission denied" warnings + +# Verify /root permissions inside container +sudo podman exec relay-ngit-dev-ngit-relay ls -ld /root +# Should show: drwx--x--x (711) + +# Verify nginx user can access git config +sudo podman exec relay-ngit-dev-ngit-relay su -s /bin/sh nginx -c "cat /root/.config/git/attributes" +# Should succeed without "Permission denied" +``` + +### Git Repository Corruption + +#### Symptoms + +When cloning repositories, you see: + +```bash +$ git clone https://relay.ngit.dev/npub.../repo.git +Cloning into 'repo'... +remote: fatal: bad tree object 8b765235809eb27159657eb4c97fb37d21c29bf0 +remote: aborting due to possible repository corruption on the remote side. +fatal: early EOF +fatal: fetch-pack: invalid index-pack output +``` + +Or when running `git fsck` on the server: + +``` +broken link from tree 7d60270e1904c30ae6cef7b465ef842a9f9f63c3 + to tree 8b765235809eb27159657eb4c97fb37d21c29bf0 +missing tree 8b765235809eb27159657eb4c97fb37d21c29bf0 +``` + +#### Explanation + +Repository corruption typically occurs due to: + +1. **Incomplete push operations**: A git push was interrupted mid-transfer, creating a commit that references objects that were never written to disk +2. **Permission issues during push**: The git-receive-pack process couldn't write objects due to permission problems (e.g., files owned by wrong user) +3. 
**Disk/filesystem issues**: Rare cases of disk errors or filesystem corruption + +**Common pattern:** A commit exists with references to tree objects, but those tree objects are missing from the repository. Sometimes individual blobs (files) exist as "dangling" objects but were never properly linked into the tree structure. + +**Warning signs:** +- HEAD file or objects owned by root when they should be owned by the service user (UID 101) +- Dangling blobs in `git fsck` output +- Recent permission denied errors in logs + +#### How to Fix + +**Step 1: Locate the corrupted repository** + +```bash +# SSH to the server +ssh dc@ngit.dev + +# Find the repository path +# For relay.ngit.dev: /persistent/relay-ngit-dev-ngit-relay/data/repos/npub.../repo.git +# For gitnostr.com: /persistent/gitnostr-com-ngit-relay/data/repos/npub.../repo.git + +cd /persistent/relay-ngit-dev-ngit-relay/data/repos/npub1c03rad0r6q833vh57kyd3ndu2jry30nkr0wepqfpsm05vq7he25slryrnw/axepool.git +``` + +**Step 2: Diagnose the corruption** + +```bash +# Run git fsck to identify missing/corrupted objects +git fsck --full + +# Example output: +# broken link from tree 7d60270e1904c30ae6cef7b465ef842a9f9f63c3 +# to tree 8b765235809eb27159657eb4c97fb37d21c29bf0 +# missing tree 8b765235809eb27159657eb4c97fb37d21c29bf0 +# dangling blob 94490b902c9bceb6f901cd0c7c25b685e3685d87 + +# Check which commit references the missing object +git log --all --oneline | head -10 + +# Inspect the broken commit +git cat-file -p <commit-hash> +# This will show which tree is missing +``` + +**Step 3: Attempt automatic repair** + +Try these in order: + +```bash +# Option A: Repack and garbage collect +git gc --aggressive --prune=now + +# Then check if corruption is fixed +git fsck --full + +# Option B: If that doesn't work, try recovering from pack files +git unpack-objects < .git/objects/pack/*.pack +git fsck --full +``` + +**Step 4: Manual reconstruction (if automatic repair fails)** + +If the missing tree object can be reconstructed from 
dangling blobs: + +```bash +# 1. Identify what should be in the missing tree +# Look at the commit message and nearby commits to understand the structure + +# 2. Find dangling blobs that might belong to the tree +git fsck --full | grep "dangling blob" + +# 3. Examine each dangling blob to identify files +git cat-file -p 94490b902c9bceb6f901cd0c7c25b685e3685d87 + +# 4. Reconstruct the tree manually +# This requires creating a new tree object with the correct structure +# Each mktree input line is "<mode> <type> <hash>", a literal TAB, then the filename +# Example (advanced): +git mktree <<EOF +100644 blob <blob-hash-1>	filename1.rs +100644 blob <blob-hash-2>	filename2.rs +EOF +# This outputs a new tree hash + +# 5. Create a new commit with the fixed tree +git commit-tree <new-tree-hash> -p <parent-commit-hash> -m "Reconstructed commit message" +# This outputs a new commit hash + +# 6. Update the branch reference +git update-ref refs/heads/<branch-name> <new-commit-hash> + +# 7. Clean up +git gc --prune=now +``` + +**Step 5: Verify the fix** + +```bash +# Run fsck again - should show no errors +git fsck --full + +# Test clone locally +git clone /path/to/repo.git /tmp/test-clone + +# Test clone via HTTP +git clone https://relay.ngit.dev/npub.../repo.git /tmp/test-clone-http +``` + +**Step 6: Fix ownership and permissions** + +Ensure all repository files are owned by the correct user: + +```bash +# For ngit-relay containers, files should be owned by UID 101 (nginx user) +sudo chown -R 101:101 /persistent/relay-ngit-dev-ngit-relay/data/repos/npub.../repo.git + +# Verify +ls -la /persistent/relay-ngit-dev-ngit-relay/data/repos/npub.../repo.git +``` + +**Step 7: Replicate fix to other instances (if applicable)** + +If you have multiple relay instances (e.g., gitnostr.com and relay.ngit.dev), replicate the fix: + +```bash +# Copy the repaired pack files +sudo cp /persistent/relay-ngit-dev-ngit-relay/data/repos/npub.../repo.git/objects/pack/* \ + /persistent/gitnostr-com-ngit-relay/data/repos/npub.../repo.git/objects/pack/ + +# Update the branch reference +cd /persistent/gitnostr-com-ngit-relay/data/repos/npub.../repo.git +git update-ref refs/heads/<branch-name> <new-commit-hash> + +# Fix ownership 
+sudo chown -R 101:101 /persistent/gitnostr-com-ngit-relay/data/repos/npub.../repo.git + +# Clean up +git gc --prune=now +``` + +#### Prevention + +To prevent future corruption: + +1. **Fix permission issues first**: Ensure the permission denied errors are resolved (see previous section) +2. **Monitor for root-owned files**: Files in git repositories should be owned by UID 101, not root +3. **Check disk health**: Run `df -h` and `smartctl` to ensure disk is healthy +4. **Enable git fsck in monitoring**: Periodically run `git fsck` on repositories to catch corruption early + +```bash +# Add to monitoring/cron (example) +find /persistent/*/data/repos -name "*.git" -type d | while read repo; do + echo "Checking $repo" + git -C "$repo" fsck --full 2>&1 | grep -v "^Checking\|^dangling" +done +``` + +#### Real-World Example: axepool.git Corruption + +During the relay.ngit.dev migration, the `axepool.git` repository was corrupted: + +**Problem:** +- Commit `e84518b` referenced tree `8b765235...` (the `src` directory) +- Tree `8b765235...` was missing from the repository +- Blob `94490b90...` (mint_client.rs) existed as a dangling object but wasn't linked + +**Root cause:** +- An incomplete push operation +- Permission issues (HEAD file was owned by root) +- The commit was created but the tree object was never written + +**Solution:** +1. Identified the missing tree should contain: `lib.rs`, `main.rs`, `mint_client.rs` +2. Found the dangling blob `94490b90...` was `mint_client.rs` +3. Reconstructed the `src` tree with all three files +4. Created new commit `e12bc3cf...` with the fixed tree +5. Updated `refs/heads/add-missing-hooks` to point to the new commit +6. Ran `git gc --prune=now` to clean up +7. Replicated fix to gitnostr.com instance + +**Result:** Both relays now clone successfully with all files intact. 
+ +### Additional Resources + +- **ngit-relay repository**: https://github.com/danconwaydev/ngit-relay +- **Git internals documentation**: https://git-scm.com/book/en/v2/Git-Internals-Plumbing-and-Porcelain +- **Podman documentation**: https://docs.podman.io/ diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/01-fetch-events.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/01-fetch-events.sh new file mode 100755 index 0000000..e0d6f26 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/01-fetch-events.sh @@ -0,0 +1,206 @@
#!/usr/bin/env bash
#
# 01-fetch-events.sh - Fetch nostr events from a relay for migration analysis
#
# PHASE 1 of the GRASP relay to ngit-grasp migration analysis pipeline.
# Fetches kind 30618 (state), 30617 (announcement), and 5 (deletion) events.
#
# USAGE:
#   ./01-fetch-events.sh <relay-url> <output-dir>
#
# EXAMPLES:
#   # Fetch from production relay
#   ./01-fetch-events.sh wss://relay.ngit.dev output/prod
#
#   # Fetch from archive relay
#   ./01-fetch-events.sh wss://archive.relay.ngit.dev output/archive
#
#   # Full migration analysis setup
#   mkdir -p work/migration-analysis-$(date +%Y%m%d-%H%M)
#   ./01-fetch-events.sh wss://relay.ngit.dev work/migration-analysis-*/prod
#   ./01-fetch-events.sh wss://archive.relay.ngit.dev work/migration-analysis-*/archive
#
# OUTPUT:
#   <output-dir>/raw/state-events.json  - kind 30618 events (one per line, JSONL)
#   <output-dir>/raw/announcements.json - kind 30617 events (one per line, JSONL)
#   <output-dir>/raw/deletions.json     - kind 5 events (one per line, JSONL)
#
# OUTPUT FORMAT:
#   Each file contains one JSON event per line (JSONL format).
#   Events are the raw nostr event objects as returned by the relay.
#
# PREREQUISITES:
#   - nak (Nostr Army Knife) - https://github.com/fiatjaf/nak
#   - jq (for counting/validation)
#
# RUNTIME: ~30 seconds per relay (depends on network and event count)
#
# NOTES:
#   - Uses --paginate to ensure all events are fetched (not just first page)
#   - If event counts are exact multiples of 250, pagination may have failed
#   - Run Phase 1 and Phase 2 back-to-back for accurate snapshot
#
# SEE ALSO:
#   docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
#

set -euo pipefail

# Colors for log output. Every log_* helper writes to stderr, so colors are
# enabled only when stderr (fd 2) is a terminal; testing stdout here would
# leak escape codes into a redirected stderr log.
if [[ -t 2 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m' # No Color
else
  RED=''
  GREEN=''
  YELLOW=''
  BLUE=''
  NC=''
fi

# Logging helpers. All of them write to stderr so that stdout stays free for
# values that callers capture with $(...).
log_info() {
  echo -e "${BLUE}[INFO]${NC} $*" >&2
}

log_success() {
  echo -e "${GREEN}[OK]${NC} $*" >&2
}

log_warn() {
  echo -e "${YELLOW}[WARN]${NC} $*" >&2
}

log_error() {
  echo -e "${RED}[ERROR]${NC} $*" >&2
}

# Print the command-line synopsis and exit with status 1.
usage() {
  echo "Usage: $0 <relay-url> <output-dir>"
  echo ""
  echo "Arguments:"
  echo " relay-url WebSocket URL of the relay (e.g., wss://relay.ngit.dev)"
  echo " output-dir Directory to store fetched events (e.g., output/prod)"
  echo ""
  echo "Examples:"
  echo " $0 wss://relay.ngit.dev output/prod"
  echo " $0 wss://archive.relay.ngit.dev output/archive"
  exit 1
}

# Check prerequisites
# Reports every missing tool before exiting, so the user can install them all
# in one go. Exits 1 if nak or jq is not on PATH.
check_prerequisites() {
  local missing=0

  if ! command -v nak &> /dev/null; then
    log_error "nak not found. Install from: https://github.com/fiatjaf/nak"
    missing=1
  fi

  if ! command -v jq &> /dev/null; then
    log_error "jq not found. Install with your package manager."
    missing=1
  fi

  if [[ $missing -eq 1 ]]; then
    exit 1
  fi
}

# Fetch events of a specific kind
# Args: $1=relay, $2=kind, $3=output_file, $4=description
# Outputs: the number of fetched events on stdout (callers capture it);
#          progress and diagnostics on stderr.
# Returns: 1 if nak fails, 0 otherwise.
fetch_kind() {
  local relay="$1"
  local kind="$2"
  local output_file="$3"
  local description="$4"

  log_info "Fetching $description (kind $kind) from $relay..."

  local start_time
  start_time=$(date +%s)

  # Use --paginate to ensure we get all events, not just first page
  # nak outputs one event per line (JSONL format)
  # Redirection order matters: "2>&1" first points stderr at the command
  # substitution, then ">file" sends stdout to the output file. nak's stderr
  # is therefore kept out of the JSONL file and only shown if the fetch fails.
  local nak_stderr
  if ! nak_stderr=$(nak req -k "$kind" --paginate "$relay" 2>&1 >"$output_file"); then
    log_error "Failed to fetch $description from $relay"
    if [[ -n "$nak_stderr" ]]; then
      log_error "nak: $nak_stderr"
    fi
    return 1
  fi

  local end_time
  end_time=$(date +%s)
  local duration=$((end_time - start_time))

  # Count events
  local count
  count=$(wc -l < "$output_file" | tr -d ' ')

  # Warn if count is suspicious (exact multiple of 250 suggests pagination issue)
  if [[ $count -gt 0 ]] && [[ $((count % 250)) -eq 0 ]]; then
    log_warn "$description count ($count) is exact multiple of 250 - pagination may have failed!"
  fi

  log_success "Fetched $count $description in ${duration}s -> $output_file"

  echo "$count"
}

# Main
# Validates arguments, fetches the three event kinds into <output-dir>/raw/,
# then prints a summary (stderr) and the list of output files (stdout).
main() {
  if [[ $# -ne 2 ]]; then
    usage
  fi

  local relay="$1"
  local output_dir="$2"

  # Validate relay URL
  if [[ ! "$relay" =~ ^wss?:// ]]; then
    log_error "Invalid relay URL: $relay (must start with ws:// or wss://)"
    exit 1
  fi

  check_prerequisites

  log_info "Starting event fetch from $relay"
  log_info "Output directory: $output_dir"

  # Create output directory structure
  local raw_dir="$output_dir/raw"
  mkdir -p "$raw_dir"

  local total_start
  total_start=$(date +%s)

  # Fetch each event type. Declaration is kept separate from assignment so a
  # failing fetch_kind is not masked by 'local' and aborts under 'set -e'.
  local state_count announcement_count deletion_count

  state_count=$(fetch_kind "$relay" 30618 "$raw_dir/state-events.json" "state events")
  announcement_count=$(fetch_kind "$relay" 30617 "$raw_dir/announcements.json" "announcements")
  deletion_count=$(fetch_kind "$relay" 5 "$raw_dir/deletions.json" "deletion requests")

  local total_end
  total_end=$(date +%s)
  local total_duration=$((total_end - total_start))

  # Summary
  echo ""
  log_info "=== Fetch Summary ==="
  log_info "Relay: $relay"
  log_info "Output: $output_dir"
  log_info "State events (30618): $state_count"
  log_info "Announcements (30617): $announcement_count"
  log_info "Deletions (5): $deletion_count"
  log_info "Total time: ${total_duration}s"
  echo ""

  # Output file listing for easy copy/paste
  log_info "Output files:"
  echo " $raw_dir/state-events.json"
  echo " $raw_dir/announcements.json"
  echo " $raw_dir/deletions.json"
}

main "$@"
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh new file mode 100755 index 0000000..b4536cb --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh @@ -0,0 +1,564 @@
#!/usr/bin/env bash
#
# 10-check-git-sync.sh - Compare state events to actual git data on disk
#
# PHASE 2 of the GRASP relay to ngit-grasp migration analysis pipeline.
# Compares kind 30618 state events against actual git refs on disk.
#
# USAGE:
#   ./10-check-git-sync.sh <state-events.json> <git-base-dir> <output-dir> [--categorize]
#
# EXAMPLES:
#   # Check source relay against source git data
#   ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod
#
#   # Check target relay against target git data
#   ./10-check-git-sync.sh output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive
#
#   # Check and categorize in one step (convenience mode)
#   ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod --categorize
#
# INPUT:
#   state-events.json - JSONL file from Phase 1 (01-fetch-events.sh)
#                       One kind 30618 event per line
#   git-base-dir      - Base directory containing git repos
#                       Structure: <git-base-dir>/<npub>/<identifier>.git/
#
# OUTPUT:
#   <output-dir>/git-sync-status.tsv - Tab-separated values:
#     repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason
#
#   With --categorize flag, also outputs:
#   <output-dir>/category1-complete-match.txt
#   <output-dir>/category2-empty-blank.txt
#   <output-dir>/category3-partial-match.txt
#   <output-dir>/category4-no-match.txt
#
# CATEGORIES:
#   1. Complete Match - All refs in state event match git data perfectly
#   2. 
#      Empty/Blank - No git data available (directory missing or empty)
#   3. Partial Match - Some refs match, some don't
#   4. No Match - Git data exists but commit hashes don't match
#
# PREREQUISITES:
#   - git (for reading refs from the repositories on disk)
#   - nak (for npub encoding) - https://github.com/fiatjaf/nak
#   - jq (for JSON parsing)
#   - Read access to git directories (may need sudo)
#
# RUNTIME: ~20 minutes on VPS (git operations are slow)
#
# NOTES:
#   - Must run on VPS with access to git directories
#   - Progress indicator updates every 10 events
#   - Handles packed refs (git show-ref) and loose refs
#
# SEE ALSO:
#   docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
#   01-fetch-events.sh - Phase 1 script that produces input for this script
#   20-categorize.sh - Phase 3a script that consumes output from this script
#

set -euo pipefail

# Colors for log output. Every log_* helper writes to stderr, so colors are
# enabled only when stderr (fd 2) is a terminal; testing stdout here would
# leak escape codes into a redirected stderr log.
if [[ -t 2 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  BLUE=''
  NC=''
fi

# Logging helpers. All of them write to stderr so that stdout stays free for
# the values and TSV rows that the functions below emit.
log_info() {
  echo -e "${BLUE}[INFO]${NC} $*" >&2
}

log_success() {
  echo -e "${GREEN}[OK]${NC} $*" >&2
}

log_warn() {
  echo -e "${YELLOW}[WARN]${NC} $*" >&2
}

log_error() {
  echo -e "${RED}[ERROR]${NC} $*" >&2
}

log_progress() {
  # Overwrite current line for progress updates (leading \r, no trailing newline)
  echo -ne "\r${BLUE}[PROGRESS]${NC} $*" >&2
}

# Print the command-line synopsis and exit with status 1.
usage() {
  echo "Usage: $0 <state-events.json> <git-base-dir> <output-dir> [--categorize]"
  echo ""
  echo "Arguments:"
  echo " state-events.json JSONL file from Phase 1 (kind 30618 events)"
  echo " git-base-dir Base directory for git repos (e.g., /var/lib/grasp-relay/git)"
  echo " output-dir Directory to store output files"
  echo " --categorize Optional: also output category files (like Phase 3)"
  echo ""
  echo "Examples:"
  echo " $0 output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod"
  echo " $0 output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive"
  echo ""
  echo "Output:"
  echo " git-sync-status.tsv - TSV with: repo, npub, state_refs, git_refs, matches, reason"
  exit 1
}

# Check prerequisites
# Reports every missing tool before exiting. Exits 1 if git, nak or jq is
# not on PATH.
check_prerequisites() {
  local missing=0

  if ! command -v git &> /dev/null; then
    log_error "git not found. Install with your package manager."
    missing=1
  fi

  if ! command -v nak &> /dev/null; then
    log_error "nak not found. Install from: https://github.com/fiatjaf/nak"
    # NOTE(review): inside a function "$*" expands to the function's own
    # arguments, so the hint only echoes the script arguments if the caller
    # forwards them - confirm against the call site in main.
    log_error "Or run: nix-shell -p nak jq --run \"$0 $*\""
    missing=1
  fi

  if ! command -v jq &> /dev/null; then
    log_error "jq not found. Install with your package manager."
    missing=1
  fi

  if [[ $missing -eq 1 ]]; then
    exit 1
  fi
}

# Convert hex pubkey to npub
# Args: $1=hex_pubkey
# Returns: npub string or empty on error
hex_to_npub() {
  local hex="$1"
  nak encode npub "$hex" 2>/dev/null || echo ""
}

# Count refs in state event (only refs/heads/)
# Args: $1=event_json
# Returns: count on stdout ("0" if jq cannot parse the event)
count_state_refs() {
  local event="$1"
  jq '[.tags[] | select(.[0] | startswith("refs/heads/"))] | length' <<<"$event" 2>/dev/null || echo "0"
}

# Get git refs from disk
# Args: $1=git_dir
# Returns: count of refs/heads/ refs on stdout
count_git_refs() {
  local git_dir="$1"

  if [[ ! -d "$git_dir" ]]; then
    echo "0"
    return
  fi

  # Try git show-ref first (handles packed refs correctly).
  # With pipefail the pipeline fails whenever show-ref exits non-zero, which
  # (per git's documentation) includes "no refs matched"; those cases drop
  # through to the loose-ref fallback below.
  local count
  if count=$(git --git-dir="$git_dir" show-ref --heads 2>/dev/null | wc -l); then
    echo "$count" | tr -d ' '
    return
  fi

  # Fallback: count loose refs (when git is not available or fails)
  if [[ -d "$git_dir/refs/heads" ]]; then
    find "$git_dir/refs/heads" -type f 2>/dev/null | wc -l | tr -d ' '
  else
    echo "0"
  fi
}

# Get ref hash from git directory
# Args: $1=git_dir, $2=ref_path (e.g., refs/heads/main)
# Returns: commit hash or empty
get_git_ref_hash() {
  local git_dir="$1"
  local ref_path="$2"

  # --verify requires an exact, full ref path. Without it show-ref treats the
  # argument as a pattern matched from the end of the ref name, so a request
  # for refs/heads/main could return the hash of a different ref whose name
  # merely ends in that path. Packed refs are handled by git itself.
  local hash
  hash=$(git --git-dir="$git_dir" show-ref --verify --hash -- "$ref_path" 2>/dev/null | head -1 || echo "")

  if [[ -n "$hash" ]]; then
    echo "$hash"
    return
  fi

  # Fallback: read loose ref file
  local ref_file="$git_dir/$ref_path"
  if [[ -f "$ref_file" ]]; then
    tr -d '\n' < "$ref_file" 2>/dev/null || echo ""
  else
    echo ""
  fi
}

# Compare state event refs to git refs
# Args: $1=event_json, $2=git_dir
# Returns: count of matching refs on stdout
count_matching_refs() {
  local event="$1"
  local git_dir="$2"
  local matching=0
  local ref_path expected_hash actual_hash

  # One jq pass emits "<ref><TAB><hash>" for every refs/heads/ tag, instead of
  # forking two jq processes per ref. A missing or non-string hash becomes an
  # empty field and is skipped below.
  while IFS=$'\t' read -r ref_path expected_hash; do
    # Skip if not a heads ref or hash is missing
    [[ "$ref_path" =~ ^refs/heads/ ]] || continue
    [[ -n "$expected_hash" && "$expected_hash" != "null" ]] || continue

    # Get actual hash from git
    actual_hash=$(get_git_ref_hash "$git_dir" "$ref_path")

    if [[ "$expected_hash" == "$actual_hash" ]]; then
      matching=$((matching + 1))
    fi
  done < <(jq -r '.tags[]
      | select(.[0] | startswith("refs/heads/"))
      | [.[0], (.[1] | if type == "string" then . else "" end)]
      | @tsv' <<<"$event" 2>/dev/null)

  echo "$matching"
}

# Categorize a single entry
# Args: $1=state_refs, $2=git_refs, $3=matches, $4=reason
# Returns: category number (1-4) on stdout
# The checks run in a fixed order; the first one that applies wins.
categorize_entry() {
  local state_refs="$1"
  local git_refs="$2"
  local matches="$3"
  local reason="$4"

  # Category 2: Empty/Blank (any recorded reason, or no refs on disk)
  if [[ -n "$reason" ]] || [[ "$git_refs" -eq 0 ]]; then
    echo "2"
    return
  fi

  # Category 1: Complete Match (same ref count and every state ref matched)
  if [[ "$state_refs" -gt 0 ]] && [[ "$state_refs" -eq "$git_refs" ]] && [[ "$matches" -eq "$state_refs" ]]; then
    echo "1"
    return
  fi

  # Category 4: No Match
  if [[ "$git_refs" -gt 0 ]] && [[ "$matches" -eq 0 ]]; then
    echo "4"
    return
  fi

  # Category 3: Partial Match (default for anything else with matches > 0)
  if [[ "$matches" -gt 0 ]]; then
    echo "3"
    return
  fi

  # Fallback to category 2
  echo "2"
}

# Format entry for category file
# Args: $1=repo, $2=npub, $3=state_refs, $4=git_refs, $5=matches, $6=reason
# Outputs: one " | "-separated line; the reason field is appended only when set
format_category_line() {
  local repo="$1"
  local npub="$2"
  local state_refs="$3"
  local git_refs="$4"
  local matches="$5"
  local reason="$6"

  if [[ -n "$reason" ]]; then
    echo "$repo | $npub | state_refs=$state_refs | git_refs=$git_refs | matches=$matches | reason=$reason"
  else
    echo "$repo | $npub | state_refs=$state_refs | git_refs=$git_refs | matches=$matches"
  fi
}

# Process a single state event
# Args: $1=event_json, $2=git_base
# Outputs: TSV line to stdout
# Returns: 1 (printing nothing) when the d tag, the pubkey or the npub
#          encoding is unavailable.
process_event() {
  local event="$1"
  local git_base="$2"

  # Extract repository identifier (d tag)
  local identifier
  identifier=$(jq -r '.tags[] | select(.[0] == "d") | .[1]' <<<"$event" 2>/dev/null | head -1 || echo "")

  if [[ -z "$identifier" ]]; then
    return 1
  fi

  # Extract maintainer pubkey (hex)
  local hex_pubkey
  hex_pubkey=$(jq -r '.pubkey' <<<"$event" 2>/dev/null || echo "")

  if [[ -z "$hex_pubkey" ]]; then
    return 1
  fi

  # Convert to npub
  local npub
  npub=$(hex_to_npub "$hex_pubkey")

  if [[ -z "$npub" ]]; then
    return 1
  fi

  # Count state refs
  local state_refs
  state_refs=$(count_state_refs "$event")

  # Find git directory
  local git_dir="$git_base/${npub}/${identifier}.git"

  # Check git directory status. "reason" stays empty only when both sides
  # have refs and a real comparison was made.
  local git_refs=0
  local matches=0
  local reason=""

  if [[ ! -d "$git_dir" ]]; then
    reason="no_git_dir"
  elif [[ ! -d "$git_dir/refs/heads" ]] && [[ ! -f "$git_dir/packed-refs" ]]; then
    reason="empty_refs"
  else
    git_refs=$(count_git_refs "$git_dir")

    if [[ "$git_refs" -eq 0 ]]; then
      reason="empty_refs"
    elif [[ "$state_refs" -eq 0 ]]; then
      reason="no_state_refs"
    else
      matches=$(count_matching_refs "$event" "$git_dir")
    fi
  fi

  # Output TSV line: repo, npub, state_refs, git_refs, matches, reason
  printf '%s\t%s\t%s\t%s\t%s\t%s\n' "$identifier" "$npub" "$state_refs" "$git_refs" "$matches" "$reason"
}

+# Main +main() { + local do_categorize=0 + local args=() + + # Parse arguments + for arg in "$@"; do + if [[ "$arg" == "--categorize" ]]; then + do_categorize=1 + else + args+=("$arg") + fi + done + + if [[ ${#args[@]} -ne 3 ]]; then + usage + fi + + local state_events_file="${args[0]}" + local git_base="${args[1]}" + local output_dir="${args[2]}" + + # Validate inputs + if [[ ! -f "$state_events_file" ]]; then + log_error "State events file not found: $state_events_file" + exit 1 + fi + + if [[ ! -d "$git_base" ]]; then + log_error "Git base directory not found: $git_base" + log_error "This script must run on the VPS with access to git directories." + exit 1 + fi + + # Check read permissions + if ! 
ls "$git_base" >/dev/null 2>&1; then + log_error "Cannot read git base directory (permission denied): $git_base" + log_error "Try running with sudo or grant read permissions." + exit 1 + fi + + check_prerequisites + + log_info "=== Git State Synchronization Check ===" + log_info "State events: $state_events_file" + log_info "Git base: $git_base" + log_info "Output: $output_dir" + if [[ $do_categorize -eq 1 ]]; then + log_info "Mode: TSV + categorization" + else + log_info "Mode: TSV only (use 20-categorize.sh for categories)" + fi + log_info "Started: $(date)" + echo "" + + # Create output directory + mkdir -p "$output_dir" + + # Output files + local tsv_file="$output_dir/git-sync-status.tsv" + + # Initialize TSV with header + echo -e "repo\tnpub\tstate_refs\tgit_refs\tmatches\treason" > "$tsv_file" + + # Initialize category files if categorizing + local cat1="" cat2="" cat3="" cat4="" + if [[ $do_categorize -eq 1 ]]; then + cat1="$output_dir/category1-complete-match.txt" + cat2="$output_dir/category2-empty-blank.txt" + cat3="$output_dir/category3-partial-match.txt" + cat4="$output_dir/category4-no-match.txt" + > "$cat1" + > "$cat2" + > "$cat3" + > "$cat4" + fi + + # Count total events + local total_events + total_events=$(wc -l < "$state_events_file" | tr -d ' ') + log_info "Processing $total_events state events..." 
+ echo "" + + # Process each event + local count=0 + local processed=0 + local skipped=0 + local count_cat1=0 count_cat2=0 count_cat3=0 count_cat4=0 + local start_time + start_time=$(date +%s) + + while IFS= read -r event; do + count=$((count + 1)) + + # Skip empty lines + [[ -z "$event" ]] && continue + + # Process event + local result + if result=$(process_event "$event" "$git_base"); then + processed=$((processed + 1)) + + # Write to TSV (skip header line) + echo "$result" >> "$tsv_file" + + # Categorize if requested + if [[ $do_categorize -eq 1 ]]; then + # Parse result + IFS=$'\t' read -r repo npub state_refs git_refs matches reason <<< "$result" + + local category + category=$(categorize_entry "$state_refs" "$git_refs" "$matches" "$reason") + + local cat_line + cat_line=$(format_category_line "$repo" "$npub" "$state_refs" "$git_refs" "$matches" "$reason") + + case "$category" in + 1) echo "$cat_line" >> "$cat1"; count_cat1=$((count_cat1 + 1)) ;; + 2) echo "$cat_line" >> "$cat2"; count_cat2=$((count_cat2 + 1)) ;; + 3) echo "$cat_line" >> "$cat3"; count_cat3=$((count_cat3 + 1)) ;; + 4) echo "$cat_line" >> "$cat4"; count_cat4=$((count_cat4 + 1)) ;; + esac + fi + else + skipped=$((skipped + 1)) + fi + + # Progress indicator every 10 events + if [[ $((count % 10)) -eq 0 ]]; then + local elapsed=$(($(date +%s) - start_time)) + local rate=0 + if [[ $elapsed -gt 0 ]]; then + rate=$((count / elapsed)) + fi + local eta="?" + if [[ $rate -gt 0 ]]; then + eta=$(( (total_events - count) / rate )) + fi + log_progress "Processed $count/$total_events events (~${rate}/s, ETA: ${eta}s)..." 
+ fi + done < "$state_events_file" + + # Clear progress line + echo "" >&2 + + local end_time + end_time=$(date +%s) + local duration=$((end_time - start_time)) + + # Summary + echo "" + log_info "=== Analysis Complete ===" + log_info "Finished: $(date)" + log_info "Duration: ${duration}s" + log_info "Processed: $processed events" + if [[ $skipped -gt 0 ]]; then + log_warn "Skipped: $skipped events (missing identifier or pubkey)" + fi + echo "" + + if [[ $do_categorize -eq 1 ]]; then + # Calculate percentages + local total=$((count_cat1 + count_cat2 + count_cat3 + count_cat4)) + local pct1=0 pct2=0 pct3=0 pct4=0 + if [[ $total -gt 0 ]]; then + pct1=$(awk "BEGIN {printf \"%.1f\", ($count_cat1/$total)*100}") + pct2=$(awk "BEGIN {printf \"%.1f\", ($count_cat2/$total)*100}") + pct3=$(awk "BEGIN {printf \"%.1f\", ($count_cat3/$total)*100}") + pct4=$(awk "BEGIN {printf \"%.1f\", ($count_cat4/$total)*100}") + fi + + log_info "=== Category Summary ===" + log_success "Category 1 (Complete Match): $count_cat1 ($pct1%)" + log_warn "Category 2 (Empty/Blank): $count_cat2 ($pct2%)" + log_warn "Category 3 (Partial Match): $count_cat3 ($pct3%)" + log_error "Category 4 (No Match): $count_cat4 ($pct4%)" + echo "" + + # Validation warning + if [[ $count_cat2 -eq $total ]] && [[ $total -gt 0 ]]; then + log_error "WARNING: 100% of repos categorized as Empty/Blank" + log_error "This usually indicates a permission or path issue." + echo "" + log_info "Troubleshooting:" + echo " 1. Verify git data exists: sudo ls -la $git_base | head -10" + echo " 2. Check sample repo: sudo find $git_base -name '*.git' -type d | head -1" + echo " 3. 
Re-run with sudo if not already using it" + echo "" + fi + fi + + log_info "Output files:" + echo " $tsv_file" + if [[ $do_categorize -eq 1 ]]; then + echo " $cat1" + echo " $cat2" + echo " $cat3" + echo " $cat4" + else + echo "" + log_info "Next step: Run 20-categorize.sh to categorize results" + echo " ./20-categorize.sh $tsv_file $output_dir" + fi +} + +main "$@" diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/20-categorize.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/20-categorize.sh new file mode 100755 index 0000000..b38dc00 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/20-categorize.sh @@ -0,0 +1,212 @@ +#!/usr/bin/env bash +# +# 20-categorize.sh - Categorize git sync status into 4 categories +# +# PHASE 3a of the GRASP relay to ngit-grasp migration analysis pipeline. +# Takes git-sync-status.tsv from Phase 2 and categorizes into 4 files. +# +# USAGE: +# ./20-categorize.sh +# +# EXAMPLES: +# ./20-categorize.sh output/prod/git-sync-status.tsv output/prod +# ./20-categorize.sh output/archive/git-sync-status.tsv output/archive +# +# INPUT FORMAT (git-sync-status.tsv): +# Tab-separated values with columns: +# reponpubstate_refsgit_refsmatchesreason +# +# Where reason is optional and can be: no_git_dir, empty_refs, no_state_refs +# +# OUTPUT: +# /category1-complete-match.txt - All refs match perfectly +# /category2-empty-blank.txt - No git data available +# /category3-partial-match.txt - Some refs match +# /category4-no-match.txt - Git exists but refs don't match +# +# OUTPUT FORMAT: +# repo | npub | state_refs=N | git_refs=N | matches=N [| reason=X] +# +# CATEGORIES: +# 1. Complete Match: state_refs == git_refs == matches (all > 0) +# 2. Empty/Blank: git_refs == 0 OR reason in (no_git_dir, empty_refs, no_state_refs) +# 3. Partial Match: matches > 0 AND matches < state_refs +# 4. 
No Match: git_refs > 0 AND matches == 0
+#
+# PREREQUISITES:
+#   - awk (standard Unix tool)
+#
+# RUNTIME: < 1 second (local processing only)
+#
+# SEE ALSO:
+#   docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
+#   10-check-git-sync.sh - Phase 2 script that produces input for this script
+#
+
+set -euo pipefail
+
+# Colors for log output. Every log_* helper below writes to stderr, so colors
+# are enabled only when stderr (fd 2) is a terminal.
+if [[ -t 2 ]]; then
+    RED='\033[0;31m'
+    GREEN='\033[0;32m'
+    YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'
+    NC='\033[0m'
+else
+    RED=''
+    GREEN=''
+    YELLOW=''
+    BLUE=''
+    NC=''
+fi
+
+# Log helpers: each prints one tagged line to stderr.
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $*" >&2
+}
+
+log_success() {
+    echo -e "${GREEN}[OK]${NC} $*" >&2
+}
+
+log_warn() {
+    echo -e "${YELLOW}[WARN]${NC} $*" >&2
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $*" >&2
+}
+
+# Print usage to stdout and exit with status 1.
+usage() {
+    echo "Usage: $0 <git-sync-status.tsv> <output-dir>"
+    echo ""
+    echo "Arguments:"
+    echo "  git-sync-status.tsv  TSV file from Phase 2 (10-check-git-sync.sh)"
+    echo "  output-dir           Directory to store categorized output"
+    echo ""
+    echo "Examples:"
+    echo "  $0 output/prod/git-sync-status.tsv output/prod"
+    echo "  $0 output/archive/git-sync-status.tsv output/archive"
+    echo ""
+    echo "Input format (TSV):"
+    echo "  repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason"
+    echo ""
+    echo "Output files:"
+    echo "  category1-complete-match.txt - All refs match"
+    echo "  category2-empty-blank.txt - No git data"
+    echo "  category3-partial-match.txt - Some refs match"
+    echo "  category4-no-match.txt - Git exists, refs don't match"
+    exit 1
+}
+
+# Main: sort every TSV row into one of four category files, then print a
+# summary.
+# Args: $1=git-sync-status.tsv, $2=output-dir
+# Outputs: log lines on stderr; the list of output files on stdout
+main() {
+    if [[ $# -ne 2 ]]; then
+        usage
+    fi
+
+    local input_file="$1"
+    local output_dir="$2"
+
+    # Validate input file
+    if [[ ! -f "$input_file" ]]; then
+        log_error "Input file not found: $input_file"
+        exit 1
+    fi
+
+    log_info "Categorizing git sync status"
+    log_info "Input: $input_file"
+    log_info "Output: $output_dir"
+
+    # Create output directory
+    mkdir -p "$output_dir"
+
+    # Output files
+    local cat1="$output_dir/category1-complete-match.txt"
+    local cat2="$output_dir/category2-empty-blank.txt"
+    local cat3="$output_dir/category3-partial-match.txt"
+    local cat4="$output_dir/category4-no-match.txt"
+
+    # Clear previous results (awk appends to these files below)
+    : > "$cat1"
+    : > "$cat2"
+    : > "$cat3"
+    : > "$cat4"
+
+    # Process input file with awk
+    # Input: repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason
+    #
+    # awk writes the category lines straight to the files and prints a single
+    # "COUNTS:" line on stdout, which is captured here. Its stderr is left
+    # alone so that awk diagnostics reach the user instead of being dropped.
+    local counts
+    if ! counts=$(awk -F'\t' -v cat1="$cat1" -v cat2="$cat2" -v cat3="$cat3" -v cat4="$cat4" '
+        BEGIN {
+            count1 = 0; count2 = 0; count3 = 0; count4 = 0
+        }
+        # Skip the header row written by 10-check-git-sync.sh, if present.
+        # Both columns are compared exactly so that a repo whose name merely
+        # starts with "repo" is not mistaken for the header.
+        NR == 1 && $1 == "repo" && $2 == "npub" { next }
+        NF >= 5 {
+            repo = $1
+            npub = $2
+            state_refs = int($3)
+            git_refs = int($4)
+            matches = int($5)
+            reason = (NF >= 6) ? $6 : ""
+
+            # Format output line
+            if (reason != "") {
+                line = repo " | " npub " | state_refs=" state_refs " | git_refs=" git_refs " | matches=" matches " | reason=" reason
+            } else {
+                line = repo " | " npub " | state_refs=" state_refs " | git_refs=" git_refs " | matches=" matches
+            }
+
+            # Categorize
+            if (reason == "no_git_dir" || reason == "empty_refs" || reason == "no_state_refs" || git_refs == 0) {
+                # Category 2: Empty/Blank
+                print line >> cat2
+                count2++
+            } else if (state_refs > 0 && state_refs == git_refs && matches == state_refs) {
+                # Category 1: Complete Match
+                print line >> cat1
+                count1++
+            } else if (matches > 0 && matches < state_refs) {
+                # Category 3: Partial Match
+                print line >> cat3
+                count3++
+            } else if (git_refs > 0 && matches == 0) {
+                # Category 4: No Match
+                print line >> cat4
+                count4++
+            } else if (matches > 0) {
+                # Edge case: matches > 0 but does not fit other categories
+                # This can happen when git_refs > state_refs but all state refs match
+                # Treat as partial match
+                print line >> cat3
+                count3++
+            } else {
+                # Fallback: treat as category 2 (empty/blank)
+                print line >> cat2
+                count2++
+            }
+        }
+        END {
+            total = count1 + count2 + count3 + count4
+            print "COUNTS:" count1 ":" count2 ":" count3 ":" count4 ":" total
+        }
+    ' "$input_file"); then
+        log_error "awk failed while categorizing: $input_file"
+        exit 1
+    fi
+
+    # Parse "COUNTS:c1:c2:c3:c4:total" from awk output
+    local c1 c2 c3 c4 total
+    IFS=':' read -r _ c1 c2 c3 c4 total <<< "$counts"
+
+    echo ""
+    log_info "=== Categorization Summary ==="
+    log_info "Total entries: $total"
+    log_success "Category 1 (Complete Match): $c1"
+    log_warn "Category 2 (Empty/Blank): $c2"
+    log_warn "Category 3 (Partial Match): $c3"
+    log_error "Category 4 (No Match): $c4"
+    echo ""
+    log_info "Output files:"
+    echo "  $cat1"
+    echo "  $cat2"
+    echo "  $cat3"
+    echo "  $cat4"
+}
+
+main "$@"
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh
new file mode 100755
index 0000000..b9c0d30
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh
@@ -0,0 +1,294 @@
+#!/usr/bin/env bash
+#
+# 21-compare-relays.sh - Compare prod vs archive category files to find gaps
+#
+# PHASE 3b of the GRASP relay to ngit-grasp migration analysis pipeline.
+# Compares categorized output from prod and archive to identify: +# - Repos complete in prod but missing/incomplete in archive +# - Repos in archive but not in prod +# - Status differences between relays +# +# USAGE: +# ./21-compare-relays.sh +# +# EXAMPLES: +# ./21-compare-relays.sh output/prod output/archive output/comparison +# +# INPUT: +# Both prod-dir and archive-dir must contain: +# - category1-complete-match.txt +# - category2-empty-blank.txt +# - category3-partial-match.txt +# - category4-no-match.txt +# +# OUTPUT: +# /complete-in-both.txt - Repos complete in both relays (no action) +# /complete-prod-missing-archive.txt - Complete in prod, not in archive cat1 +# /complete-prod-incomplete-archive.txt - Complete in prod, incomplete in archive +# /incomplete-in-both.txt - Incomplete in both relays +# /in-archive-not-prod.txt - In archive but not in prod +# /summary.txt - Human-readable summary +# +# OUTPUT FORMAT: +# Each file contains lines in the format: +# repo | npub | prod_status | archive_status +# +# PREREQUISITES: +# - awk, sort, comm (standard Unix tools) +# +# RUNTIME: < 1 second (local processing only) +# +# SEE ALSO: +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide +# 20-categorize.sh - Phase 3a script that produces input for this script +# + +set -euo pipefail + +# Colors for output (disabled if not a terminal) +if [[ -t 1 ]]; then + RED='\033[0;31m' + GREEN='\033[0;32m' + YELLOW='\033[0;33m' + BLUE='\033[0;34m' + NC='\033[0m' +else + RED='' + GREEN='' + YELLOW='' + BLUE='' + NC='' +fi + +log_info() { + echo -e "${BLUE}[INFO]${NC} $*" >&2 +} + +log_success() { + echo -e "${GREEN}[OK]${NC} $*" >&2 +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $*" >&2 +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $*" >&2 +} + +usage() { + echo "Usage: $0 " + echo "" + echo "Arguments:" + echo " prod-dir Directory containing prod category files" + echo " archive-dir Directory containing archive category files" + echo " output-dir Directory to 
store comparison results" + echo "" + echo "Examples:" + echo " $0 output/prod output/archive output/comparison" + echo "" + echo "Required input files in each directory:" + echo " category1-complete-match.txt" + echo " category2-empty-blank.txt" + echo " category3-partial-match.txt" + echo " category4-no-match.txt" + exit 1 +} + +# Extract repo|npub key from category line +# Input: "repo | npub | state_refs=N | ..." +# Output: "repo|npub" +extract_key() { + awk -F' \\| ' '{print $1 "|" $2}' +} + +# Build lookup table from category files +# Args: $1=directory, $2=output_file +build_lookup() { + local dir="$1" + local output="$2" + + # Process all 4 category files + for cat in 1 2 3 4; do + local file="$dir/category${cat}-*.txt" + # shellcheck disable=SC2086 + if ls $file 1>/dev/null 2>&1; then + # shellcheck disable=SC2086 + cat $file | while IFS= read -r line; do + key=$(echo "$line" | extract_key) + echo "${key}|cat${cat}|${line}" + done + fi + done | sort -t'|' -k1,2 > "$output" +} + +# Main +main() { + if [[ $# -ne 3 ]]; then + usage + fi + + local prod_dir="$1" + local archive_dir="$2" + local output_dir="$3" + + # Validate input directories + for dir in "$prod_dir" "$archive_dir"; do + if [[ ! -d "$dir" ]]; then + log_error "Directory not found: $dir" + exit 1 + fi + if [[ ! -f "$dir/category1-complete-match.txt" ]]; then + log_error "Missing category1-complete-match.txt in $dir" + exit 1 + fi + done + + log_info "Comparing relay categories" + log_info "Prod: $prod_dir" + log_info "Archive: $archive_dir" + log_info "Output: $output_dir" + + # Create output directory + mkdir -p "$output_dir" + + # Create temp files for processing + local tmp_dir + tmp_dir=$(mktemp -d) + # shellcheck disable=SC2064 + trap "rm -rf '$tmp_dir'" EXIT + + log_info "Building lookup tables..." 
+ + # Build lookup tables: key|category|full_line + build_lookup "$prod_dir" "$tmp_dir/prod_lookup.txt" + build_lookup "$archive_dir" "$tmp_dir/archive_lookup.txt" + + # Extract just keys for comparison + cut -d'|' -f1,2 "$tmp_dir/prod_lookup.txt" | sort -u > "$tmp_dir/prod_keys.txt" + cut -d'|' -f1,2 "$tmp_dir/archive_lookup.txt" | sort -u > "$tmp_dir/archive_keys.txt" + + log_info "Comparing categories..." + + # Initialize output files + > "$output_dir/complete-in-both.txt" + > "$output_dir/complete-prod-missing-archive.txt" + > "$output_dir/complete-prod-incomplete-archive.txt" + > "$output_dir/incomplete-in-both.txt" + > "$output_dir/in-archive-not-prod.txt" + + # Process prod category 1 (complete) entries + while IFS='|' read -r repo npub cat full_line; do + key="${repo}|${npub}" + + # Look up in archive + archive_entry=$(grep "^${key}|" "$tmp_dir/archive_lookup.txt" 2>/dev/null | head -1 || echo "") + + if [[ -z "$archive_entry" ]]; then + # Not in archive at all + echo "$repo | $npub | prod=complete | archive=missing" >> "$output_dir/complete-prod-missing-archive.txt" + else + archive_cat=$(echo "$archive_entry" | cut -d'|' -f3) + if [[ "$archive_cat" == "cat1" ]]; then + # Complete in both + echo "$repo | $npub | prod=complete | archive=complete" >> "$output_dir/complete-in-both.txt" + else + # Complete in prod, incomplete in archive + echo "$repo | $npub | prod=complete | archive=$archive_cat" >> "$output_dir/complete-prod-incomplete-archive.txt" + fi + fi + done < <(grep '|cat1|' "$tmp_dir/prod_lookup.txt" | sed 's/|cat1|/|cat1|/') + + # Process prod categories 2-4 (incomplete) entries + for cat in cat2 cat3 cat4; do + while IFS='|' read -r repo npub _ full_line; do + key="${repo}|${npub}" + + # Look up in archive + archive_entry=$(grep "^${key}|" "$tmp_dir/archive_lookup.txt" 2>/dev/null | head -1 || echo "") + + if [[ -z "$archive_entry" ]]; then + # Incomplete in prod, missing in archive + echo "$repo | $npub | prod=$cat | archive=missing" >> 
"$output_dir/incomplete-in-both.txt" + else + archive_cat=$(echo "$archive_entry" | cut -d'|' -f3) + if [[ "$archive_cat" != "cat1" ]]; then + # Incomplete in both + echo "$repo | $npub | prod=$cat | archive=$archive_cat" >> "$output_dir/incomplete-in-both.txt" + fi + # If archive is complete but prod is not, that's unusual but not an error + fi + done < <(grep "|${cat}|" "$tmp_dir/prod_lookup.txt") + done + + # Find entries in archive but not in prod + comm -23 "$tmp_dir/archive_keys.txt" "$tmp_dir/prod_keys.txt" | while IFS='|' read -r repo npub; do + key="${repo}|${npub}" + archive_entry=$(grep "^${key}|" "$tmp_dir/archive_lookup.txt" 2>/dev/null | head -1 || echo "") + archive_cat=$(echo "$archive_entry" | cut -d'|' -f3) + echo "$repo | $npub | prod=missing | archive=$archive_cat" >> "$output_dir/in-archive-not-prod.txt" + done + + # Count results + local count_both count_missing count_incomplete count_both_incomplete count_archive_only + count_both=$(wc -l < "$output_dir/complete-in-both.txt" | tr -d ' ') + count_missing=$(wc -l < "$output_dir/complete-prod-missing-archive.txt" | tr -d ' ') + count_incomplete=$(wc -l < "$output_dir/complete-prod-incomplete-archive.txt" | tr -d ' ') + count_both_incomplete=$(wc -l < "$output_dir/incomplete-in-both.txt" | tr -d ' ') + count_archive_only=$(wc -l < "$output_dir/in-archive-not-prod.txt" | tr -d ' ') + + # Generate summary + cat > "$output_dir/summary.txt" << EOF +# Relay Comparison Summary +Generated: $(date -Iseconds) + +## Input +- Prod: $prod_dir +- Archive: $archive_dir + +## Results + +### No Action Required +- Complete in both relays: $count_both + +### Action/Decision Required +- Complete in prod, MISSING from archive: $count_missing +- Complete in prod, INCOMPLETE in archive: $count_incomplete +- Incomplete in BOTH relays: $count_both_incomplete + +### For Reference +- In archive but not in prod: $count_archive_only + +## Files +- complete-in-both.txt: Repos successfully migrated (no action) +- 
complete-prod-missing-archive.txt: Need investigation - why not in archive? +- complete-prod-incomplete-archive.txt: Archive sync may still be in progress +- incomplete-in-both.txt: Git data incomplete on both relays +- in-archive-not-prod.txt: May be deleted from prod or new to archive + +## Next Steps +1. Review complete-prod-missing-archive.txt - these repos need attention +2. Check if archive sync is still running for incomplete entries +3. Cross-reference with deletion events (kind 5) from Phase 1 +4. Use Phase 4 logs to understand parse failures and purgatory expiry +EOF + + # Display summary + echo "" + log_info "=== Comparison Summary ===" + log_success "Complete in both: $count_both (no action needed)" + log_error "Complete in prod, MISSING from archive: $count_missing" + log_warn "Complete in prod, incomplete in archive: $count_incomplete" + log_warn "Incomplete in both: $count_both_incomplete" + log_info "In archive only: $count_archive_only" + echo "" + log_info "Output files:" + echo " $output_dir/complete-in-both.txt" + echo " $output_dir/complete-prod-missing-archive.txt" + echo " $output_dir/complete-prod-incomplete-archive.txt" + echo " $output_dir/incomplete-in-both.txt" + echo " $output_dir/in-archive-not-prod.txt" + echo " $output_dir/summary.txt" +} + +main "$@" diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/22-compare-git-data.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/22-compare-git-data.sh new file mode 100755 index 0000000..76521d4 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/22-compare-git-data.sh @@ -0,0 +1,390 @@ +#!/usr/bin/env bash +# +# 22-compare-git-data.sh - Compare actual git data between prod and archive relays +# +# PHASE 3c of the GRASP relay to ngit-grasp migration analysis pipeline. +# Compares actual git commits between prod and archive to determine which is ahead. 
+# +# KEY INSIGHT: +# Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event. +# If archive has different/newer data than prod, it means: +# - A state event authorized those commits at some point +# - Archive is actually MORE up-to-date than prod +# - Migration should use archive data (it's already correct) +# +# USAGE: +# ./22-compare-git-data.sh +# +# EXAMPLES: +# ./22-compare-git-data.sh /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \ +# output/comparison/complete-prod-incomplete-archive.txt output/comparison +# +# INPUT: +# prod-git-base Base directory for prod git repos (e.g., /var/lib/grasp-relay/git) +# archive-git-base Base directory for archive git repos (e.g., /var/lib/ngit-grasp/git) +# repo-list File with repos to compare (format: "repo | npub | ...") +# +# OUTPUT: +# /git-ancestry.tsv - Tab-separated values: +# reponpubrelationshipdetails +# +# Relationship values: +# archive-ahead - Archive has all prod commits plus more (GOOD - use archive) +# in-sync - Both have identical commits +# prod-ahead - Prod has commits archive is missing (needs re-sync) +# diverged - Both have unique commits (manual review) +# archive-only - Only archive has git data +# prod-only - Only prod has git data +# both-empty - Neither has git data +# +# PREREQUISITES: +# - git (for ref comparison) +# - Read access to both git directories (may need sudo) +# +# RUNTIME: Depends on number of repos to compare +# +# SEE ALSO: +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide +# 21-compare-relays.sh - Phase 3b script that identifies repos to compare +# + +set -euo pipefail + +# Colors for output (disabled if not a terminal) +if [[ -t 1 ]]; then + RED='\033[0;31m' + GREEN='\033[0;32m' + YELLOW='\033[0;33m' + BLUE='\033[0;34m' + NC='\033[0m' +else + RED='' + GREEN='' + YELLOW='' + BLUE='' + NC='' +fi + +log_info() { + echo -e "${BLUE}[INFO]${NC} $*" >&2 +} + +log_success() { + echo -e "${GREEN}[OK]${NC} $*" >&2 +} + +log_warn() { + echo -e 
"${YELLOW}[WARN]${NC} $*" >&2 +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $*" >&2 +} + +log_progress() { + echo -ne "\r${BLUE}[PROGRESS]${NC} $*" >&2 +} + +usage() { + echo "Usage: $0 " + echo "" + echo "Arguments:" + echo " prod-git-base Base directory for prod git repos" + echo " archive-git-base Base directory for archive git repos" + echo " repo-list File with repos to compare (format: 'repo | npub | ...')" + echo " output-dir Directory to store output files" + echo "" + echo "Examples:" + echo " $0 /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \\" + echo " output/comparison/complete-prod-incomplete-archive.txt output/comparison" + echo "" + echo "Output:" + echo " git-ancestry.tsv - TSV with: repo, npub, relationship, details" + exit 1 +} + +# Get all branch refs from a git directory +# Args: $1=git_dir +# Returns: sorted list of "ref_name commit_hash" lines +get_git_refs() { + local git_dir="$1" + + if [[ ! -d "$git_dir" ]]; then + return + fi + + git --git-dir="$git_dir" show-ref --heads 2>/dev/null | sort || true +} + +# Check if commit A is ancestor of commit B +# Args: $1=git_dir, $2=commit_a, $3=commit_b +# Returns: 0 if A is ancestor of B, 1 otherwise +is_ancestor() { + local git_dir="$1" + local commit_a="$2" + local commit_b="$3" + + git --git-dir="$git_dir" merge-base --is-ancestor "$commit_a" "$commit_b" 2>/dev/null +} + +# Compare git data between prod and archive for a single repo +# Args: $1=prod_git_dir, $2=archive_git_dir +# Returns: relationship string +compare_repo_git() { + local prod_git="$1" + local archive_git="$2" + + local prod_exists=false + local archive_exists=false + + [[ -d "$prod_git" ]] && prod_exists=true + [[ -d "$archive_git" ]] && archive_exists=true + + # Handle cases where one or both don't exist + if [[ "$prod_exists" == "false" && "$archive_exists" == "false" ]]; then + echo "both-empty" + return + fi + + if [[ "$prod_exists" == "false" ]]; then + echo "archive-only" + return + fi + + if [[ "$archive_exists" == 
"false" ]]; then + echo "prod-only" + return + fi + + # Both exist - get refs + local prod_refs archive_refs + prod_refs=$(get_git_refs "$prod_git") + archive_refs=$(get_git_refs "$archive_git") + + # Handle empty refs + if [[ -z "$prod_refs" && -z "$archive_refs" ]]; then + echo "both-empty" + return + fi + + if [[ -z "$prod_refs" ]]; then + echo "archive-only" + return + fi + + if [[ -z "$archive_refs" ]]; then + echo "prod-only" + return + fi + + # Compare refs - check if they're identical + if [[ "$prod_refs" == "$archive_refs" ]]; then + echo "in-sync" + return + fi + + # Refs differ - need to check ancestry + # Strategy: For each branch, check if one is ancestor of the other + # If all archive branches are ahead of or equal to prod branches, archive is ahead + # If all prod branches are ahead of or equal to archive branches, prod is ahead + # Otherwise, they've diverged + + local archive_ahead=true + local prod_ahead=true + local has_common_branch=false + + # Create temporary file to use archive as reference repo for ancestry checks + # We need a repo that has both sets of commits to check ancestry + # Use archive since it's the target and should have the superset + + # Check each prod branch against archive + while read -r prod_hash prod_ref; do + [[ -z "$prod_hash" ]] && continue + + # Get the same branch from archive + local archive_hash + archive_hash=$(echo "$archive_refs" | grep " $prod_ref$" | awk '{print $1}' || echo "") + + if [[ -z "$archive_hash" ]]; then + # Branch exists in prod but not archive - prod has something archive doesn't + # But this could be a deleted branch, so don't immediately say prod is ahead + continue + fi + + has_common_branch=true + + if [[ "$prod_hash" == "$archive_hash" ]]; then + # Same commit - neither ahead for this branch + continue + fi + + # Different commits - check ancestry + # First, try to check if prod is ancestor of archive (archive ahead) + if is_ancestor "$archive_git" "$prod_hash" "$archive_hash" 2>/dev/null; 
# Main entry point: classify every listed repo by comparing its prod and
# archive git data, write the results as TSV and log a per-category summary.
#
# Arguments:
#   $1 - prod git base directory (repos live at <base>/<npub>/<repo>.git)
#   $2 - archive git base directory (same layout)
#   $3 - repo list file: one "repo|npub[|...]" entry per line; lines starting
#        with '#' and blank lines are ignored
#   $4 - output directory (created if missing); receives git-ancestry.tsv
# Outputs:
#   <output-dir>/git-ancestry.tsv with columns repo, npub, relationship, details
# Returns:
#   Exits 1 if either base directory or the repo list does not exist.
main() {
  if [[ $# -ne 4 ]]; then
    usage
  fi

  local prod_git_base="$1"
  local archive_git_base="$2"
  local repo_list="$3"
  local output_dir="$4"

  # Validate inputs
  if [[ ! -d "$prod_git_base" ]]; then
    log_error "Prod git base directory not found: $prod_git_base"
    exit 1
  fi

  if [[ ! -d "$archive_git_base" ]]; then
    log_error "Archive git base directory not found: $archive_git_base"
    exit 1
  fi

  if [[ ! -f "$repo_list" ]]; then
    log_error "Repo list file not found: $repo_list"
    exit 1
  fi

  log_info "=== Git Data Comparison ==="
  log_info "Prod git base: $prod_git_base"
  log_info "Archive git base: $archive_git_base"
  log_info "Repo list: $repo_list"
  log_info "Output: $output_dir"
  log_info "Started: $(date)"
  echo ""

  # Create output directory
  mkdir -p "$output_dir"

  # Output file, initialised with the TSV header
  local tsv_file="$output_dir/git-ancestry.tsv"
  printf 'repo\tnpub\trelationship\tdetails\n' > "$tsv_file"

  # Count the entries the loop below will look at (comments and blank lines
  # excluded). grep -c prints the count itself - including "0" - and merely
  # exits 1 when nothing matched, so only the exit status is neutralised;
  # echoing a fallback here would produce "0<newline>0".
  local total_repos
  total_repos=$(grep -c -v -e '^#' -e '^[[:space:]]*$' -- "$repo_list" 2>/dev/null || true)
  total_repos="${total_repos:-0}"
  log_info "Processing $total_repos repos..."
  echo ""

  # Counters
  local count=0
  local count_archive_ahead=0
  local count_in_sync=0
  local count_prod_ahead=0
  local count_diverged=0
  local count_archive_only=0
  local count_prod_only=0
  local count_both_empty=0

  # Per-iteration variables, declared once instead of on every pass.
  local repo npub _rest prod_git archive_git relationship
  # Nothing in this function fills "details"; the column is written empty.
  local details=""

  # Process each repo. The "|| [[ -n ... ]]" keeps a final line that has no
  # trailing newline.
  while IFS='|' read -r repo npub _rest || [[ -n "$repo" ]]; do
    # Skip comments and empty lines
    if [[ "$repo" =~ ^# || -z "$repo" ]]; then
      continue
    fi

    # Clean up whitespace
    repo="${repo// /}"
    npub="${npub// /}"

    if [[ -z "$repo" || -z "$npub" ]]; then
      continue
    fi

    count=$((count + 1))

    # Build git paths
    prod_git="$prod_git_base/${npub}/${repo}.git"
    archive_git="$archive_git_base/${npub}/${repo}.git"

    # Compare
    relationship=$(compare_repo_git "$prod_git" "$archive_git")

    # Count by relationship
    case "$relationship" in
      archive-ahead) count_archive_ahead=$((count_archive_ahead + 1)) ;;
      in-sync) count_in_sync=$((count_in_sync + 1)) ;;
      prod-ahead) count_prod_ahead=$((count_prod_ahead + 1)) ;;
      diverged) count_diverged=$((count_diverged + 1)) ;;
      archive-only) count_archive_only=$((count_archive_only + 1)) ;;
      prod-only) count_prod_only=$((count_prod_only + 1)) ;;
      both-empty) count_both_empty=$((count_both_empty + 1)) ;;
    esac

    # Output TSV line
    printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$relationship" "$details" >> "$tsv_file"

    # Progress indicator every 10 repos
    if [[ $((count % 10)) -eq 0 ]]; then
      log_progress "Processed $count/$total_repos repos..."
    fi
  done < "$repo_list"

  # Clear progress line
  echo "" >&2

  # Summary
  echo ""
  log_info "=== Comparison Summary ==="
  log_success "Archive ahead (use archive data): $count_archive_ahead"
  log_success "In sync: $count_in_sync"
  log_warn "Prod ahead (needs re-sync): $count_prod_ahead"
  log_error "Diverged (manual review): $count_diverged"
  log_info "Archive only: $count_archive_only"
  log_info "Prod only: $count_prod_only"
  log_info "Both empty: $count_both_empty"
  echo ""
  log_info "Total: $count repos"
  log_info "Output: $tsv_file"
}
+# +# USAGE: +# ./30-extract-parse-failures.sh [options] +# +# EXAMPLES: +# # Extract from ngit-grasp service (last 30 days, default) +# ./30-extract-parse-failures.sh ngit-grasp.service output/logs +# +# # Extract with custom time range +# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-01" +# +# # Extract from specific time window +# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22" +# +# OPTIONS: +# --since Start date for log extraction (default: 30 days ago) +# --until End date for log extraction (default: now) +# --dry-run Show what would be extracted without writing files +# +# ENRICHMENT: +# The script automatically enriches parse failures with repo/npub information +# by extracting from "Added rejected announcement" log entries which include +# pubkey and identifier fields. Hex pubkeys are converted to npub format using +# `nak encode npub ` if the nak tool is available. +# +# OUTPUT: +# /parse-failures.txt +# +# OUTPUT FORMAT (TSV): +# event_idkindreasonreponpub +# +# EXPECTED LOG FORMATS: +# The script looks for three types of log entries: +# +# 1. Structured [PARSE_FAIL] entries: +# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... +# +# 2. "Invalid announcement" rejections (write policy): +# Event rejected by write policy event_id=abc123... relay=wss://... kind=30617 reason=Invalid announcement: multiple clone tags found... +# +# 3. "Added rejected announcement" entries (for enrichment): +# Added rejected announcement to two-tier index event_id=abc123... kind=30617 identifier=myrepo pubkey=hex... +# These entries provide pubkey and identifier for enriching write policy rejections. +# +# NOTE: Builder logs ("Rejected repository announcement note1xxx:") are NOT extracted +# because they use bech32 (note1) IDs while write policy logs use hex IDs. 
set -euo pipefail

# Get script directory for sourcing helpers
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Source the service validation helper when it sits next to this script.
# It is optional: main() checks for its function before relying on it.
if [[ -f "$SCRIPT_DIR/validate-service.sh" ]]; then
  # shellcheck source=/dev/null
  source "$SCRIPT_DIR/validate-service.sh"
fi

# Colors for output (disabled if not a terminal).
# Every log_* helper below writes to stderr, so stderr - not stdout - is the
# stream that has to be a terminal; otherwise escape codes end up in redirected
# log files whenever stdout happens to be a tty.
if [[ -t 2 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  BLUE=''
  NC=''
fi

# Logging helpers. Each prints "[LEVEL] <message>" to stderr.
# Arguments: the message words (joined with spaces).
# Only the colour variables go through %b; the message itself is printed with
# %s so backslashes in logged data (paths, rejection reasons) stay literal.
log_info() {
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$*" >&2
}

log_success() {
  printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$*" >&2
}

log_warn() {
  printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*" >&2
}

log_error() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2
}
# Print the command-line help and terminate the script.
# The synopsis names the two required positional arguments and the value each
# option takes, matching what main() actually parses.
# Outputs: help text on stdout.
# Returns: never returns; exits with status 1.
usage() {
  cat <<EOF
Usage: $0 <service-name> <output-dir> [options]

Arguments:
  service-name    Systemd service name (e.g., ngit-grasp.service)
  output-dir      Directory to store extracted log data

Options:
  --since <date>  Start date (default: 30 days ago)
  --until <date>  End date (default: now)
  --dry-run       Show what would be extracted without writing

Examples:
  $0 ngit-grasp.service output/logs
  $0 ngit-grasp.service output/logs --since '2026-01-01'
  $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'

Expected log formats:
  [PARSE_FAIL] kind=30618 event_id=abc123 reason="..." repo=myrepo npub=npub1...
  Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ...

Enrichment:
  Parse failures are automatically enriched with repo/npub from
  'Added rejected announcement' log entries. Hex pubkeys are converted
  to npub format using 'nak encode npub' if available.
EOF
  exit 1
}
# Parse [PARSE_FAIL] log lines in batch using awk.
#
# Only POSIX awk features are used (match/RSTART/RLENGTH/substr) so the parser
# behaves the same under gawk, mawk and BSD awk; the three-argument form of
# match() is a gawk extension.
#
# Arguments: $1 - file containing log lines with [PARSE_FAIL]
# Outputs:   TSV lines: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
#            (lines lacking kind, event_id or a non-empty reason are dropped)
parse_parse_fail_batch() {
  local input_file="$1"
  awk '
    # Value of the first "key=<value_re>" occurrence in line, or "" if absent.
    function field(line, key, value_re,    prefix_len) {
      if (!match(line, key "=" value_re)) return ""
      prefix_len = length(key) + 1
      return substr(line, RSTART + prefix_len, RLENGTH - prefix_len)
    }
    {
      kind = field($0, "kind", "[0-9]+")
      event_id = field($0, "event_id", "[a-f0-9]+")

      # reason="VALUE" (quoted string): drop the 8-char prefix and the
      # closing quote.
      reason = ""
      if (match($0, /reason="[^"]*"/)) reason = substr($0, RSTART + 8, RLENGTH - 9)
      gsub(/\t/, " ", reason)  # a literal tab would shift the TSV columns

      # Optional fields
      repo = field($0, "repo", "[^ ]+")
      npub = field($0, "npub", "[^ ]+")

      # Output if we have required fields
      if (kind != "" && event_id != "" && reason != "") {
        print event_id "\t" kind "\t" reason "\t" repo "\t" npub
      }
    }
  ' "$input_file"
}

# Parse "Invalid announcement" rejection log lines in batch using awk.
#
# Arguments: $1 - file containing "Event rejected by write policy" log lines
# Outputs:   TSV lines: event_id<TAB>kind<TAB>reason<TAB><TAB>
#            (repo and npub columns are left empty; enrichment fills them later)
parse_write_policy_rejection_batch() {
  local input_file="$1"
  awk '
    # Value of the first "key=<value_re>" occurrence in line, or "" if absent.
    function field(line, key, value_re,    prefix_len) {
      if (!match(line, key "=" value_re)) return ""
      prefix_len = length(key) + 1
      return substr(line, RSTART + prefix_len, RLENGTH - prefix_len)
    }
    {
      event_id = field($0, "event_id", "[a-f0-9]+")
      kind = field($0, "kind", "[0-9]+")

      # reason is unquoted here: everything after the first "reason=".
      reason = ""
      pos = index($0, "reason=")
      if (pos > 0) reason = substr($0, pos + 7)
      gsub(/\t/, " ", reason)  # a literal tab would shift the TSV columns

      # Output if we have required fields (repo and npub are empty)
      if (kind != "" && event_id != "" && reason != "") {
        print event_id "\t" kind "\t" reason "\t\t"
      }
    }
  ' "$input_file"
}

# Parse "Added rejected announcement" log lines in batch using awk.
#
# Arguments: $1 - file containing "Added rejected announcement to two-tier
#                 index" log lines
# Outputs:   TSV lines: event_id<TAB>identifier<TAB>pubkey_hex
#            (lines missing any of the three fields are dropped)
parse_rejected_announcement_batch() {
  local input_file="$1"
  awk '
    # Value of the first "key=<value_re>" occurrence in line, or "" if absent.
    function field(line, key, value_re,    prefix_len) {
      if (!match(line, key "=" value_re)) return ""
      prefix_len = length(key) + 1
      return substr(line, RSTART + prefix_len, RLENGTH - prefix_len)
    }
    {
      event_id = field($0, "event_id", "[a-f0-9]+")
      identifier = field($0, "identifier", "[^ ]+")
      pubkey = field($0, "pubkey", "[a-f0-9]+")

      # Output if we have all required fields
      if (event_id != "" && identifier != "" && pubkey != "") {
        print event_id "\t" identifier "\t" pubkey
      }
    }
  ' "$input_file"
}
# Enrich parse failures with repo/npub by looking up event_id in the table
# built from "Added rejected announcement" log entries.
#
# "Invalid announcement" rejections only log event_id and kind, so without
# this step their repo/npub columns stay empty.
#
# Arguments:
#   $1 - parse failures file to enrich (rewritten in place; '#' header lines
#        are kept, data rows are event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub)
#   $2 - lookup file: event_id<TAB>identifier<TAB>pubkey_hex
# Behaviour:
#   - rows that already carry repo/npub are left as they are
#   - hex pubkeys are converted to npub with a single `nak encode npub` call
#     fed on stdin; if nak is missing, fails, or does not return exactly one
#     line per pubkey, the hex pubkey is used instead
#   - safe under `set -euo pipefail` even when the file has no data rows
# Returns: 0 on success (also when there is nothing to enrich).
enrich_with_repo_npub() {
  local parse_failures_file="$1"
  local lookup_file="$2"

  # -s is false for a missing file as well as for an empty one
  if [[ ! -s "$lookup_file" ]]; then
    log_warn "No enrichment data available - repo/npub columns will remain empty"
    return 0
  fi

  log_info "Enriching parse failures with repo/npub from log entries..."

  # Check if we have nak for pubkey->npub conversion
  local can_convert_npub=false
  if command -v nak &> /dev/null; then
    can_convert_npub=true
    log_info " Using 'nak' for pubkey->npub conversion"
  else
    log_warn " 'nak' not found - will use hex pubkeys instead of npub"
  fi

  local lookup_count
  lookup_count=$(wc -l < "$lookup_file")
  lookup_count="${lookup_count//[^0-9]/}"
  log_info " Lookup table has $lookup_count entries"

  # All intermediate files live in one private directory so a single rm
  # cleans up.
  local work_dir
  work_dir=$(mktemp -d) || return 1
  local unique_pubkeys_file="$work_dir/pubkeys"
  local npubs_file="$work_dir/npubs"
  local npub_map_file="$work_dir/npub-map"
  local enriched_file="$work_dir/enriched"

  # STEP 1: unique, non-empty pubkeys (lookup column 3)
  awk -F'\t' '$3 != "" { print $3 }' "$lookup_file" | sort -u > "$unique_pubkeys_file"
  local unique_pubkey_count
  unique_pubkey_count=$(wc -l < "$unique_pubkeys_file")
  unique_pubkey_count="${unique_pubkey_count//[^0-9]/}"
  log_info " Converting $unique_pubkey_count unique pubkeys to npub format..."

  # STEP 2: batch convert in a single nak call.
  # nak runs as a plain command (not inside a process substitution) so its
  # exit status is visible, and the line count is verified before pairing the
  # two columns - a short or failed conversion must not produce empty or
  # shifted npubs.
  local converted=false npub_line_count
  if [[ "$can_convert_npub" == true && "$unique_pubkey_count" -gt 0 ]]; then
    if nak encode npub < "$unique_pubkeys_file" > "$npubs_file" 2>/dev/null; then
      npub_line_count=$(grep -c '' "$npubs_file" || true)
      if [[ "${npub_line_count:-0}" -eq "$unique_pubkey_count" ]]; then
        paste "$unique_pubkeys_file" "$npubs_file" > "$npub_map_file"
        converted=true
      fi
    fi
    if [[ "$converted" != true ]]; then
      log_warn " Batch npub conversion failed, using hex pubkeys"
    fi
  fi
  if [[ "$converted" != true ]]; then
    # No usable conversion: map every pubkey to itself
    awk '{print $1 "\t" $1}' "$unique_pubkeys_file" > "$npub_map_file"
  fi

  # STEP 3: O(n) join in awk.
  # Copy header lines first (there may be none, hence || true)
  grep '^#' "$parse_failures_file" > "$enriched_file" 2>/dev/null || true

  # Each input file is tagged through an awk assignment operand (src=...)
  # placed in front of it, so the role of a file never depends on an earlier
  # file being non-empty.
  awk -F'\t' -v OFS='\t' '
    # npub map: pubkey_hex -> npub
    src == "map" {
      npub_map[$1] = $2
      next
    }
    # lookup: event_id -> identifier, pubkey_hex
    src == "lookup" {
      lookup_repo[$1] = $2
      lookup_pubkey[$1] = $3
      next
    }
    # parse failures
    /^#/ { next }  # headers were already copied
    {
      event_id = $1
      kind = $2
      reason = $3
      repo = $4
      npub = $5

      # If repo/npub empty, try to enrich from lookup
      if (repo == "" && event_id in lookup_repo) {
        repo = lookup_repo[event_id]
      }
      if (npub == "" && event_id in lookup_pubkey) {
        pubkey = lookup_pubkey[event_id]
        if (pubkey in npub_map && npub_map[pubkey] != "") {
          npub = npub_map[pubkey]
        } else {
          npub = pubkey  # Fallback to hex
        }
      }

      print event_id, kind, reason, repo, npub
    }
  ' src=map "$npub_map_file" src=lookup "$lookup_file" src=data "$parse_failures_file" \
    >> "$enriched_file"

  # Count entries. grep -c prints 0 and exits 1 when nothing matches, which
  # must not abort the script under errexit/pipefail.
  local enriched_count total_count
  total_count=$(grep -c -v '^#' "$parse_failures_file" || true)
  total_count="${total_count:-0}"
  # Rows that have non-empty repo AND npub after enrichment
  enriched_count=$(awk -F'\t' '!/^#/ && $4 != "" && $5 != "" { n++ } END { print n + 0 }' "$enriched_file")

  # Write the result back into the original file rather than moving the
  # temp file over it, so the output keeps its own ownership and mode.
  cat "$enriched_file" > "$parse_failures_file"
  rm -rf -- "$work_dir"

  log_info " Enriched $enriched_count of $total_count parse failures with repo/npub"
  log_success "Enrichment complete"
}
# Parse one "Added rejected announcement to two-tier index" log line.
# Per-line counterpart of parse_rejected_announcement_batch; main() in this
# script uses the batch variant.
#
# Matching is done with bash's own regex engine: no grep process per field, no
# dependency on GNU grep -P, and only the first occurrence of each key is
# taken, so a value can never span several lines.
#
# Arguments: $1 - the log line
# Outputs:   event_id<TAB>identifier<TAB>pubkey_hex on stdout, or nothing when
#            any of the three fields is missing
# Returns:   0 in both cases
parse_rejected_announcement_line() {
  local line="$1"
  local event_id="" identifier="" pubkey_hex=""

  # Patterns are kept in variables so the space inside the bracket expression
  # needs no quoting gymnastics on the right-hand side of =~.
  local re_event_id='event_id=([a-f0-9]+)'
  local re_identifier='identifier=([^ ]+)'
  local re_pubkey='pubkey=([a-f0-9]+)'

  # Extract event_id=VALUE (hex string)
  if [[ "$line" =~ $re_event_id ]]; then
    event_id="${BASH_REMATCH[1]}"
  fi

  # Extract identifier=VALUE (repo name)
  if [[ "$line" =~ $re_identifier ]]; then
    identifier="${BASH_REMATCH[1]}"
  fi

  # Extract pubkey=VALUE (hex string)
  if [[ "$line" =~ $re_pubkey ]]; then
    pubkey_hex="${BASH_REMATCH[1]}"
  fi

  # Only output if we have all required fields
  if [[ -n "$event_id" && -n "$identifier" && -n "$pubkey_hex" ]]; then
    printf '%s\t%s\t%s\n' "$event_id" "$identifier" "$pubkey_hex"
  fi
}
# Write the comment header that opens parse-failures.txt (truncates the file).
# Arguments:
#   $1 - service name
#   $2 - since date ('' is shown as "beginning")
#   $3 - until date ('' is shown as "now")
#   $4 - output file
_write_parse_failures_header() {
  local service="$1" since_date="$2" until_date="$3" output_file="$4"
  {
    echo "# Parse failures and invalid announcements extracted from $service"
    echo "# Time range: ${since_date:-beginning} to ${until_date:-now}"
    echo "# Extracted: $(date -Iseconds)"
    echo "#"
    echo "# Includes:"
    echo "# - [PARSE_FAIL] structured log entries"
    echo "# - \"Invalid announcement\" rejections"
    echo "#"
    echo "# Format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub"
    echo "# Note: repo and npub may be empty for some entries"
  } > "$output_file"
}

# Main entry point: extract parse failures for one systemd service.
#
# Arguments:
#   $1 - systemd service name (".service" is appended when missing)
#   $2 - output directory; parse-failures.txt is written inside it
#   options: --since <date>, --until <date>, --dry-run
# Flow:
#   1. validate arguments and the service
#   2. read the journal ONCE and split the matching lines into three files
#   3. parse, de-duplicate by event_id, enrich with repo/npub
#   4. print a summary
# Returns:
#   Exits 0 on success (including "nothing found"), 1 on invalid input or a
#   missing journalctl.
main() {
  if [[ $# -lt 2 ]]; then
    usage
  fi

  local service="$1"
  local output_dir="$2"
  shift 2

  # Default time range: last 30 days (GNU date first, then the BSD spelling)
  local since_date
  since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null \
    || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "")
  local until_date=""
  local dry_run=false

  # Parse options
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --since | --until)
        # Without this check a trailing "--since" dies on an unbound $2
        # instead of explaining what is wrong.
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires a value"
          usage
        fi
        if [[ "$1" == "--since" ]]; then
          since_date="$2"
        else
          until_date="$2"
        fi
        shift 2
        ;;
      --dry-run)
        dry_run=true
        shift
        ;;
      *)
        log_error "Unknown option: $1"
        usage
        ;;
    esac
  done

  # Validate service name format
  if [[ ! "$service" =~ \.service$ ]]; then
    service="${service}.service"
  fi

  # Validate service is appropriate for structured logging.
  # This prevents the common mistake of using ngit-relay instead of ngit-grasp.
  if type validate_service_for_structured_logging &>/dev/null; then
    # Non-interactive when stdin is not a terminal; the helper is asked to
    # skip its own log check (this script does one itself).
    local interactive="true"
    if [[ ! -t 0 ]]; then
      interactive="false"
    fi

    if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then
      log_error "Service validation failed. Use an ngit-grasp service for structured logging."
      exit 1
    fi
  else
    # Fallback validation if helper not available
    if [[ "$service" == *"ngit-relay"* ]]; then
      log_error "Service name appears to be ngit-relay: $service"
      log_error "Structured logging ([PARSE_FAIL]) only exists in ngit-grasp services."
      log_error "Please use the ngit-grasp archive service instead."
      log_error ""
      log_error "To find the correct service:"
      log_error " systemctl list-units 'ngit-grasp*' --all"
      exit 1
    fi
  fi

  log_info "Extracting parse failures from systemd logs"
  log_info "Service: $service"
  log_info "Output: $output_dir"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"

  # Check if journalctl is available
  if ! command -v journalctl &> /dev/null; then
    log_error "journalctl not found. This script requires systemd."
    exit 1
  fi

  # Validate service exists (check if journalctl can find any logs for it).
  # The service does not have to be running, it only needs to have logs.
  if ! journalctl --no-pager -u "$service" -n 1 &>/dev/null; then
    log_warn "Could not query logs for service: $service"
    log_warn "This may indicate the service doesn't exist or you lack permissions."
    log_warn ""
    log_warn "To list available ngit-grasp services:"
    log_warn " systemctl list-units 'ngit-grasp*' --all"
    log_warn " journalctl --list-boots # Check if you have journal access"
    log_warn ""
    # Continue anyway - the service might exist but have no logs yet
  fi

  # Build the journalctl command as an argument array. The service name and
  # the dates come from the command line; keeping them as separate words (no
  # string + eval) means quotes or spaces in them cannot alter the command.
  local -a journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)
  if [[ -n "$since_date" ]]; then
    journal_cmd+=(--since "$since_date")
  fi
  if [[ -n "$until_date" ]]; then
    journal_cmd+=(--until "$until_date")
  fi

  local journal_cmd_display
  printf -v journal_cmd_display '%q ' "${journal_cmd[@]}"
  log_info "Running: ${journal_cmd_display% } | grep '[PARSE_FAIL]' or 'Invalid announcement'"

  if [[ "$dry_run" == true ]]; then
    log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt"

    # Count both kinds of entry in a single pass over the journal
    log_info "Checking for matching log entries..."
    local dry_parse_fail_count="" dry_invalid_count=""
    read -r dry_parse_fail_count dry_invalid_count < <(
      "${journal_cmd[@]}" 2>/dev/null | awk '
        index($0, "[PARSE_FAIL]") { parse_fail++ }
        index($0, "Event rejected by write policy") && index($0, "Invalid announcement") { invalid++ }
        END { print parse_fail + 0, invalid + 0 }
      '
    ) || true
    dry_parse_fail_count="${dry_parse_fail_count:-0}"
    dry_invalid_count="${dry_invalid_count:-0}"

    log_info "Found $dry_parse_fail_count [PARSE_FAIL] entries"
    log_info "Found $dry_invalid_count 'Invalid announcement' rejections"

    if [[ "$dry_parse_fail_count" -eq 0 && "$dry_invalid_count" -eq 0 ]]; then
      log_warn "No matching entries found in logs."
      log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
      log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)"
    fi

    exit 0
  fi

  # Create output directory
  mkdir -p "$output_dir"
  local output_file="$output_dir/parse-failures.txt"

  # One private directory for every intermediate file, removed on any exit
  # path. The path is kept in a global because the EXIT trap may run after
  # the locals of this function are gone.
  _PARSE_FAILURES_WORK_DIR=$(mktemp -d)
  trap 'rm -rf -- "${_PARSE_FAILURES_WORK_DIR:-}"' EXIT
  local work_dir="$_PARSE_FAILURES_WORK_DIR"

  local temp_stderr="$work_dir/journal.stderr"
  local temp_parse_fail="$work_dir/parse-fail.log"
  local temp_write_policy_rejection="$work_dir/write-policy.log"
  local temp_rejected_announcement="$work_dir/rejected-announcement.log"
  # Pre-create the files: awk only creates the ones it writes a line to
  : > "$temp_parse_fail"
  : > "$temp_write_policy_rejection"
  : > "$temp_rejected_announcement"

  # Extract log entries by streaming the journal once (nothing is held in
  # memory) and fanning the matching lines out to the three files.
  # NOTE: only write policy logs (hex IDs) are extracted, not builder logs
  # (note1 IDs), to avoid double-counting. Both sources contain the same events.
  log_info "Extracting log entries..."
  log_info " Searching for [PARSE_FAIL] entries..."
  log_info " Searching for write policy rejections..."
  log_info " Searching for rejected announcement entries (for enrichment)..."
  "${journal_cmd[@]}" 2>"$temp_stderr" | awk \
    -v parse_fail_out="$temp_parse_fail" \
    -v write_policy_out="$temp_write_policy_rejection" \
    -v rejected_out="$temp_rejected_announcement" '
      index($0, "[PARSE_FAIL]") { print > parse_fail_out }
      index($0, "Event rejected by write policy") && index($0, "Invalid announcement") { print > write_policy_out }
      index($0, "Added rejected announcement to two-tier index") { print > rejected_out }
    ' || true

  local journal_stderr
  journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true)
  if [[ -n "$journal_stderr" ]]; then
    log_warn "journalctl reported: $journal_stderr"
  fi

  # Check if we found anything
  local parse_fail_line_count write_policy_line_count rejected_announcement_line_count
  parse_fail_line_count=$(wc -l < "$temp_parse_fail")
  parse_fail_line_count="${parse_fail_line_count//[^0-9]/}"
  write_policy_line_count=$(wc -l < "$temp_write_policy_rejection")
  write_policy_line_count="${write_policy_line_count//[^0-9]/}"
  rejected_announcement_line_count=$(wc -l < "$temp_rejected_announcement")
  rejected_announcement_line_count="${rejected_announcement_line_count//[^0-9]/}"

  log_info " Found $parse_fail_line_count [PARSE_FAIL] log lines"
  log_info " Found $write_policy_line_count write policy rejection log lines"
  log_info " Found $rejected_announcement_line_count rejected announcement log lines (for enrichment)"

  if [[ "$parse_fail_line_count" -eq 0 && "$write_policy_line_count" -eq 0 ]]; then
    log_warn "No matching entries found in logs."
    log_warn ""
    log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
    log_warn "The script looks for:"
    log_warn ""
    log_warn " 1. [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..."
    log_warn " 2. Event rejected by write policy event_id=... kind=30617 reason=Invalid announcement: ..."
    log_warn ""
    log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)"
    log_warn ""

    # Create empty output file with header comment
    _write_parse_failures_header "$service" "$since_date" "$until_date" "$output_file"
    {
      echo "#"
      echo "# NOTE: No matching entries found."
      echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
    } >> "$output_file"

    log_info "Created empty output file: $output_file"
    exit 0
  fi

  # Write header
  _write_parse_failures_header "$service" "$since_date" "$until_date" "$output_file"

  # Parse each source into its own file; the per-source counts are then just
  # line counts, with no grep pipelines that fail when a file has no rows.
  local parsed_parse_fail="$work_dir/parse-fail.tsv"
  local parsed_write_policy="$work_dir/write-policy.tsv"
  : > "$parsed_parse_fail"
  : > "$parsed_write_policy"

  log_info " Parsing [PARSE_FAIL] entries..."
  local parse_fail_count=0
  if [[ "$parse_fail_line_count" -gt 0 ]]; then
    parse_parse_fail_batch "$temp_parse_fail" > "$parsed_parse_fail"
    parse_fail_count=$(wc -l < "$parsed_parse_fail")
    parse_fail_count="${parse_fail_count//[^0-9]/}"
  fi

  log_info " Parsing write policy rejection entries..."
  local write_policy_count=0
  if [[ "$write_policy_line_count" -gt 0 ]]; then
    parse_write_policy_rejection_batch "$temp_write_policy_rejection" > "$parsed_write_policy"
    write_policy_count=$(wc -l < "$parsed_write_policy")
    write_policy_count="${write_policy_count//[^0-9]/}"
  fi

  local invalid_announcement_count=$write_policy_count

  # Build enrichment lookup table from "Added rejected announcement" entries,
  # de-duplicated by event_id
  local enrichment_lookup_file="$work_dir/enrichment-lookup.tsv"
  : > "$enrichment_lookup_file"
  log_info " Building enrichment lookup table..."
  if [[ "$rejected_announcement_line_count" -gt 0 ]]; then
    parse_rejected_announcement_batch "$temp_rejected_announcement" \
      | sort -t$'\t' -k1,1 -u > "$enrichment_lookup_file"
  fi

  # Deduplicate by event_id (first column) - keep first occurrence.
  # [PARSE_FAIL] rows are listed first and the sort is stable (-s), so for an
  # event present in both sources the richer [PARSE_FAIL] row is the one kept.
  log_info " Deduplicating entries..."
  sort -s -t$'\t' -k1,1 -u "$parsed_parse_fail" "$parsed_write_policy" >> "$output_file"

  # Enrich with repo/npub from "Added rejected announcement" log entries.
  # Without it, write policy rejections carry only event_id and kind.
  enrich_with_repo_npub "$output_file" "$enrichment_lookup_file"

  # Count final entries (excluding header lines). grep -c prints 0 and exits 1
  # when there are none, which must not trip errexit.
  local count
  count=$(grep -c -v '^#' "$output_file" || true)
  count="${count:-0}"

  # Summary
  echo ""
  log_info "=== Extraction Summary ==="
  log_info "Service: $service"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
  log_success "Extracted $count total entries"
  log_info " - [PARSE_FAIL] entries: $parse_fail_count"
  log_info " - Invalid announcement rejections: $invalid_announcement_count"
  echo ""
  log_info "Output file: $output_file"

  if [[ $count -gt 0 ]]; then
    echo ""
    log_info "Sample entries (first 5):"
    # Subshell + "|| true": head closing the pipe early must not abort the
    # script under errexit/pipefail.
    # Columns: event_id, kind, reason, repo, npub
    (grep -v '^#' "$output_file" | head -5 | while IFS=$'\t' read -r event_id kind reason repo npub; do
      echo " kind=$kind event_id=${event_id:0:16}... reason=\"${reason:0:60}...\""
    done) || true

    # Breakdown by kind (column 2)
    echo ""
    log_info "Breakdown by event kind:"
    (grep -v '^#' "$output_file" | awk -F'\t' '{print $2}' | sort | uniq -c | sort -rn | while read -r cnt kind; do
      echo " kind $kind: $cnt failures"
    done) || true
  fi

  # Breakdown by reason pattern (for invalid announcements)
  if [[ $invalid_announcement_count -gt 0 ]]; then
    echo ""
    log_info "Breakdown by reason pattern:"
    # Extract the main reason type (before the colon details)
    (grep -v '^#' "$output_file" | awk -F'\t' '{print $3}' | sed 's/:.*//' | sort | uniq -c | sort -rn | head -10 | while read -r cnt reason; do
      echo " $reason: $cnt"
    done) || true
  fi

  # Explicit success exit
  exit 0
}
+# +# USAGE: +# ./31-extract-purgatory-expiry.sh [options] +# +# EXAMPLES: +# # Extract from ngit-grasp service (last 30 days, default) +# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs +# +# # Extract with custom time range +# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-01" +# +# # Extract from specific time window +# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22" +# +# OPTIONS: +# --since Start date for log extraction (default: 30 days ago) +# --until End date for log extraction (default: now) +# --dry-run Show what would be extracted without writing files +# +# OUTPUT: +# /purgatory-expired.txt +# +# OUTPUT FORMAT (TSV): +# reponpubtimestampreason +# +# EXPECTED LOG FORMAT: +# The script looks for structured log entries in this format: +# +# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="clone URL unreachable after 7 days" +# +# Required fields: repo, npub +# Optional fields: reason (explains why purgatory expired) +# +# BACKGROUND: +# "Purgatory" is the state where ngit-grasp has received an announcement event +# but cannot yet sync the git data (e.g., clone URL unreachable, git server down). +# After a configurable timeout (default 7 days), the repository is marked as +# expired and removed from purgatory. +# +# Purgatory expiry during migration analysis indicates repositories that: +# - Had valid announcements on the production relay +# - Could not be synced to the archive relay +# - May need manual intervention or investigation +# +# DEPENDENCY: +# This script requires logging improvements in ngit-grasp to emit structured +# [PURGATORY_EXPIRED] log entries. Until those are implemented, this script +# will find no matching entries (which is handled gracefully). 
# Diagnostic helpers. Every message goes to stderr, prefixed with a
# colour-wrapped severity tag. The colour variables (RED, GREEN, YELLOW, BLUE,
# NC) are set at the top of the script and are empty when stdout is not a
# terminal.

# Shared formatter: $1 = colour escape, $2 = tag text, remaining args = message.
_log_line() {
    local color="$1"
    local tag="$2"
    shift 2
    echo -e "${color}[${tag}]${NC} $*" >&2
}

# Informational progress message.
log_info() {
    _log_line "$BLUE" "INFO" "$@"
}

# Successful completion of a step.
log_success() {
    _log_line "$GREEN" "OK" "$@"
}

# Non-fatal problem the operator should know about.
log_warn() {
    _log_line "$YELLOW" "WARN" "$@"
}

# Error message; callers decide whether to exit afterwards.
log_error() {
    _log_line "$RED" "ERROR" "$@"
}
#######################################
# Parse one journal line that carries a [PURGATORY_EXPIRED] marker.
#
# Uses bash regex matching instead of `grep -oP`, so it needs no PCRE-capable
# grep and spawns no processes. Only the FIRST repo=/npub= outside the quoted
# reason is used, so a reason such as "sync failed for repo=x" cannot inject a
# second value (and a newline) into the TSV record.
#
# Arguments: $1 - raw log line
# Outputs:   "repo<TAB>npub<TAB>timestamp<TAB>reason" on stdout, or nothing
#            when repo or npub is missing
# Returns:   0 always (a malformed line is skipped, not an error)
#######################################
parse_log_line() {
    local line="$1"
    local timestamp="" repo="" npub="" reason=""
    local fields matched

    # Patterns are kept in variables so they are read as regexes, not as
    # shell-quoted literals.
    local re_timestamp='^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}'
    local re_reason='(^|[[:space:]])reason="([^"]*)"?'
    local re_repo='(^|[[:space:]])repo=([^[:space:]]+)'
    local re_npub='(^|[[:space:]])npub=([^[:space:]]+)'

    # ISO timestamp at the start of the line (timezone suffix is dropped)
    if [[ "$line" =~ $re_timestamp ]]; then
        timestamp="${BASH_REMATCH[0]}"
    fi

    # Only the text after the marker holds key=value fields
    fields="${line#*'[PURGATORY_EXPIRED]'}"

    # Take the quoted reason out first so its free-form text cannot be
    # mistaken for a repo=/npub= field.
    if [[ "$fields" =~ $re_reason ]]; then
        reason="${BASH_REMATCH[2]}"
        matched="${BASH_REMATCH[0]}"
        fields=${fields/"$matched"/ }
    fi
    # A literal tab inside the reason would add a column to the TSV record
    reason=${reason//$'\t'/ }

    if [[ "$fields" =~ $re_repo ]]; then
        repo="${BASH_REMATCH[2]}"
    fi
    if [[ "$fields" =~ $re_npub ]]; then
        npub="${BASH_REMATCH[2]}"
    fi

    # Only output if we have the required fields
    if [[ -n "$repo" && -n "$npub" ]]; then
        printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$timestamp" "$reason"
    fi
    return 0
}

# Write the 4-line comment header of purgatory-expired.txt (truncates the file).
# The summary code in main skips exactly these 4 lines with `tail -n +5`.
# $1 = output file, $2 = service, $3 = since date, $4 = until date
_write_purgatory_header() {
    local output_file="$1" service="$2" since_date="$3" until_date="$4"
    {
        echo "# Purgatory expiry events extracted from $service"
        echo "# Time range: ${since_date:-beginning} to ${until_date:-now}"
        echo "# Extracted: $(date -Iseconds)"
        echo "# Format: repo<TAB>npub<TAB>timestamp<TAB>reason"
    } > "$output_file"
}

#######################################
# Extract [PURGATORY_EXPIRED] journal entries for a service into a TSV file.
#
# Arguments:
#   $1        - systemd service name (".service" is appended when absent)
#   $2        - output directory
#   --since V - passed to journalctl --since (default: 30 days ago)
#   --until V - passed to journalctl --until (default: now)
#   --dry-run - only report how many entries match; write nothing
# Outputs:  <output-dir>/purgatory-expired.txt, diagnostics through log_*
# Exits:    0 on success (also when no entries are found); 1 on usage errors,
#           failed service validation or missing journalctl
#######################################
main() {
    if [[ $# -lt 2 ]]; then
        usage
    fi

    local service="$1"
    local output_dir="$2"
    shift 2

    # Default time range: last 30 days (GNU date first, then the BSD spelling)
    local since_date
    since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "")
    local until_date=""
    local dry_run=false

    # Parse options
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --since|--until)
                # Without this check a trailing "--since" dies with an
                # unbound-variable error under `set -u`.
                if [[ $# -lt 2 ]]; then
                    log_error "Option $1 requires a value"
                    usage
                fi
                if [[ "$1" == "--since" ]]; then
                    since_date="$2"
                else
                    until_date="$2"
                fi
                shift 2
                ;;
            --dry-run)
                dry_run=true
                shift
                ;;
            *)
                log_error "Unknown option: $1"
                usage
                ;;
        esac
    done

    # Validate service name format
    if [[ ! "$service" =~ \.service$ ]]; then
        service="${service}.service"
    fi

    # Validate service is appropriate for structured logging
    # This prevents the common mistake of using ngit-relay instead of ngit-grasp
    if type validate_service_for_structured_logging &>/dev/null; then
        # Use non-interactive mode if not a terminal, skip log check (we'll do our own)
        local interactive="true"
        if [[ ! -t 0 ]]; then
            interactive="false"
        fi

        if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then
            log_error "Service validation failed. Use an ngit-grasp service for structured logging."
            exit 1
        fi
    elif [[ "$service" == *"ngit-relay"* ]]; then
        # Fallback validation if helper not available
        log_error "Service name appears to be ngit-relay: $service"
        log_error "Structured logging ([PURGATORY_EXPIRED]) only exists in ngit-grasp services."
        log_error "Please use the ngit-grasp archive service instead."
        log_error ""
        log_error "To find the correct service:"
        log_error " systemctl list-units 'ngit-grasp*' --all"
        exit 1
    fi

    log_info "Extracting purgatory expiry events from systemd logs"
    log_info "Service: $service"
    log_info "Output: $output_dir"
    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"

    # Check if journalctl is available
    if ! command -v journalctl &> /dev/null; then
        log_error "journalctl not found. This script requires systemd."
        exit 1
    fi

    # Validate service exists (check if journalctl can find any logs for it)
    # Note: We don't require the service to be running, just that it has logs
    if ! journalctl --no-pager -u "$service" -n 1 &>/dev/null; then
        log_warn "Could not query logs for service: $service"
        log_warn "This may indicate the service doesn't exist or you lack permissions."
        log_warn ""
        log_warn "To list available ngit-grasp services:"
        log_warn " systemctl list-units 'ngit-grasp*' --all"
        log_warn " journalctl --list-boots # Check if you have journal access"
        log_warn ""
        # Continue anyway - the service might exist but have no logs yet
    fi

    # Build the journalctl invocation as an argument array. The user-supplied
    # service and dates are passed as single arguments and never re-parsed by
    # the shell (the previous string + eval form executed them as code).
    local -a journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)
    if [[ -n "$since_date" ]]; then
        journal_cmd+=(--since "$since_date")
    fi
    if [[ -n "$until_date" ]]; then
        journal_cmd+=(--until "$until_date")
    fi

    local journal_cmd_display
    printf -v journal_cmd_display '%q ' "${journal_cmd[@]}"
    log_info "Running: ${journal_cmd_display}| grep '\\[PURGATORY_EXPIRED\\]'"

    if [[ "$dry_run" == true ]]; then
        log_info "[DRY RUN] Would extract to: $output_dir/purgatory-expired.txt"

        # Show sample of what would be extracted
        log_info "Checking for matching log entries..."
        local sample_count
        # grep -c prints the count even when it is 0 (exit status 1), so only
        # the status is neutralised here.
        sample_count=$("${journal_cmd[@]}" 2>/dev/null | grep -c '\[PURGATORY_EXPIRED\]' || true)
        sample_count="${sample_count:-0}"
        log_info "Found $sample_count matching log entries"

        if [[ "$sample_count" -eq 0 ]]; then
            log_warn "No [PURGATORY_EXPIRED] entries found in logs."
            log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
            log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)"
        fi

        exit 0
    fi

    # Create output directory
    mkdir -p "$output_dir"

    local output_file="$output_dir/purgatory-expired.txt"

    # Extract and parse log entries
    log_info "Extracting log entries..."

    # Get raw log lines containing [PURGATORY_EXPIRED]
    # journalctl's stderr is captured separately so problems can be reported
    local raw_lines journal_stderr temp_stderr
    temp_stderr=$(mktemp)

    raw_lines=$("${journal_cmd[@]}" 2>"$temp_stderr" | grep '\[PURGATORY_EXPIRED\]' || true)
    journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true)
    rm -f "$temp_stderr"

    # Report any journalctl errors (but don't fail - empty logs are valid)
    if [[ -n "$journal_stderr" ]]; then
        log_warn "journalctl reported: $journal_stderr"
    fi

    if [[ -z "$raw_lines" ]]; then
        log_warn "No [PURGATORY_EXPIRED] entries found in logs."
        log_warn ""
        log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
        log_warn "The structured log format required by this script:"
        log_warn ""
        log_warn " [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason=\"...\""
        log_warn ""
        log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)"
        log_warn ""

        # Create empty output file with header comment
        _write_purgatory_header "$output_file" "$service" "$since_date" "$until_date"
        {
            echo "#"
            echo "# NOTE: No [PURGATORY_EXPIRED] entries found."
            echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
        } >> "$output_file"

        log_info "Created empty output file: $output_file"
        exit 0
    fi

    # Write header
    _write_purgatory_header "$output_file" "$service" "$since_date" "$until_date"

    # Parse each line; the output file is opened once for the whole loop
    local count=0
    local line parsed
    while IFS= read -r line; do
        parsed=$(parse_log_line "$line")
        if [[ -n "$parsed" ]]; then
            printf '%s\n' "$parsed"
            count=$((count + 1))
        fi
    done <<< "$raw_lines" >> "$output_file"

    # Summary
    echo ""
    log_info "=== Extraction Summary ==="
    log_info "Service: $service"
    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
    log_success "Extracted $count purgatory expiry entries"
    echo ""
    log_info "Output file: $output_file"

    if [[ $count -gt 0 ]]; then
        echo ""
        log_info "Sample entries (first 5):"
        # Data starts on line 5 (after the 4 header lines)
        # Use a subshell to avoid SIGPIPE issues with set -e
        (tail -n +5 "$output_file" | head -5 | while IFS=$'\t' read -r repo npub timestamp reason; do
            echo " repo=$repo npub=${npub:0:20}... timestamp=$timestamp"
        done) || true

        # Show unique repos affected (computed once, reused for the "more" line)
        echo ""
        local unique_repos
        unique_repos=$(tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort -u | wc -l)
        log_info "Unique repositories affected: $unique_repos"

        echo ""
        log_info "Repositories with purgatory expiry:"
        # Use a subshell to avoid SIGPIPE issues with set -e
        (tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort | uniq -c | sort -rn | head -10 | while read -r cnt repo; do
            echo " $repo: $cnt expiry events"
        done) || true

        if [[ $unique_repos -gt 10 ]]; then
            echo " ... and $((unique_repos - 10)) more repositories"
        fi
    fi

    # Explicit success exit
    exit 0
}
+# If archive has different/newer data than prod, it means: +# - A state event authorized those commits at some point +# - Archive is actually MORE up-to-date than prod +# - Migration should use archive data (it's already correct) +# +# Usage: ./40-classify-actions.sh +# +# Output format: repo | npub | prod_status | archive_status | context | action +# + +set -euo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +log_info() { echo -e "${BLUE}[INFO]${NC} $*"; } +log_success() { echo -e "${GREEN}[OK]${NC} $*"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; } + +# Check arguments +if [[ $# -lt 1 ]]; then + echo "Usage: $0 " + echo "Example: $0 work/migration-analysis-20260123-200701" + exit 1 +fi + +ANALYSIS_DIR="$1" + +# Validate analysis directory +if [[ ! -d "$ANALYSIS_DIR" ]]; then + log_error "Analysis directory not found: $ANALYSIS_DIR" + exit 1 +fi + +# Define paths +PROD_DIR="$ANALYSIS_DIR/prod" +ARCHIVE_DIR="$ANALYSIS_DIR/archive" +COMPARISON_DIR="$ANALYSIS_DIR/comparison" +LOGS_DIR="$ANALYSIS_DIR/logs" +RESULTS_DIR="$ANALYSIS_DIR/results" + +# Validate required directories +for dir in "$PROD_DIR" "$ARCHIVE_DIR" "$COMPARISON_DIR" "$LOGS_DIR"; do + if [[ ! 
-d "$dir" ]]; then + log_error "Required directory not found: $dir" + exit 1 + fi +done + +# Create results directory +mkdir -p "$RESULTS_DIR" + +# Output files +READY_FILE="$RESULTS_DIR/ready-for-migration.txt" +RESYNC_FILE="$RESULTS_DIR/needs-resync.txt" +REVIEW_FILE="$RESULTS_DIR/manual-review.txt" +SUMMARY_FILE="$RESULTS_DIR/summary.txt" + +# Temporary files for processing +TMP_DIR=$(mktemp -d) +trap 'rm -rf "$TMP_DIR"' EXIT + +log_info "Starting classification with revised system (Option B)" +log_info "Analysis directory: $ANALYSIS_DIR" + +# ============================================================================ +# Phase 1: Build lookup tables from source data +# ============================================================================ + +log_info "Building lookup tables..." + +# Build prod category lookup: repo|npub -> category +declare -A PROD_CAT +while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do + repo="${repo// /}" # Remove all spaces + npub="${npub// /}" # Remove all spaces + [[ -z "$repo" || -z "$npub" ]] && continue + PROD_CAT["$repo|$npub"]="cat1" +done < "$PROD_DIR/category1-complete-match.txt" + +while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + PROD_CAT["$repo|$npub"]="cat2" +done < "$PROD_DIR/category2-empty-blank.txt" + +while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + PROD_CAT["$repo|$npub"]="cat3" +done < "$PROD_DIR/category3-partial-match.txt" + +while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + PROD_CAT["$repo|$npub"]="cat4" +done < "$PROD_DIR/category4-no-match.txt" + +log_info "Loaded ${#PROD_CAT[@]} prod entries" + +# Build archive category lookup: repo|npub -> category +declare -A ARCHIVE_CAT +while IFS='|' read -r repo npub 
rest; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + ARCHIVE_CAT["$repo|$npub"]="cat1" +done < "$ARCHIVE_DIR/category1-complete-match.txt" + +while IFS='|' read -r repo npub rest; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + ARCHIVE_CAT["$repo|$npub"]="cat2" +done < "$ARCHIVE_DIR/category2-empty-blank.txt" + +while IFS='|' read -r repo npub rest; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + ARCHIVE_CAT["$repo|$npub"]="cat3" +done < "$ARCHIVE_DIR/category3-partial-match.txt" + +while IFS='|' read -r repo npub rest; do + repo="${repo// /}" + npub="${npub// /}" + [[ -z "$repo" || -z "$npub" ]] && continue + ARCHIVE_CAT["$repo|$npub"]="cat4" +done < "$ARCHIVE_DIR/category4-no-match.txt" + +log_info "Loaded ${#ARCHIVE_CAT[@]} archive entries" + +# Build purgatory lookup: repo|npub -> 1 (if purgatory expired) +declare -A PURGATORY +PURGATORY_COUNT=0 +if [[ -f "$LOGS_DIR/purgatory-expired.txt" ]]; then + while IFS=$'\t' read -r repo npub timestamp reason || [[ -n "$repo" ]]; do + # Skip comments and empty lines + [[ "$repo" =~ ^# ]] && continue + [[ -z "$repo" || -z "$npub" ]] && continue + PURGATORY["$repo|$npub"]=1 + PURGATORY_COUNT=$((PURGATORY_COUNT + 1)) + done < "$LOGS_DIR/purgatory-expired.txt" +fi +log_info "Loaded $PURGATORY_COUNT purgatory entries" + +# Build parse failure lookup: repo|npub -> 1 (if parse failure logged) +# Parse failures file format: event_idkindreasonreponpub +declare -A PARSE_FAIL +PARSE_FAIL_COUNT=0 +if [[ -f "$LOGS_DIR/parse-failures.txt" ]]; then + while IFS=$'\t' read -r event_id kind reason repo npub || [[ -n "$event_id" ]]; do + # Skip comments and empty lines + [[ "$event_id" =~ ^# ]] && continue + [[ -z "$repo" || -z "$npub" ]] && continue + PARSE_FAIL["$repo|$npub"]=1 + PARSE_FAIL_COUNT=$((PARSE_FAIL_COUNT + 1)) + done < "$LOGS_DIR/parse-failures.txt" +fi +log_info "Loaded $PARSE_FAIL_COUNT parse 
#######################################
# Record repositories referenced by kind 5 deletion events.
#
# Reads an NDJSON file of deletion events, takes every "a" tag of the form
# "30617:<pubkey_hex>:<repo>", converts the hex pubkey to npub with `nak` and
# marks DELETED["<repo>|<npub>"]=1.
#
# Globals:   DELETED (associative array, written)
# Arguments: $1 - path to the NDJSON deletions file; a missing file is a no-op
# Returns:   0 on success, 1 when jq cannot read the file (the failure is now
#            reported instead of silently aborting the script under
#            `set -e` + `pipefail` with jq's stderr discarded)
#######################################
process_deletions() {
    local file="$1"
    [[ -f "$file" ]] || return 0

    # Collect the raw "a" tag coordinates, de-duplicated
    local coords
    if ! coords=$(jq -r '.tags[] | select(.[0] == "a") | .[1]' "$file" | sort -u); then
        log_error "Failed to read deletion events from: $file"
        return 1
    fi

    # hex pubkey -> npub cache, so nak runs once per distinct key
    local -A hex_to_npub=()
    local coord kind pubkey_hex repo npub

    while IFS= read -r coord; do
        [[ -n "$coord" ]] || continue

        # The last variable receives the remainder, so repo identifiers that
        # themselves contain ':' are kept intact.
        IFS=: read -r kind pubkey_hex repo <<< "$coord"

        # Only repository announcements (kind 30617) are relevant
        [[ "$kind" == "30617" ]] || continue
        [[ -n "$pubkey_hex" && -n "$repo" ]] || continue

        if [[ -z "${hex_to_npub[$pubkey_hex]+cached}" ]]; then
            # An empty result is cached too, so a bad key is only tried once
            hex_to_npub[$pubkey_hex]=$(nak encode npub "$pubkey_hex" 2>/dev/null || true)
        fi
        npub="${hex_to_npub[$pubkey_hex]}"
        [[ -n "$npub" ]] || continue

        DELETED["$repo|$npub"]=1
    done <<< "$coords"

    return 0
}
+# This data comes from 22-compare-git-data.sh which compares actual git commits +declare -A GIT_ANCESTRY +GIT_ANCESTRY_COUNT=0 +if [[ -f "$COMPARISON_DIR/git-ancestry.tsv" ]]; then + while IFS=$'\t' read -r repo npub relationship details || [[ -n "$repo" ]]; do + # Skip header and comments + [[ "$repo" == "repo" ]] && continue + [[ "$repo" =~ ^# ]] && continue + [[ -z "$repo" || -z "$npub" ]] && continue + GIT_ANCESTRY["$repo|$npub"]="$relationship" + GIT_ANCESTRY_COUNT=$((GIT_ANCESTRY_COUNT + 1)) + done < "$COMPARISON_DIR/git-ancestry.tsv" + log_info "Loaded $GIT_ANCESTRY_COUNT git ancestry entries" +else + log_warn "No git-ancestry.tsv found - will not check if archive is ahead of prod" + log_warn "Run 22-compare-git-data.sh to enable archive-ahead detection" +fi + +# ============================================================================ +# Phase 2: Build unique repo list from all sources +# ============================================================================ + +log_info "Building unique repo list..." + +declare -A ALL_REPOS +for key in "${!PROD_CAT[@]}"; do + ALL_REPOS["$key"]=1 +done +for key in "${!ARCHIVE_CAT[@]}"; do + ALL_REPOS["$key"]=1 +done +for key in "${!PURGATORY[@]}"; do + ALL_REPOS["$key"]=1 +done + +log_info "Total unique repos: ${#ALL_REPOS[@]}" + +# ============================================================================ +# Phase 3: Classify each repo according to revised decision tree +# ============================================================================ + +log_info "Classifying repos..." 
# Build the comma-separated context column for one repo.
#   $1 - lookup key "repo|npub"
#   $2 - human-readable prod status
#   $3 - human-readable archive status
# Reads the PURGATORY and PARSE_FAIL lookup tables. Prints the applicable
# flags joined by ", ", or "none" when no flag applies.
get_context() {
    local key="$1"
    local prod_status="$2"
    local archive_status="$3"
    local -a flags=()

    # Purgatory expiry was logged for this repo
    if [[ -n "${PURGATORY[$key]:-}" ]]; then
        flags+=("purgatory-expired")
    fi

    # A parse failure was logged for this repo
    if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
        flags+=("parse-failure")
    fi

    # Unexpected state: prod is empty although the archive holds something
    if [[ "$prod_status" == "empty" ]]; then
        case "$archive_status" in
            missing|empty) ;;
            *) flags+=("archive-has-data") ;;
        esac
    fi

    if [[ ${#flags[@]} -eq 0 ]]; then
        echo "none"
        return 0
    fi

    local joined
    printf -v joined '%s, ' "${flags[@]}"
    echo "${joined%, }"
}

# Translate a category code (cat1..cat4) into its human-readable status.
# Any other value, including "missing", is printed unchanged.
cat_to_status() {
    local category="$1"

    if [[ "$category" == "cat1" ]]; then
        echo "complete"
    elif [[ "$category" == "cat2" ]]; then
        echo "empty"
    elif [[ "$category" == "cat3" ]]; then
        echo "partial"
    elif [[ "$category" == "cat4" ]]; then
        echo "no-match"
    else
        echo "$category"
    fi
}
+ if [[ -n "${DELETED[$key]:-}" ]]; then + context=$(get_context "$key" "$prod_status" "$archive_status") + READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | deleted by user") + COUNTS[ready_deleted]=$((COUNTS[ready_deleted] + 1)) + continue + fi + + # 2. What is the prod status? + case "$prod_cat" in + missing) + # Not in prod + if [[ "$archive_cat" != "missing" ]]; then + # In archive but not in prod -> no action (archive-only) + context=$(get_context "$key" "$prod_status" "$archive_status") + READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | archive-only (not in prod)") + COUNTS[ready_archive_only]=$((COUNTS[ready_archive_only] + 1)) + elif [[ -n "${PURGATORY[$key]:-}" ]]; then + # Purgatory only, not in prod -> no action + context="purgatory-expired" + READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | purgatory-only (not in prod)") + COUNTS[ready_not_in_prod]=$((COUNTS[ready_not_in_prod] + 1)) + fi + # Otherwise skip (not a real repo - no data anywhere) + ;; + + cat2) + # Empty in prod -> ALWAYS no action required + context=$(get_context "$key" "$prod_status" "$archive_status") + READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | empty in prod (user never pushed)") + COUNTS[ready_empty_prod]=$((COUNTS[ready_empty_prod] + 1)) + ;; + + cat1) + # Complete in prod + if [[ "$archive_cat" == "cat1" ]]; then + # Complete in both -> no action + context=$(get_context "$key" "$prod_status" "$archive_status") + READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete in both") + COUNTS[ready_complete_both]=$((COUNTS[ready_complete_both] + 1)) + else + # Complete in prod, missing/incomplete in archive + # Check for parse failure - if so, needs manual review + if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then + context=$(get_context "$key" "$prod_status" "$archive_status") + REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete 
in prod with parse failure") + COUNTS[review_parse_failure]=$((COUNTS[review_parse_failure] + 1)) + else + # Check git ancestry to see if archive is actually ahead + git_relationship="${GIT_ANCESTRY[$key]:-unknown}" + + if [[ "$git_relationship" == "archive-ahead" || "$git_relationship" == "in-sync" ]]; then + # Archive has newer/same git data - this is GOOD + # Archive's git data was authorized by a state event (GRASP enforced) + context=$(get_context "$key" "$prod_status" "$archive_status") + if [[ -n "$context" && "$context" != "none" ]]; then + context="$context, git=$git_relationship" + else + context="git=$git_relationship" + fi + READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | archive ahead (use archive data)") + COUNTS[ready_archive_ahead]=$((COUNTS[ready_archive_ahead] + 1)) + elif [[ "$git_relationship" == "diverged" ]]; then + # Git histories diverged - needs manual review + context=$(get_context "$key" "$prod_status" "$archive_status") + if [[ -n "$context" && "$context" != "none" ]]; then + context="$context, git=diverged" + else + context="git=diverged" + fi + REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | git histories diverged (manual review)") + COUNTS[review_diverged]=$((COUNTS[review_diverged] + 1)) + else + # prod-ahead, archive-only, prod-only, both-empty, or unknown + # These need resync - include purgatory context + context=$(get_context "$key" "$prod_status" "$archive_status") + if [[ "$git_relationship" != "unknown" ]]; then + if [[ -n "$context" && "$context" != "none" ]]; then + context="$context, git=$git_relationship" + else + context="git=$git_relationship" + fi + fi + if [[ "$archive_cat" == "missing" ]]; then + RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync to archive") + COUNTS[resync_missing_archive]=$((COUNTS[resync_missing_archive] + 1)) + else + RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger 
re-sync (archive incomplete)") + COUNTS[resync_incomplete_archive]=$((COUNTS[resync_incomplete_archive] + 1)) + fi + fi + fi + fi + ;; + + cat3) + # Partial in prod -> ALWAYS manual investigation + context=$(get_context "$key" "$prod_status" "$archive_status") + REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | partial in prod (investigate git data)") + COUNTS[review_partial_prod]=$((COUNTS[review_partial_prod] + 1)) + ;; + + cat4) + # No-match in prod -> ALWAYS manual investigation + context=$(get_context "$key" "$prod_status" "$archive_status") + REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | no-match in prod (git corruption)") + COUNTS[review_nomatch_prod]=$((COUNTS[review_nomatch_prod] + 1)) + ;; + esac +done + +# ============================================================================ +# Phase 4: Write output files +# ============================================================================ + +log_info "Writing output files..." 
+ +TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S+00:00") + +# Write ready-for-migration.txt +{ + echo "# Ready for Migration - No action required" + echo "# Generated: $TIMESTAMP" + echo "# Format: repo | npub | prod_status | archive_status | context | reason" + echo "#" + for line in "${READY_LINES[@]}"; do + echo "$line" + done +} > "$READY_FILE" + +# Write needs-resync.txt +{ + echo "# Needs Re-sync - Action required" + echo "# Generated: $TIMESTAMP" + echo "# Format: repo | npub | prod_status | archive_status | context | action" + echo "#" + echo "# Context meanings:" + echo "# purgatory-expired = archive tried to sync but failed (30min timeout)" + echo "# none = archive never tried or announcement missing" + echo "#" + for line in "${RESYNC_LINES[@]}"; do + echo "$line" + done +} > "$RESYNC_FILE" + +# Write manual-review.txt +{ + echo "# Manual Review Required - Investigation needed" + echo "# Generated: $TIMESTAMP" + echo "# Format: repo | npub | prod_status | archive_status | context | reason" + echo "#" + for line in "${REVIEW_LINES[@]}"; do + echo "$line" + done +} > "$REVIEW_FILE" + +# ============================================================================ +# Phase 5: Generate summary +# ============================================================================ + +log_info "Generating summary..." 
# Print the number of non-comment lines in a file; 0 if it cannot be read.
# `grep -c` always prints the count but exits 1 when the count is 0, so only
# the exit status is neutralised. The previous `|| echo 0` printed a second
# "0" for header-only files, putting a stray line in the summary.
count_data_lines() {
    local file="$1"
    local n=""
    if [[ -f "$file" ]]; then
        n=$(grep -c -v '^#' "$file" 2>/dev/null || true)
    fi
    echo "${n:-0}"
}

# Print part/total as a percentage with one decimal ("0.0" when total is 0).
# Values reach awk through -v rather than being pasted into the program text.
percent_of() {
    local part="$1" total="$2"
    if [[ "$total" -gt 0 ]]; then
        awk -v part="$part" -v total="$total" 'BEGIN { printf "%.1f", (part / total) * 100 }'
    else
        printf '0.0'
    fi
}

TOTAL_READY="${#READY_LINES[@]}"
TOTAL_RESYNC="${#RESYNC_LINES[@]}"
TOTAL_REVIEW="${#REVIEW_LINES[@]}"
TOTAL=$((TOTAL_READY + TOTAL_RESYNC + TOTAL_REVIEW))

# Calculate percentages
PCT_READY=$(percent_of "$TOTAL_READY" "$TOTAL")
PCT_RESYNC=$(percent_of "$TOTAL_RESYNC" "$TOTAL")
PCT_REVIEW=$(percent_of "$TOTAL_REVIEW" "$TOTAL")

{
    echo "# Migration Classification Summary"
    echo "Generated: $TIMESTAMP"
    echo "Analysis Directory: $ANALYSIS_DIR"
    echo ""
    echo "## Overview"
    echo ""
    echo "| Category | Count | Percentage |"
    echo "|----------|-------|------------|"
    echo "| Ready for Migration | $TOTAL_READY | $PCT_READY% |"
    echo "| Needs Re-sync | $TOTAL_RESYNC | $PCT_RESYNC% |"
    echo "| Manual Review | $TOTAL_REVIEW | $PCT_REVIEW% |"
    echo "| **Total** | **$TOTAL** | **100%** |"
    echo ""
    echo "## Tier 1: Ready for Migration ($TOTAL_READY repos)"
    echo ""
    echo "These repositories are ready for migration or don't need migration:"
    echo ""
    echo "| Reason | Count |"
    echo "|--------|-------|"
    echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |"
    echo "| archive ahead (has newer git data) | ${COUNTS[ready_archive_ahead]} |"
    echo "| deleted by user | ${COUNTS[ready_deleted]} |"
    echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |"
    echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |"
    echo "| purgatory-only (not in prod) | ${COUNTS[ready_not_in_prod]} |"
    echo ""
    echo "## Tier 2: Needs Re-sync ($TOTAL_RESYNC repos)"
    echo ""
    echo "These repositories need re-sync to archive before migration:"
    echo ""
    echo "| Reason | Count | Action |"
    echo "|--------|-------|--------|"
    echo "| complete in prod, missing from archive | ${COUNTS[resync_missing_archive]} | trigger re-sync |"
    echo "| complete in prod, incomplete in archive | ${COUNTS[resync_incomplete_archive]} | trigger re-sync |"
    echo ""
    echo "### Purgatory Context"
    echo ""
    echo "Repos in needs-resync.txt include purgatory context:"
    echo "- **purgatory-expired**: Archive tried to sync but failed (30min timeout)"
    echo "- **none**: Archive never tried or announcement missing"
    echo ""
    echo "## Tier 3: Manual Review ($TOTAL_REVIEW repos)"
    echo ""
    echo "These repositories require human investigation:"
    echo ""
    echo "| Reason | Count |"
    echo "|--------|-------|"
    echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |"
    echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |"
    echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |"
    echo "| git histories diverged | ${COUNTS[review_diverged]} |"
    echo ""
    echo "## Input Data Summary"
    echo ""
    echo "### Prod Categories"
    echo "- Category 1 (complete): $(wc -l < "$PROD_DIR/category1-complete-match.txt")"
    echo "- Category 2 (empty): $(wc -l < "$PROD_DIR/category2-empty-blank.txt")"
    echo "- Category 3 (partial): $(wc -l < "$PROD_DIR/category3-partial-match.txt")"
    echo "- Category 4 (no match): $(wc -l < "$PROD_DIR/category4-no-match.txt")"
    echo ""
    echo "### Archive Categories"
    echo "- Category 1 (complete): $(wc -l < "$ARCHIVE_DIR/category1-complete-match.txt")"
    echo "- Category 2 (empty): $(wc -l < "$ARCHIVE_DIR/category2-empty-blank.txt")"
    echo "- Category 3 (partial): $(wc -l < "$ARCHIVE_DIR/category3-partial-match.txt")"
    echo "- Category 4 (no match): $(wc -l < "$ARCHIVE_DIR/category4-no-match.txt")"
    echo ""
    echo "### Logs"
    echo "- Parse failures: $(count_data_lines "$LOGS_DIR/parse-failures.txt")"
    echo "- Purgatory expired: $(count_data_lines "$LOGS_DIR/purgatory-expired.txt")"
    echo ""
    echo "## Output Files"
    echo ""
    echo "- \`results/ready-for-migration.txt\` - $TOTAL_READY repos ready for migration"
    echo "- \`results/needs-resync.txt\` - $TOTAL_RESYNC repos needing re-sync"
    echo "- \`results/manual-review.txt\` - $TOTAL_REVIEW repos needing investigation"
    echo "- \`results/summary.txt\` - This summary file"
    echo ""
    echo "## Recommended Next Steps"
    echo ""
    echo "1. **Review needs-resync.txt** - Trigger re-sync for these repos"
    echo "2. **Review manual-review.txt** - Investigate unusual states"
    echo "3. **Verify ready-for-migration.txt** - Spot-check a few repos"
    echo "4. **Plan migration window** - Schedule cutover when action items resolved"
} > "$SUMMARY_FILE"

# ============================================================================
# Phase 6: Print summary to console
# ============================================================================

echo ""
log_success "Classification complete!"
echo ""
echo "=== Summary ==="
echo "Ready for Migration: $TOTAL_READY ($PCT_READY%)"
echo " - Complete in both: ${COUNTS[ready_complete_both]}"
echo " - Archive ahead: ${COUNTS[ready_archive_ahead]}"
echo " - Deleted by user: ${COUNTS[ready_deleted]}"
echo " - Empty in prod: ${COUNTS[ready_empty_prod]}"
echo " - Archive-only: ${COUNTS[ready_archive_only]}"
echo " - Purgatory-only: ${COUNTS[ready_not_in_prod]}"
echo ""
echo "Needs Re-sync: $TOTAL_RESYNC ($PCT_RESYNC%)"
echo " - Missing from archive: ${COUNTS[resync_missing_archive]}"
echo " - Incomplete in archive: ${COUNTS[resync_incomplete_archive]}"
echo ""
echo "Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)"
echo " - Partial in prod: ${COUNTS[review_partial_prod]}"
echo " - No-match in prod: ${COUNTS[review_nomatch_prod]}"
echo " - Parse failures: ${COUNTS[review_parse_failure]}"
echo " - Git diverged: ${COUNTS[review_diverged]}"
echo ""
echo "Total: $TOTAL repos"
echo ""
echo "Output files:"
echo " $READY_FILE"
echo " $RESYNC_FILE"
echo " $REVIEW_FILE"
echo " $SUMMARY_FILE"
a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/run-migration-analysis.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/run-migration-analysis.sh new file mode 100755 index 0000000..acc5e44 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/run-migration-analysis.sh @@ -0,0 +1,779 @@ +#!/usr/bin/env bash +# +# run-migration-analysis.sh - Orchestrate the complete GRASP relay to ngit-grasp migration analysis +# +# This script runs all 5 phases of the migration analysis pipeline in sequence, +# with proper error handling, progress reporting, and timing information. +# +# QUICK START: +# # Basic usage (local analysis only - Phases 1, 3, 5) +# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev +# +# # Full analysis including git sync check (requires VPS access) +# ./run-migration-analysis.sh \ +# --prod-relay wss://relay.ngit.dev \ +# --archive-relay wss://archive.relay.ngit.dev \ +# --prod-git /var/lib/grasp-relay/git \ +# --archive-git /var/lib/ngit-grasp/git +# +# USAGE: +# ./run-migration-analysis.sh [options] +# +# REQUIRED OPTIONS: +# --prod-relay Production relay WebSocket URL (e.g., wss://relay.ngit.dev) +# --archive-relay Archive relay WebSocket URL (e.g., wss://archive.relay.ngit.dev) +# +# OPTIONAL OPTIONS: +# --prod-git Git base directory for prod (enables Phase 2) +# --archive-git Git base directory for archive (enables Phase 2) +# --service Systemd service name for log extraction (enables Phase 4) +# --output Output directory (default: work/migration-analysis-YYYYMMDD-HHMM) +# --since Start date for log extraction (default: 30 days ago) +# --until End date for log extraction (default: now) +# +# PHASE CONTROL: +# --skip-phase-1 Skip event fetching (use existing data) +# --skip-phase-2 Skip git sync check (use existing data) +# --skip-phase-3 Skip categorization (use existing data) +# --skip-phase-4 Skip log extraction (use existing data) +# --skip-phase-5 Skip 
final classification +# --only-phase-N Run only phase N (1-5) +# --from-phase-N Start from phase N (skip earlier phases) +# +# OTHER OPTIONS: +# --dry-run Show what would be executed without running +# --continue-on-error Continue to next phase even if current phase fails +# --help Show this help message +# +# PHASES: +# Phase 1: Fetch events from both relays (~30s each, local) +# Phase 2: Check git sync status (~20 min each, requires VPS) +# Phase 3: Categorize and compare results (fast, local) +# Phase 4: Extract logs from systemd (requires VPS) +# Phase 5: Final classification (fast, local) +# +# EXAMPLES: +# # Dry run to see what would happen +# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev --dry-run +# +# # Run only Phase 1 (fetch events) +# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev --only-phase-1 +# +# # Resume from Phase 3 using existing Phase 1-2 data +# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev --from-phase-3 --output work/migration-analysis-20260122-1430 +# +# # Full analysis on VPS with all features +# ./run-migration-analysis.sh \ +# --prod-relay wss://relay.ngit.dev \ +# --archive-relay wss://archive.relay.ngit.dev \ +# --prod-git /var/lib/grasp-relay/git \ +# --archive-git /var/lib/ngit-grasp/git \ +# --service ngit-grasp.service +# +# SEE ALSO: +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide +# + +set -euo pipefail + +# Get script directory for finding other scripts +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Colors for output (disabled if not a terminal) +if [[ -t 1 ]]; then + RED='\033[0;31m' + GREEN='\033[0;32m' + YELLOW='\033[0;33m' + BLUE='\033[0;34m' + CYAN='\033[0;36m' + BOLD='\033[1m' + NC='\033[0m' +else + RED='' + GREEN='' + YELLOW='' + BLUE='' + CYAN='' + BOLD='' + NC='' +fi + +# Logging functions +log_header() { + 
echo "" + echo -e "${BOLD}${CYAN}════════════════════════════════════════════════════════════════${NC}" + echo -e "${BOLD}${CYAN} $*${NC}" + echo -e "${BOLD}${CYAN}════════════════════════════════════════════════════════════════${NC}" + echo "" +} + +log_phase() { + echo "" + echo -e "${BOLD}${BLUE}┌──────────────────────────────────────────────────────────────┐${NC}" + echo -e "${BOLD}${BLUE}│ $*${NC}" + echo -e "${BOLD}${BLUE}└──────────────────────────────────────────────────────────────┘${NC}" +} + +log_info() { + echo -e "${BLUE}[INFO]${NC} $*" >&2 +} + +log_success() { + echo -e "${GREEN}[OK]${NC} $*" >&2 +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $*" >&2 +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $*" >&2 +} + +log_step() { + echo -e "${CYAN} →${NC} $*" >&2 +} + +# Default values +PROD_RELAY="" +ARCHIVE_RELAY="" +PROD_GIT="" +ARCHIVE_GIT="" +SERVICE_NAME="" +OUTPUT_DIR="" +DRY_RUN=false +CONTINUE_ON_ERROR=false +LOG_SINCE="" +LOG_UNTIL="" + +# Phase control +SKIP_PHASE_1=false +SKIP_PHASE_2=false +SKIP_PHASE_3=false +SKIP_PHASE_4=false +SKIP_PHASE_5=false +ONLY_PHASE="" +FROM_PHASE="" + +# Timing +declare -A PHASE_TIMES + +usage() { + head -73 "$0" | tail -n +3 | sed 's/^# //' | sed 's/^#//' + exit 0 +} + +# Parse command line arguments +parse_args() { + while [[ $# -gt 0 ]]; do + case "$1" in + --prod-relay) + PROD_RELAY="$2" + shift 2 + ;; + --archive-relay) + ARCHIVE_RELAY="$2" + shift 2 + ;; + --prod-git) + PROD_GIT="$2" + shift 2 + ;; + --archive-git) + ARCHIVE_GIT="$2" + shift 2 + ;; + --service) + SERVICE_NAME="$2" + shift 2 + ;; + --output) + OUTPUT_DIR="$2" + shift 2 + ;; + --skip-phase-1) + SKIP_PHASE_1=true + shift + ;; + --skip-phase-2) + SKIP_PHASE_2=true + shift + ;; + --skip-phase-3) + SKIP_PHASE_3=true + shift + ;; + --skip-phase-4) + SKIP_PHASE_4=true + shift + ;; + --skip-phase-5) + SKIP_PHASE_5=true + shift + ;; + --only-phase-1|--only-phase-2|--only-phase-3|--only-phase-4|--only-phase-5) + ONLY_PHASE="${1#--only-phase-}" + 
shift + ;; + --from-phase-1|--from-phase-2|--from-phase-3|--from-phase-4|--from-phase-5) + FROM_PHASE="${1#--from-phase-}" + shift + ;; + --dry-run) + DRY_RUN=true + shift + ;; + --continue-on-error) + CONTINUE_ON_ERROR=true + shift + ;; + --since) + LOG_SINCE="$2" + shift 2 + ;; + --until) + LOG_UNTIL="$2" + shift 2 + ;; + --help|-h) + usage + ;; + *) + log_error "Unknown option: $1" + echo "Use --help for usage information." + exit 1 + ;; + esac + done +} + +# Validate required arguments +validate_args() { + local errors=0 + + if [[ -z "$PROD_RELAY" ]]; then + log_error "Missing required option: --prod-relay" + errors=1 + fi + + if [[ -z "$ARCHIVE_RELAY" ]]; then + log_error "Missing required option: --archive-relay" + errors=1 + fi + + # Validate relay URLs + if [[ -n "$PROD_RELAY" && ! "$PROD_RELAY" =~ ^wss?:// ]]; then + log_error "Invalid prod relay URL: $PROD_RELAY (must start with ws:// or wss://)" + errors=1 + fi + + if [[ -n "$ARCHIVE_RELAY" && ! "$ARCHIVE_RELAY" =~ ^wss?:// ]]; then + log_error "Invalid archive relay URL: $ARCHIVE_RELAY (must start with ws:// or wss://)" + errors=1 + fi + + # Validate git paths if provided + if [[ -n "$PROD_GIT" && ! -d "$PROD_GIT" ]]; then + log_warn "Prod git directory not found: $PROD_GIT" + log_warn "Phase 2 will fail unless running on VPS with access to this path." + fi + + if [[ -n "$ARCHIVE_GIT" && ! -d "$ARCHIVE_GIT" ]]; then + log_warn "Archive git directory not found: $ARCHIVE_GIT" + log_warn "Phase 2 will fail unless running on VPS with access to this path." + fi + + if [[ $errors -eq 1 ]]; then + echo "" + echo "Use --help for usage information." + exit 1 + fi +} + +# Check prerequisites +check_prerequisites() { + local missing=0 + + log_info "Checking prerequisites..." 
+ + # Required tools + for tool in git nak jq awk sort; do + if command -v "$tool" &> /dev/null; then + log_step "$tool: found" + else + log_error "$tool: NOT FOUND" + missing=1 + fi + done + + # Optional tools + if command -v journalctl &> /dev/null; then + log_step "journalctl: found (Phase 4 available)" + else + log_step "journalctl: not found (Phase 4 will be skipped)" + SKIP_PHASE_4=true + fi + + if [[ $missing -eq 1 ]]; then + log_error "Missing required tools. Install them and try again." + exit 1 + fi + + # Check scripts exist + for script in 01-fetch-events.sh 10-check-git-sync.sh 20-categorize.sh 21-compare-relays.sh 22-compare-git-data.sh 30-extract-parse-failures.sh 31-extract-purgatory-expiry.sh 40-classify-actions.sh; do + if [[ ! -x "$SCRIPT_DIR/$script" ]]; then + log_error "Script not found or not executable: $SCRIPT_DIR/$script" + missing=1 + fi + done + + if [[ $missing -eq 1 ]]; then + exit 1 + fi + + log_success "All prerequisites satisfied" +} + +# Determine which phases to run +determine_phases() { + # Handle --only-phase-N + if [[ -n "$ONLY_PHASE" ]]; then + for i in 1 2 3 4 5; do + if [[ "$i" != "$ONLY_PHASE" ]]; then + eval "SKIP_PHASE_$i=true" + fi + done + fi + + # Handle --from-phase-N + if [[ -n "$FROM_PHASE" ]]; then + for i in 1 2 3 4 5; do + if [[ "$i" -lt "$FROM_PHASE" ]]; then + eval "SKIP_PHASE_$i=true" + fi + done + fi + + # Auto-skip Phase 2 if git paths not provided + if [[ -z "$PROD_GIT" && -z "$ARCHIVE_GIT" ]]; then + if [[ "$SKIP_PHASE_2" != "true" ]]; then + log_warn "No git paths provided. Phase 2 (git sync check) will be skipped." + log_warn "Use --prod-git and --archive-git to enable Phase 2." + SKIP_PHASE_2=true + fi + fi + + # Auto-skip Phase 4 if service not provided + if [[ -z "$SERVICE_NAME" ]]; then + if [[ "$SKIP_PHASE_4" != "true" ]]; then + log_warn "No service name provided. Phase 4 (log extraction) will be skipped." + log_warn "Use --service to enable Phase 4." 
+ SKIP_PHASE_4=true + fi + fi +} + +# Setup output directory +setup_output_dir() { + if [[ -z "$OUTPUT_DIR" ]]; then + OUTPUT_DIR="work/migration-analysis-$(date +%Y%m%d-%H%M)" + fi + + log_info "Output directory: $OUTPUT_DIR" + + if [[ "$DRY_RUN" == "true" ]]; then + log_info "[DRY RUN] Would create directory structure" + return + fi + + mkdir -p "$OUTPUT_DIR"/{prod/raw,archive/raw,comparison,logs,results} + + # Save configuration + cat > "$OUTPUT_DIR/config.txt" << EOF +# Migration Analysis Configuration +# Generated: $(date -Iseconds) + +PROD_RELAY=$PROD_RELAY +ARCHIVE_RELAY=$ARCHIVE_RELAY +PROD_GIT=$PROD_GIT +ARCHIVE_GIT=$ARCHIVE_GIT +SERVICE_NAME=$SERVICE_NAME +OUTPUT_DIR=$OUTPUT_DIR +EOF + + log_success "Created output directory structure" +} + +# Run a phase with timing and error handling +run_phase() { + local phase_num="$1" + local phase_name="$2" + shift 2 + local cmd=("$@") + + local skip_var="SKIP_PHASE_$phase_num" + if [[ "${!skip_var}" == "true" ]]; then + log_phase "Phase $phase_num: $phase_name [SKIPPED]" + return 0 + fi + + log_phase "Phase $phase_num: $phase_name" + + if [[ "$DRY_RUN" == "true" ]]; then + log_info "[DRY RUN] Would execute:" + for c in "${cmd[@]}"; do + echo " $c" + done + return 0 + fi + + local start_time + start_time=$(date +%s) + + local exit_code=0 + + # Execute the command(s) + for c in "${cmd[@]}"; do + log_step "Running: $c" + if ! 
eval "$c"; then + exit_code=1 + if [[ "$CONTINUE_ON_ERROR" == "true" ]]; then + log_warn "Command failed, continuing due to --continue-on-error" + else + log_error "Command failed" + break + fi + fi + done + + local end_time + end_time=$(date +%s) + local duration=$((end_time - start_time)) + PHASE_TIMES[$phase_num]=$duration + + if [[ $exit_code -eq 0 ]]; then + log_success "Phase $phase_num completed in ${duration}s" + else + log_error "Phase $phase_num failed after ${duration}s" + if [[ "$CONTINUE_ON_ERROR" != "true" ]]; then + return 1 + fi + fi + + return $exit_code +} + +# Phase 1: Fetch events +run_phase_1() { + local cmds=() + + # Fetch from prod relay + cmds+=("'$SCRIPT_DIR/01-fetch-events.sh' '$PROD_RELAY' '$OUTPUT_DIR/prod'") + + # Fetch from archive relay + cmds+=("'$SCRIPT_DIR/01-fetch-events.sh' '$ARCHIVE_RELAY' '$OUTPUT_DIR/archive'") + + run_phase 1 "Fetch Events (~30s each)" "${cmds[@]}" +} + +# Phase 2: Git sync check +run_phase_2() { + local cmds=() + + if [[ -n "$PROD_GIT" ]]; then + cmds+=("'$SCRIPT_DIR/10-check-git-sync.sh' '$OUTPUT_DIR/prod/raw/state-events.json' '$PROD_GIT' '$OUTPUT_DIR/prod' --categorize") + else + log_warn "Skipping prod git sync check (no --prod-git provided)" + fi + + if [[ -n "$ARCHIVE_GIT" ]]; then + cmds+=("'$SCRIPT_DIR/10-check-git-sync.sh' '$OUTPUT_DIR/archive/raw/state-events.json' '$ARCHIVE_GIT' '$OUTPUT_DIR/archive' --categorize") + else + log_warn "Skipping archive git sync check (no --archive-git provided)" + fi + + if [[ ${#cmds[@]} -eq 0 ]]; then + log_warn "No git paths provided, skipping Phase 2" + return 0 + fi + + run_phase 2 "Git Sync Check (~20 min each)" "${cmds[@]}" +} + +# Phase 3: Categorize and compare +run_phase_3() { + local cmds=() + + # Check if we have git-sync-status.tsv files (from Phase 2) + # If not, we can't run categorization + local has_prod_sync=false + local has_archive_sync=false + + if [[ -f "$OUTPUT_DIR/prod/git-sync-status.tsv" ]]; then + has_prod_sync=true + fi + + if [[ -f 
"$OUTPUT_DIR/archive/git-sync-status.tsv" ]]; then + has_archive_sync=true + fi + + # Run categorization if we have sync data but no category files + if [[ "$has_prod_sync" == "true" && ! -f "$OUTPUT_DIR/prod/category1-complete-match.txt" ]]; then + cmds+=("'$SCRIPT_DIR/20-categorize.sh' '$OUTPUT_DIR/prod/git-sync-status.tsv' '$OUTPUT_DIR/prod'") + fi + + if [[ "$has_archive_sync" == "true" && ! -f "$OUTPUT_DIR/archive/category1-complete-match.txt" ]]; then + cmds+=("'$SCRIPT_DIR/20-categorize.sh' '$OUTPUT_DIR/archive/git-sync-status.tsv' '$OUTPUT_DIR/archive'") + fi + + # Run comparison if we have category files + if [[ -f "$OUTPUT_DIR/prod/category1-complete-match.txt" && -f "$OUTPUT_DIR/archive/category1-complete-match.txt" ]]; then + cmds+=("'$SCRIPT_DIR/21-compare-relays.sh' '$OUTPUT_DIR/prod' '$OUTPUT_DIR/archive' '$OUTPUT_DIR/comparison'") + else + log_warn "Missing category files for comparison." + log_warn "Phase 2 must complete successfully before Phase 3 can compare relays." + + # Create placeholder comparison files if they don't exist + if [[ "$DRY_RUN" != "true" ]]; then + mkdir -p "$OUTPUT_DIR/comparison" + for f in complete-in-both.txt complete-prod-missing-archive.txt complete-prod-incomplete-archive.txt incomplete-in-both.txt in-archive-not-prod.txt; do + if [[ ! 
-f "$OUTPUT_DIR/comparison/$f" ]]; then + echo "# Placeholder - Phase 2 data not available" > "$OUTPUT_DIR/comparison/$f" + fi + done + echo "# Comparison not available - Phase 2 data missing" > "$OUTPUT_DIR/comparison/summary.txt" + fi + fi + + if [[ ${#cmds[@]} -eq 0 ]]; then + log_warn "No categorization or comparison needed (already done or missing input)" + return 0 + fi + + run_phase 3 "Categorize & Compare (fast)" "${cmds[@]}" + + # Phase 3c: Compare git data between relays (requires git paths) + # This determines if archive is ahead of prod for repos with mismatched state + if [[ -n "$PROD_GIT" && -n "$ARCHIVE_GIT" ]]; then + # Build list of repos to compare: those where prod=complete but archive is not + local repos_to_compare="$OUTPUT_DIR/comparison/complete-prod-incomplete-archive.txt" + if [[ -f "$repos_to_compare" ]] && [[ ! -f "$OUTPUT_DIR/comparison/git-ancestry.tsv" ]]; then + log_info "Running git ancestry comparison (Phase 3c)..." + run_phase 3 "Git Ancestry Comparison" "'$SCRIPT_DIR/22-compare-git-data.sh' '$PROD_GIT' '$ARCHIVE_GIT' '$repos_to_compare' '$OUTPUT_DIR/comparison'" + fi + else + log_warn "Git paths not provided - skipping git ancestry comparison" + log_warn "Without git comparison, repos where archive is ahead will be incorrectly flagged as needing re-sync" + fi +} + +# Phase 4: Extract logs +run_phase_4() { + if [[ -z "$SERVICE_NAME" ]]; then + log_warn "No service name provided, skipping Phase 4" + return 0 + fi + + # Validate service name before running Phase 4 + # Structured logging only exists in ngit-grasp, not ngit-relay + if [[ "$SERVICE_NAME" == *"ngit-relay"* ]]; then + log_error "SERVICE_NAME appears to be ngit-relay: $SERVICE_NAME" + log_error "" + log_error "Phase 4 requires an ngit-grasp service with structured logging." + log_error "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists" + log_error "in ngit-grasp services, NOT in ngit-relay services." 
+ log_error "" + log_error "Please update --service to use the ngit-grasp archive service." + log_error "" + log_error "To find the correct service name:" + log_error " systemctl list-units 'ngit-grasp*' --all" + log_error "" + log_error "Common ngit-grasp service names:" + log_error " - ngit-grasp.service" + log_error " - ngit-grasp-relay-ngit-dev.service (NixOS multi-instance)" + log_error " - ngit-grasp-archive.service" + return 1 + fi + + # Warn if service name doesn't look like ngit-grasp + if [[ "$SERVICE_NAME" != *"ngit-grasp"* && "$SERVICE_NAME" != *"grasp"* ]]; then + log_warn "SERVICE_NAME doesn't contain 'ngit-grasp': $SERVICE_NAME" + log_warn "Structured logging only exists in ngit-grasp services." + log_warn "If this is not an ngit-grasp service, Phase 4 will find no logs." + fi + + local cmds=() + + # Build log extraction options + local log_opts="" + if [[ -n "$LOG_SINCE" ]]; then + log_opts="$log_opts --since '$LOG_SINCE'" + fi + if [[ -n "$LOG_UNTIL" ]]; then + log_opts="$log_opts --until '$LOG_UNTIL'" + fi + + cmds+=("'$SCRIPT_DIR/30-extract-parse-failures.sh' '$SERVICE_NAME' '$OUTPUT_DIR/logs' $log_opts") + cmds+=("'$SCRIPT_DIR/31-extract-purgatory-expiry.sh' '$SERVICE_NAME' '$OUTPUT_DIR/logs' $log_opts") + + run_phase 4 "Extract Logs (VPS required)" "${cmds[@]}" +} + +# Phase 5: Final classification +run_phase_5() { + # Check if we have the minimum required files + local can_run=true + + if [[ ! -d "$OUTPUT_DIR/prod" ]]; then + log_warn "Missing prod directory" + can_run=false + fi + + if [[ ! -d "$OUTPUT_DIR/archive" ]]; then + log_warn "Missing archive directory" + can_run=false + fi + + if [[ ! -d "$OUTPUT_DIR/comparison" ]]; then + log_warn "Missing comparison directory" + can_run=false + fi + + # Create logs directory with empty files if missing + if [[ "$DRY_RUN" != "true" ]]; then + mkdir -p "$OUTPUT_DIR/logs" + for f in parse-failures.txt purgatory-expired.txt; do + if [[ ! 
-f "$OUTPUT_DIR/logs/$f" ]]; then + echo "# No data - Phase 4 not run" > "$OUTPUT_DIR/logs/$f" + fi + done + fi + + if [[ "$can_run" == "false" ]]; then + log_error "Cannot run Phase 5 - missing required input directories" + return 1 + fi + + run_phase 5 "Final Classification (fast)" "'$SCRIPT_DIR/40-classify-actions.sh' '$OUTPUT_DIR'" +} + +# Display summary +display_summary() { + log_header "Migration Analysis Complete" + + echo "Output Directory: $OUTPUT_DIR" + echo "" + + # Phase timing summary + echo "Phase Timing:" + local total_time=0 + for phase in 1 2 3 4 5; do + local skip_var="SKIP_PHASE_$phase" + if [[ "${!skip_var}" == "true" ]]; then + echo " Phase $phase: SKIPPED" + elif [[ -n "${PHASE_TIMES[$phase]:-}" ]]; then + local t="${PHASE_TIMES[$phase]}" + echo " Phase $phase: ${t}s" + total_time=$((total_time + t)) + else + echo " Phase $phase: N/A" + fi + done + echo " ─────────────" + echo " Total: ${total_time}s" + echo "" + + # Results summary + if [[ -f "$OUTPUT_DIR/results/summary.txt" ]]; then + echo "Results Summary:" + echo "" + # Extract key metrics from summary + if grep -q "No Action Required" "$OUTPUT_DIR/results/summary.txt"; then + grep -A1 "No Action Required" "$OUTPUT_DIR/results/summary.txt" | head -2 + fi + if grep -q "Action Required" "$OUTPUT_DIR/results/summary.txt"; then + grep -A1 "Action Required" "$OUTPUT_DIR/results/summary.txt" | head -2 + fi + if grep -q "Manual Investigation" "$OUTPUT_DIR/results/summary.txt"; then + grep -A1 "Manual Investigation" "$OUTPUT_DIR/results/summary.txt" | head -2 + fi + echo "" + fi + + # Output files + echo "Output Files:" + echo " $OUTPUT_DIR/results/no-action-required.txt" + echo " $OUTPUT_DIR/results/action-required.txt" + echo " $OUTPUT_DIR/results/manual-investigation.txt" + echo " $OUTPUT_DIR/results/summary.txt" + echo "" + + # Next steps + echo "Next Steps:" + echo " 1. Review results/summary.txt for overview" + echo " 2. Address items in results/action-required.txt" + echo " 3. 
Investigate items in results/manual-investigation.txt" + echo " 4. Plan migration window when action items are resolved" + echo "" +} + +# Main +main() { + parse_args "$@" + + log_header "GRASP Relay to ngit-grasp Migration Analysis" + + validate_args + check_prerequisites + determine_phases + setup_output_dir + + # Show configuration + log_info "Configuration:" + log_step "Prod relay: $PROD_RELAY" + log_step "Archive relay: $ARCHIVE_RELAY" + [[ -n "$PROD_GIT" ]] && log_step "Prod git: $PROD_GIT" + [[ -n "$ARCHIVE_GIT" ]] && log_step "Archive git: $ARCHIVE_GIT" + [[ -n "$SERVICE_NAME" ]] && log_step "Service: $SERVICE_NAME" + log_step "Output: $OUTPUT_DIR" + echo "" + + # Show phase plan + log_info "Phase Plan:" + for phase in 1 2 3 4 5; do + local skip_var="SKIP_PHASE_$phase" + if [[ "${!skip_var}" == "true" ]]; then + log_step "Phase $phase: SKIP" + else + log_step "Phase $phase: RUN" + fi + done + echo "" + + if [[ "$DRY_RUN" == "true" ]]; then + log_warn "DRY RUN MODE - No changes will be made" + echo "" + fi + + # Run phases + local overall_exit=0 + + run_phase_1 || overall_exit=1 + run_phase_2 || overall_exit=1 + run_phase_3 || overall_exit=1 + run_phase_4 || overall_exit=1 + run_phase_5 || overall_exit=1 + + # Display summary + if [[ "$DRY_RUN" != "true" ]]; then + display_summary + fi + + if [[ $overall_exit -ne 0 ]]; then + log_warn "Some phases failed. Review output for details." + fi + + exit $overall_exit +} + +main "$@" diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/validate-service.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/validate-service.sh new file mode 100755 index 0000000..6988af3 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/validate-service.sh @@ -0,0 +1,151 @@ +#!/usr/bin/env bash +# +# validate-service.sh - Validate service name for structured logging +# +# This helper script validates that a service name is appropriate for +# Phase 4 log extraction. 
Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) +# only exists in ngit-grasp services, NOT in ngit-relay services. +# +# USAGE: +# Source this script and call the validation function: +# +# source validate-service.sh +# validate_service_for_structured_logging "$SERVICE_NAME" || exit 1 +# +# BACKGROUND: +# Phase 4 of the migration analysis extracts structured log entries from +# journald. These log entries only exist in ngit-grasp services. If you +# accidentally specify an ngit-relay service, Phase 4 will find no logs +# and produce empty results. +# +# This validation prevents that common mistake by: +# 1. Checking if the service name contains "ngit-relay" (error) +# 2. Warning if the service name doesn't contain "ngit-grasp" +# 3. Optionally checking if structured logs actually exist +# +# SEE ALSO: +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide +# 30-extract-parse-failures.sh - Uses this validation +# 31-extract-purgatory-expiry.sh - Uses this validation +# + +# Colors for output (disabled if not a terminal) +if [[ -t 1 ]]; then + _VS_RED='\033[0;31m' + _VS_YELLOW='\033[0;33m' + _VS_NC='\033[0m' +else + _VS_RED='' + _VS_YELLOW='' + _VS_NC='' +fi + +# Validates that the service name is appropriate for structured logging +# +# Arguments: +# $1 - service_name: The systemd service name to validate +# $2 - check_logs: Whether to check if logs actually exist (default: "true") +# $3 - interactive: Whether to prompt for confirmation (default: "true") +# +# Returns: +# 0 - Service is valid for structured logging +# 1 - Service is invalid or user declined to continue +# +# Example: +# validate_service_for_structured_logging "ngit-grasp.service" || exit 1 +# validate_service_for_structured_logging "ngit-grasp.service" "false" # Skip log check +# validate_service_for_structured_logging "ngit-grasp.service" "true" "false" # Non-interactive +# +validate_service_for_structured_logging() { + local service_name="$1" + local check_logs="${2:-true}" + local 
interactive="${3:-true}" + + # Check if service name looks like ngit-relay (ERROR - wrong service type) + if [[ "$service_name" == *"ngit-relay"* ]]; then + echo -e "${_VS_RED}ERROR: Service name appears to be ngit-relay: $service_name${_VS_NC}" >&2 + echo "" >&2 + echo "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists in" >&2 + echo "ngit-grasp services, NOT in ngit-relay services." >&2 + echo "" >&2 + echo "Please use the ngit-grasp archive service instead." >&2 + echo "" >&2 + echo "To find the correct service name:" >&2 + echo " systemctl list-units 'ngit-grasp*' --all" >&2 + echo "" >&2 + echo "Common ngit-grasp service names:" >&2 + echo " - ngit-grasp.service" >&2 + echo " - ngit-grasp-relay-ngit-dev.service (NixOS multi-instance)" >&2 + echo " - ngit-grasp-archive.service" >&2 + return 1 + fi + + # Check if service name looks like ngit-grasp (WARNING if not) + if [[ "$service_name" != *"ngit-grasp"* && "$service_name" != *"grasp"* ]]; then + echo -e "${_VS_YELLOW}WARNING: Service name doesn't contain 'ngit-grasp': $service_name${_VS_NC}" >&2 + echo "" >&2 + echo "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists in" >&2 + echo "ngit-grasp services." >&2 + echo "" >&2 + + if [[ "$interactive" == "true" ]]; then + read -p "Continue anyway? (y/N) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + return 1 + fi + else + echo "Non-interactive mode: proceeding despite warning" >&2 + fi + fi + + # Optionally check if structured logs actually exist + if [[ "$check_logs" == "true" ]]; then + # Check if journalctl is available + if ! 
command -v journalctl &> /dev/null; then + echo -e "${_VS_YELLOW}WARNING: journalctl not available, cannot verify logs exist${_VS_NC}" >&2 + return 0 + fi + + # Check for structured log entries + # IMPORTANT: Use --no-pager to prevent hanging when run non-interactively (e.g., via SSH) + local has_parse_fail has_purgatory + has_parse_fail=$(journalctl --no-pager -u "$service_name" --since "7 days ago" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") + has_purgatory=$(journalctl --no-pager -u "$service_name" --since "7 days ago" 2>/dev/null | grep -c '\[PURGATORY_EXPIRED\]' || echo "0") + + # Strip any non-numeric characters (grep -c can have trailing whitespace) + has_parse_fail="${has_parse_fail//[^0-9]/}" + has_purgatory="${has_purgatory//[^0-9]/}" + has_parse_fail="${has_parse_fail:-0}" + has_purgatory="${has_purgatory:-0}" + + if [[ "$has_parse_fail" -eq 0 && "$has_purgatory" -eq 0 ]]; then + echo -e "${_VS_YELLOW}WARNING: No structured logs found in $service_name (last 7 days)${_VS_NC}" >&2 + echo "" >&2 + echo "This may indicate:" >&2 + echo " 1. Wrong service (should be ngit-grasp archive service, not ngit-relay)" >&2 + echo " 2. Structured logging not yet deployed to this ngit-grasp instance" >&2 + echo " 3. No parse failures or purgatory expiry events in the time window" >&2 + echo "" >&2 + echo "To verify you have the right service:" >&2 + echo " systemctl list-units 'ngit-grasp*' --all" >&2 + echo " journalctl -u | grep -E '\\[PARSE_FAIL\\]|\\[PURGATORY_EXPIRED\\]' | head -5" >&2 + echo "" >&2 + + if [[ "$interactive" == "true" ]]; then + read -p "Continue anyway? (y/N) " -n 1 -r + echo + if [[ ! 
$REPLY =~ ^[Yy]$ ]]; then + return 1 + fi + else + echo "Non-interactive mode: proceeding despite warning" >&2 + fi + fi + fi + + return 0 +} + +# Export the function so it can be used after sourcing +export -f validate_service_for_structured_logging diff --git a/docs/how-to/README.md b/docs/how-to/README.md index f755be1..087ae53 100644 --- a/docs/how-to/README.md +++ b/docs/how-to/README.md @@ -110,18 +110,6 @@ How-to guides are **recipes** that show you how to solve specific problems or ac --- -### [Migrate to ngit-grasp](migrate-to-ngit-grasp.md) -**Status:** ✅ Available - -**Problem:** Switch from another GRASP implementation -**You'll learn:** -- Analyze existing relay data -- Identify repositories needing attention -- Run migration analysis scripts -- Plan and execute cutover - ---- - ## How to Use How-To Guides 1. **Find your problem** - Browse or search for what you need diff --git a/docs/how-to/migrate-to-ngit-grasp.md b/docs/how-to/migrate-to-ngit-grasp.md deleted file mode 100644 index abe2191..0000000 --- a/docs/how-to/migrate-to-ngit-grasp.md +++ /dev/null @@ -1,1030 +0,0 @@ -# Migrate to ngit-grasp from another GRASP implementation - -This guide walks you through migrating a production GRASP relay to ngit-grasp. The process involves analyzing your existing data to identify repositories that need attention before switching over. - -## Compatibility - -This migration process works with any GRASP implementation that: - -- Stores git data in the `/.git` directory structure -- Uses standard GRASP events (kind 30617 announcements, kind 30618 state, kind 5 deletions) -- Exposes a Nostr relay WebSocket endpoint - -**Known compatible implementations:** -- ngit-relay (reference implementation) -- ngit-grasp (when migrating between instances or from archive mode) -- Other GRASP-compliant relays following the specification - -The migration scripts analyze Nostr events and git data directly, making them implementation-agnostic. 
- -## Quick Start - -Run the migration analysis with a single command: - -```bash -# Basic analysis (fetches events, compares relays) -./docs/how-to/migration-scripts/run-migration-analysis.sh \ - --prod-relay wss://source-relay.example.com \ - --archive-relay wss://target-relay.example.com - -# Full analysis (includes git sync check - run on VPS) -./docs/how-to/migration-scripts/run-migration-analysis.sh \ - --prod-relay wss://source-relay.example.com \ - --archive-relay wss://target-relay.example.com \ - --prod-git /var/lib/grasp-relay/git \ - --archive-git /var/lib/ngit-grasp/git \ - --service ngit-grasp.service -``` - -The script produces three output files: -- `results/no-action-required.txt` - Repos ready for migration -- `results/action-required.txt` - Repos needing intervention -- `results/manual-investigation.txt` - Repos needing human review - -See [Running the Analysis](#running-the-analysis) for detailed options. - -## Prerequisites - -### Required Tools - -- **nak** - Nostr Army Knife for fetching events ([install](https://github.com/fiatjaf/nak)) -- **jq** - JSON processing (install via package manager) - -### For Full Analysis (VPS) - -- SSH access to the VPS running your source relay -- Read access to git data directories -- Access to systemd journal (for log extraction) - -### Verify Installation - -```bash -# Check required tools -nak --version -jq --version -git --version - -# Check optional tools (for VPS phases) -journalctl --version -``` - -## Gotchas and Common Issues - -Before running the analysis, be aware of these common issues discovered during real migrations: - -### Git Must Be Installed - -The analysis scripts require `git` to be installed and in PATH. This may not be present on minimal VPS installations. 
- -```bash -# Check if git is available -which git || echo "Git not found - install it first" - -# Install on Debian/Ubuntu -apt install git - -# Install on NixOS (add to configuration.nix) -environment.systemPackages = [ pkgs.git ]; -``` - -### Archive Relay May Only Be Accessible Locally - -If your archive relay is configured to listen only on localhost (e.g., `ws://localhost:7443`), you must run the analysis **on the VPS itself**, not from a remote machine. - -```bash -# Check if archive relay is accessible -# This will fail if run remotely against a localhost-only relay -nak req -k 30618 --limit 1 ws://localhost:7443 - -# Solution: SSH into the VPS and run analysis there -ssh user@your-vps -cd /path/to/scripts -./run-migration-analysis.sh --archive-relay ws://localhost:7443 ... -``` - -### Git Data Paths May Differ from Defaults - -Different deployments store git data in different locations. **Always verify paths before running the analysis.** - -```bash -# Find actual git data paths from service configuration -systemctl cat ngit-relay.service | grep -E 'ExecStart|WorkingDirectory|Environment' -systemctl cat ngit-grasp-*.service | grep -E 'ExecStart|WorkingDirectory|Environment' - -# Common locations: -# - /var/lib/ngit-relay/git (default) -# - /var/lib/ngit-grasp/git (default) -# - /persistent/*/data/repos (custom deployments) - -# Verify the path exists and contains expected structure -ls /path/to/git/npub1*/ # Should show *.git directories -``` - -### Phase 4 Needs the Correct Service Name - -> **CRITICAL:** Phase 4 extracts structured logs (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`, `Invalid announcement` rejections) from journald. These logs **ONLY exist in ngit-grasp services**, NOT in ngit-relay services. - -If you specify an ngit-relay service (like `ngit-relay.service`), Phase 4 will find **zero logs** and produce empty results. This is a common mistake that wastes time and produces misleading analysis. 
- -**Correct service names (ngit-grasp):** -- `ngit-grasp.service` -- `ngit-grasp-relay-ngit-dev.service` (NixOS multi-instance) -- `ngit-grasp-archive.service` - -**Incorrect service names (ngit-relay - NO structured logging):** -- `ngit-relay.service` -- `relay-ngit-dev.service` - -```bash -# Find all ngit-related services -systemctl list-units 'ngit-*' --all - -# Check which service has structured logging (should be ngit-grasp) -journalctl -u ngit-grasp-*.service | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]|Invalid announcement' | head -5 - -# Verify ngit-relay does NOT have structured logging -journalctl -u ngit-relay.service | grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]|Invalid announcement' | head -5 -# ^ This should return nothing - -# Use the archive service name for Phase 4 -./run-migration-analysis.sh ... --service ngit-grasp-relay-ngit-dev.service -``` - -The migration scripts now validate the service name and will **error** if you specify an ngit-relay service, preventing this common mistake. - -### Permission Issues with Service-Owned Directories - -Git data directories are typically owned by the service user and may require elevated permissions to read. - -```bash -# Check directory permissions -ls -la /var/lib/ngit-grasp/git - -# Options: -# 1. Run as root/sudo -sudo ./run-migration-analysis.sh ... - -# 2. Run as the service user -sudo -u ngit-grasp ./run-migration-analysis.sh ... - -# 3. Add your user to the service group -sudo usermod -aG ngit-grasp $USER -# (logout/login required) -``` - -### Service Names Vary by Deployment - -NixOS multi-instance deployments use service names like `ngit-grasp-<instance>.service`. Always check actual service names. 
- -```bash -# List all ngit services -systemctl list-units 'ngit-*' --all --no-pager - -# Example output: -# ngit-relay.service loaded active running ngit-relay -# ngit-grasp-relay-ngit-dev.service loaded active running ngit-grasp (relay-ngit-dev) -``` - -## Migration Overview - -The migration process has three stages: - -### Stage 1: Deploy Archive Instance - -Deploy ngit-grasp alongside your production relay: - -1. Configure ngit-grasp with: - - `domain` set to `.internal` (temporary) - - `archiveService` set to your production domain - - Running on a different port - -2. Let it sync for ~1 hour to gather all events and git data - -### Stage 2: Analyze Data - -Run the migration analysis to identify: -- Repositories successfully migrated (no action needed) -- Repositories with incomplete data (need investigation) -- Repositories with parse failures (may need re-announcement) - -### Stage 3: Switch Over - -Once all issues are resolved: -1. Set `domain` to your production URL -2. Disable archive mode -3. Update your reverse proxy to point to ngit-grasp - -## Running the Analysis - -### Before You Start - -**Verify paths and service names** before running the analysis. Incorrect paths are the most common source of errors. - -```bash -# 1. Find actual git data paths -systemctl cat ngit-relay.service | grep -E 'ExecStart|data|git' -systemctl cat ngit-grasp-*.service | grep -E 'ExecStart|data|git' - -# 2. Find service names -systemctl list-units 'ngit-*' --all --no-pager - -# 3. Verify git data exists at the paths -ls /path/to/prod/git/npub1*/ | head -5 -ls /path/to/archive/git/npub1*/ | head -5 - -# 4. 
Check if archive relay is accessible -nak req -k 30618 --limit 1 ws://localhost:7443 # or your archive URL -``` - -### Basic Usage - -```bash -# Preview what will happen (dry run) -./run-migration-analysis.sh \ - --prod-relay wss://source-relay.example.com \ - --archive-relay wss://target-relay.example.com \ - --dry-run - -# Run the analysis -./run-migration-analysis.sh \ - --prod-relay wss://source-relay.example.com \ - --archive-relay wss://target-relay.example.com -``` - -### Full Analysis on VPS - -**Important:** If your archive relay is localhost-only, you must run this on the VPS. - -```bash -# First, discover your actual paths (see "Before You Start" above) -# Then run with the correct values: - -./run-migration-analysis.sh \ - --prod-relay wss://source-relay.example.com \ - --archive-relay ws://localhost:7443 \ - --prod-git /path/to/prod/git \ - --archive-git /path/to/archive/git \ - --service ngit-grasp-your-instance.service -``` - -### Phase Control - -Skip or run specific phases: - -```bash -# Skip Phase 2 (use cached git sync data) -./run-migration-analysis.sh ... --skip-phase-2 - -# Run only Phase 1 (fetch events) -./run-migration-analysis.sh ... --only-phase-1 - -# Resume from Phase 3 (using existing data) -./run-migration-analysis.sh ... --from-phase-3 --output work/migration-analysis-20260122-1430 -``` - -### All Options - -| Option | Description | -|--------|-------------| -| `--prod-relay <url>` | Source relay WebSocket URL (required) | -| `--archive-relay <url>` | Target relay WebSocket URL (required) | -| `--prod-git <path>` | Git base directory for prod (enables Phase 2) | -| `--archive-git <path>` | Git base directory for archive (enables Phase 2) | -| `--service <name>` | Systemd service name for Phase 4 log extraction. **MUST be an ngit-grasp service** (not ngit-relay). Structured logging only exists in ngit-grasp. 
| -| `--output <dir>` | Output directory (default: auto-generated) | -| `--skip-phase-N` | Skip phase N (1-5) | -| `--only-phase-N` | Run only phase N | -| `--from-phase-N` | Start from phase N | -| `--dry-run` | Show what would be executed | -| `--continue-on-error` | Continue even if a phase fails | - -## Understanding Results - -### Summary File - -The `results/summary.txt` file provides an overview: - -``` -## Overview - -| Category | Count | Percentage | -|----------|-------|------------| -| No Action Required | 450 | 85.7% | -| Action Required | 52 | 9.9% | -| Manual Investigation | 23 | 4.4% | -``` - -### No Action Required - -Repositories in `no-action-required.txt` are ready for migration: - -``` -myrepo | npub1abc... | complete in both prod and archive -oldrepo | npub1def... | deleted by user -testrepo | npub1ghi... | empty/blank in both (user never pushed) -``` - -**Common reasons:** -- `complete in both prod and archive` - Successfully migrated -- `deleted by user` - User requested deletion (kind 5 event) -- `empty/blank in both` - No git data was ever pushed -- `purgatory expired` - System already handled the timeout - -### Action Required - -Repositories in `action-required.txt` need intervention: - -``` -myrepo | npub1abc... | complete in prod, missing from archive | trigger re-sync or investigate -otherrepo | npub1def... | incomplete in both (prod=cat3, archive=cat2) | investigate git data source -``` - -**Common actions:** -- **Re-sync needed**: Trigger the archive to re-fetch from the source -- **Wait for sync**: Archive sync may still be in progress -- **Investigate git source**: Original git data may be incomplete -- **Fix parse failure**: Event format issue, may need re-announcement - -### Manual Investigation - -Repositories in `manual-investigation.txt` have unusual states: - -``` -weirdrepo | npub1abc... | in archive (cat1) but not in prod | may be new announcement or deleted from prod -conflictrepo | npub1def... 
| complete in prod, missing from archive, parse failure logged | investigate parse failure -``` - -These require human judgment to determine the correct action. - -## Troubleshooting - -### "nak not found" - -Install nak from https://github.com/fiatjaf/nak: - -```bash -# Using Go -go install github.com/fiatjaf/nak@latest - -# Or download binary from releases -``` - -### "git not found" - -Git must be installed and in PATH: - -```bash -# Check if git is available -which git - -# Install on Debian/Ubuntu -sudo apt install git - -# Install on NixOS (add to configuration.nix) -environment.systemPackages = [ pkgs.git ]; -``` - -### "Permission denied" on git directories - -Run with sudo or ensure your user has read access: - -```bash -# Check permissions -ls -la /var/lib/grasp-relay/git - -# Option 1: Run with sudo -sudo ./run-migration-analysis.sh ... - -# Option 2: Run as service user -sudo -u ngit-grasp ./run-migration-analysis.sh ... -``` - -### Archive relay connection failed - -If you get connection errors to the archive relay: - -```bash -# Check if relay is running -systemctl status ngit-grasp-*.service - -# Check if it's localhost-only -# If archive is ws://localhost:7443, you MUST run on the VPS -ssh user@your-vps -./run-migration-analysis.sh --archive-relay ws://localhost:7443 ... -``` - -### Wrong git paths / "No such file or directory" - -Git data paths vary by deployment. Discover the actual paths: - -```bash -# Find paths from service configuration -systemctl cat ngit-relay.service | grep -E 'ExecStart|WorkingDirectory|Environment' -systemctl cat ngit-grasp-*.service | grep -E 'ExecStart|WorkingDirectory|Environment' - -# Verify the path contains git repos -ls /discovered/path/npub1*/ -``` - -### Phase 2 takes too long - -The git sync check processes each repository individually (~20 minutes total). To speed up iteration: - -1. Run Phase 2 once and save the output -2. Use `--skip-phase-2` for subsequent runs -3. 
Use `--from-phase-3` to re-run classification with existing data - -### No parse failures found - -This is expected if: -- ngit-grasp logging improvements aren't deployed yet -- No events actually failed to parse - -The analysis will continue without log data. - -### Phase 4 finds no structured logs - -**Symptom:** Phase 4 completes but `parse-failures.txt` and `purgatory-expired.txt` are empty or contain only header comments. - -**Most common cause:** You're querying the wrong service (ngit-relay instead of ngit-grasp). - -Structured logging (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`, `Invalid announcement` rejections) **only exists in ngit-grasp services**. If you specify an ngit-relay service, Phase 4 will find zero logs. - -**How to diagnose:** - -```bash -# 1. Check what service you configured -cat /path/to/output/config.txt | grep SERVICE_NAME - -# 2. If it contains "ngit-relay", that's the problem! -# ngit-relay does NOT have structured logging - -# 3. Find the correct ngit-grasp service -systemctl list-units 'ngit-grasp*' --all - -# 4. Verify the ngit-grasp service has structured logs -journalctl -u ngit-grasp-relay-ngit-dev.service --since "7 days ago" | \ - grep -E '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]|Invalid announcement' | head -5 -``` - -**How to fix:** - -```bash -# Update SERVICE_NAME to the ngit-grasp archive service and re-run -./run-migration-analysis.sh \ - --prod-relay wss://relay.ngit.dev \ - --archive-relay ws://localhost:7443 \ - --service ngit-grasp-relay-ngit-dev.service \ - --from-phase-4 # Skip phases 1-3, just re-run phase 4 -``` - -**Other possible causes:** - -1. **Structured logging not deployed:** If the ngit-grasp instance doesn't have the logging improvements deployed, no structured logs will exist. Check the ngit-grasp version. - -2. **No events in time window:** If there genuinely were no parse failures, purgatory expiry events, or invalid announcement rejections, the files will be empty. 
This is valid - it means everything parsed successfully. - -3. **Wrong time range:** The default is 30 days. If your archive has been running longer, you may need `--since` to extend the range. - -**Prevention:** The migration scripts now validate the service name and will error if you specify an ngit-relay service. - -**Note on "Invalid announcement" rejections:** These are announcements (kind 30617) that were rejected by the write policy due to format violations. The most common reason is "multiple clone tags found" - the NIP-34 spec requires a single clone tag with multiple values, not multiple clone tags. These rejections are logged as `Event rejected by write policy ... reason=Invalid announcement: ...`. - -### Event counts are multiples of 250 - -This suggests pagination may have failed. The scripts use `--paginate` by default, but if you see exactly 250, 500, 750 events, verify the relay is responding correctly. - -## Architecture - -### Analysis Phases - -The analysis is split into 5 modular phases: - -| Phase | Name | Time | Location | Description | -|-------|------|------|----------|-------------| -| 1 | Fetch Events | ~30s each | Local | Fetch events from both relays | -| 2 | Git Sync Check | ~20 min each | VPS | Compare state events to git data | -| 3 | Categorize & Compare | <1s | Local | Categorize and compare results | -| 4 | Extract Logs | <30s | VPS | Extract parse failures and purgatory expiry | -| 5 | Final Classification | <5s | Local | Combine all data into actionable results | - -### Phase Flow Diagram - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ PHASE 1: Fetch Events (~30s, local) │ -│ Fetches kind 30618 (state), 30617 (announcements), 5 (deletion) │ -│ Run twice: once for prod, once for archive │ -└─────────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────────┐ -│ PHASE 2: Git Sync Check (~20 mins, VPS required) │ -│ Compares state 
event refs to actual git data on disk │ -│ Categorizes into: complete, empty, partial, no-match │ -└─────────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────────┐ -│ PHASE 3: Categorize & Compare (fast, local) │ -│ Compares prod vs archive categories │ -│ Identifies gaps and sync issues │ -└─────────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────────┐ -│ PHASE 4: Log-Based Categories (VPS required) │ -│ Extracts structured logs from the archive service: │ -│ - [PARSE_FAIL] - Events that failed to parse │ -│ - [PURGATORY_EXPIRED] - Repos where git data never arrived │ -│ - "Invalid announcement" - Announcements rejected for format │ -│ violations (e.g., multiple clone tags) │ -│ Provides context for why repos failed to sync │ -└─────────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────────┐ -│ PHASE 5: Final Classification (fast, local) │ -│ Combines all data sources │ -│ Outputs: no-action, action-required, manual-investigation │ -└─────────────────────────────────────────────────────────────────┘ -``` - -### Git Sync Categories - -Phase 2 categorizes repositories into 4 categories: - -| Category | Description | Meaning | -|----------|-------------|---------| -| 1 | Complete Match | All refs in state event match git data | -| 2 | Empty/Blank | No git data available | -| 3 | Partial Match | Some refs match, some don't | -| 4 | No Match | Git data exists but refs don't match | - -### Output Directory Structure - -``` -work/migration-analysis-YYYYMMDD-HHMM/ -├── prod/ -│ ├── raw/ -│ │ ├── state-events.json # Phase 1 -│ │ ├── announcements.json # Phase 1 -│ │ └── deletions.json # Phase 1 -│ ├── git-sync-status.tsv # Phase 2 -│ └── category*.txt # Phase 2/3 -├── archive/ -│ └── (same structure as prod) -├── comparison/ -│ ├── 
complete-in-both.txt # Phase 3 -│ ├── complete-prod-missing-archive.txt -│ ├── complete-prod-incomplete-archive.txt -│ ├── incomplete-in-both.txt -│ ├── in-archive-not-prod.txt -│ └── summary.txt -├── logs/ -│ ├── parse-failures.txt # Phase 4 -│ └── purgatory-expired.txt # Phase 4 -└── results/ - ├── no-action-required.txt # Phase 5 - ├── action-required.txt # Phase 5 - ├── manual-investigation.txt # Phase 5 - └── summary.txt # Phase 5 -``` - -## Why Migration May Require Attention - -Different GRASP implementations may handle edge cases differently. ngit-grasp has stricter validation and better observability, which can surface issues that were previously hidden: - -| Aspect | Typical Source Relay | ngit-grasp | -|--------|---------------------|------------| -| Git data validation | May accept partial data | Requires all git data to reproduce state | -| PR refs cleanup | May not clear `refs/nostr/` | Properly manages PR refs | -| Parse failures | May silently ignore | Logs structured `[PARSE_FAIL]` entries | -| Sync timeout | May have no timeout | Purgatory expires after configurable period | - -These differences explain why some repositories may need attention during migration - ngit-grasp's stricter validation catches issues that other implementations may have silently accepted. - -## Next Steps - -After running the analysis: - -1. **Review the summary** - Check `results/summary.txt` for the overview -2. **Address action items** - Work through `results/action-required.txt` -3. **Investigate edge cases** - Review `results/manual-investigation.txt` -4. **Re-run analysis** - After fixing issues, re-run to verify -5. **Plan cutover** - Schedule the switch when all issues are resolved - -### When to Re-run - -Re-run the analysis when: -- Archive sync has had time to complete -- You've fixed parse failures or re-announced events -- You want to verify fixes before cutover - -```bash -# Re-run with existing Phase 2 data (faster) -./run-migration-analysis.sh ... 
--skip-phase-2 --output work/migration-analysis-20260122-1430 -``` - -## Individual Scripts - -For advanced usage, you can run individual phase scripts: - -```bash -# Phase 1: Fetch events -./migration-scripts/01-fetch-events.sh wss://source-relay.example.com output/prod - -# Phase 2: Git sync check -./migration-scripts/10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod --categorize - -# Phase 3a: Categorize -./migration-scripts/20-categorize.sh output/prod/git-sync-status.tsv output/prod - -# Phase 3b: Compare relays -./migration-scripts/21-compare-relays.sh output/prod output/archive output/comparison - -# Phase 4a: Extract parse failures -./migration-scripts/30-extract-parse-failures.sh ngit-grasp.service output/logs - -# Phase 4b: Extract purgatory expiry -./migration-scripts/31-extract-purgatory-expiry.sh ngit-grasp.service output/logs - -# Phase 5: Final classification -./migration-scripts/40-classify-actions.sh work/migration-analysis-20260122-1430 -``` - -Each script has detailed help available with `--help` or by reading the script header. - -## relay.ngit.dev Migration Notes - -This section documents the specific configuration and lessons learned from migrating relay.ngit.dev from ngit-relay to ngit-grasp. Use this as a reference for similar deployments. - -### Deployment Configuration - -| Component | Value | -|-----------|-------| -| **Production relay** | `wss://relay.ngit.dev` | -| **Production service** | `ngit-relay.service` | -| **Production git path** | `/persistent/relay-ngit-dev-ngit-relay/data/repos` | -| **Archive relay** | `ws://localhost:7443` (localhost only) | -| **Archive service** | `ngit-grasp-relay-ngit-dev.service` | -| **Archive git path** | `/persistent/grasp/relay-ngit-dev/git` | - -### Key Differences from Defaults - -1. **Git paths are non-standard**: The production relay uses `/persistent/relay-ngit-dev-ngit-relay/data/repos` instead of `/var/lib/ngit-relay/git` - -2. 
**Archive is localhost-only**: The archive relay listens on `ws://localhost:7443`, not a public URL. All analysis must run on the VPS. - -3. **Service names include instance**: NixOS multi-instance deployment uses `ngit-grasp-relay-ngit-dev.service`, not `ngit-grasp.service` - -### Analysis Command - -```bash -# Run on VPS (archive is localhost-only) -./docs/how-to/migration-scripts/run-migration-analysis.sh \ - --prod-relay wss://relay.ngit.dev \ - --archive-relay ws://localhost:7443 \ - --prod-git /persistent/relay-ngit-dev-ngit-relay/data/repos \ - --archive-git /persistent/grasp/relay-ngit-dev/git \ - --service ngit-grasp-relay-ngit-dev.service -``` - -### Analysis Results (January 2026) - -| Category | Count | Notes | -|----------|-------|-------| -| Complete in both | ~400 | Ready for migration | -| Complete in prod, missing from archive | 315 | Need re-sync | -| Empty in both | 100 | Users never pushed git data | -| Manual investigation | 5 | Unusual states | -| Purgatory expired | 382 | Structured logging working | - -### Lessons Learned - -1. **Always verify paths first**: The default paths in examples didn't match the actual deployment. Use `systemctl cat <service>` to find real paths. - -2. **Check archive accessibility**: We initially tried to run analysis remotely, but the archive relay was localhost-only. We had to SSH into the VPS. - -3. **Use archive service for Phase 4 (CRITICAL)**: Structured logging (`[PARSE_FAIL]`, `[PURGATORY_EXPIRED]`) is **ONLY** in the ngit-grasp archive service, NOT the ngit-relay production service. Running Phase 4 against `ngit-relay.service` produces zero results because ngit-relay doesn't emit structured logs. The scripts now validate this and error if you specify an ngit-relay service. - -4. **Install git on VPS**: Git wasn't installed on the minimal VPS. The scripts now check for this in prerequisites. - -5. **Permissions matter**: Some directories required `sudo` to access. Running as root or the service user resolved this. 
- -### Next Steps for relay.ngit.dev - -1. **Re-sync 315 repos**: Trigger archive to re-fetch from production -2. **Investigate 5 edge cases**: Manual review of unusual states -3. **Monitor purgatory**: 382 expired entries indicate sync issues to investigate -4. **Plan cutover**: Once re-sync complete, switch DNS/proxy to ngit-grasp - -## ngit-relay Troubleshooting - -This section covers common issues encountered when running ngit-relay in production, including git permission errors and repository corruption. These issues were discovered during the relay.ngit.dev migration and may affect other deployments. - -### Git Permission Denied Errors - -#### Symptoms - -When cloning repositories, you see: - -```bash -$ git clone https://relay.ngit.dev/npub.../repo.git -Cloning into 'repo'... -remote: warning: unable to access '/root/.config/git/attributes': Permission denied -``` - -Or in container logs: - -``` -warning: unable to access '/root/.config/git/attributes': Permission denied -``` - -#### Explanation - -This occurs when: -1. Git operations run as a non-root user (typically `nginx` user, UID 101) -2. Git tries to access `/root/.config/git/attributes` for global git configuration -3. The `/root` directory has permissions `0700` (drwx------), preventing non-root users from traversing into it -4. Even though the `attributes` file itself may be world-readable, the nginx user cannot reach it due to parent directory permissions - -**Root cause:** The container runs git commands via fcgiwrap as the nginx user, but `/root` is only accessible by root. 
- -#### Quick Fix (Temporary - Does Not Survive Container Restart) - -This fix resolves the issue immediately but will be lost when containers restart: - -```bash -# For each ngit-relay container, exec in and create the git config directory -sudo podman exec <container-name> sh -c "mkdir -p /root/.config/git && touch /root/.config/git/attributes && chmod 644 /root/.config/git/attributes" - -# Example for specific containers: -sudo podman exec gitnostr-com-ngit-relay sh -c "mkdir -p /root/.config/git && touch /root/.config/git/attributes && chmod 644 /root/.config/git/attributes" - -sudo podman exec relay-ngit-dev-ngit-relay sh -c "mkdir -p /root/.config/git && touch /root/.config/git/attributes && chmod 644 /root/.config/git/attributes" -``` - -**Important:** This fix is temporary and will be lost when the container restarts. For a permanent solution, see the NixOS configuration below. - -#### Permanent Fix (NixOS Configuration) - -For NixOS deployments, add systemd services that automatically fix `/root` permissions after each container start: - -```nix -# In your ngit-relay service configuration (e.g., services/relay-ngit-dev-ngit-relay.nix) - -systemd.services.relay-ngit-dev-fix-root-perms = { - description = "Fix /root permissions in relay.ngit.dev container for git access"; - after = [ "podman-relay-ngit-dev-ngit-relay.service" ]; - requires = [ "podman-relay-ngit-dev-ngit-relay.service" ]; - wantedBy = [ "multi-user.target" ]; - serviceConfig = { - Type = "oneshot"; - RemainAfterExit = true; - ExecStart = "${pkgs.bash}/bin/bash -c 'sleep 5 && ${pkgs.podman}/bin/podman exec relay-ngit-dev-ngit-relay chmod 711 /root'"; - Restart = "on-failure"; - RestartSec = "10s"; - }; -}; -``` - -This changes `/root` permissions from `0700` to `0711`, allowing the nginx user to traverse through `/root` to reach `/root/.config/git/`. 
- -**Why 711?** -- `7` (owner/root): Full read/write/execute -- `1` (group): Execute only (traverse) -- `1` (other): Execute only (traverse) - -This allows non-root users to traverse through `/root` to access subdirectories, while still protecting `/root` contents from being listed or read. - -#### Verification - -After applying the fix: - -```bash -# Test that cloning works without permission warnings -git clone https://relay.ngit.dev/npub.../repo.git - -# Should clone successfully with no "Permission denied" warnings - -# Verify /root permissions inside container -sudo podman exec relay-ngit-dev-ngit-relay ls -ld /root -# Should show: drwx--x--x (711) - -# Verify nginx user can access git config -sudo podman exec relay-ngit-dev-ngit-relay su -s /bin/sh nginx -c "cat /root/.config/git/attributes" -# Should succeed without "Permission denied" -``` - -### Git Repository Corruption - -#### Symptoms - -When cloning repositories, you see: - -```bash -$ git clone https://relay.ngit.dev/npub.../repo.git -Cloning into 'repo'... -remote: fatal: bad tree object 8b765235809eb27159657eb4c97fb37d21c29bf0 -remote: aborting due to possible repository corruption on the remote side. -fatal: early EOF -fatal: fetch-pack: invalid index-pack output -``` - -Or when running `git fsck` on the server: - -``` -broken link from tree 7d60270e1904c30ae6cef7b465ef842a9f9f63c3 - to tree 8b765235809eb27159657eb4c97fb37d21c29bf0 -missing tree 8b765235809eb27159657eb4c97fb37d21c29bf0 -``` - -#### Explanation - -Repository corruption typically occurs due to: - -1. **Incomplete push operations**: A git push was interrupted mid-transfer, creating a commit that references objects that were never written to disk -2. **Permission issues during push**: The git-receive-pack process couldn't write objects due to permission problems (e.g., files owned by wrong user) -3. 
**Disk/filesystem issues**: Rare cases of disk errors or filesystem corruption - -**Common pattern:** A commit exists with references to tree objects, but those tree objects are missing from the repository. Sometimes individual blobs (files) exist as "dangling" objects but were never properly linked into the tree structure. - -**Warning signs:** -- HEAD file or objects owned by root when they should be owned by the service user (UID 101) -- Dangling blobs in `git fsck` output -- Recent permission denied errors in logs - -#### How to Fix - -**Step 1: Locate the corrupted repository** - -```bash -# SSH to the server -ssh dc@ngit.dev - -# Find the repository path -# For relay.ngit.dev: /persistent/relay-ngit-dev-ngit-relay/data/repos/npub.../repo.git -# For gitnostr.com: /persistent/gitnostr-com-ngit-relay/data/repos/npub.../repo.git - -cd /persistent/relay-ngit-dev-ngit-relay/data/repos/npub1c03rad0r6q833vh57kyd3ndu2jry30nkr0wepqfpsm05vq7he25slryrnw/axepool.git -``` - -**Step 2: Diagnose the corruption** - -```bash -# Run git fsck to identify missing/corrupted objects -git fsck --full - -# Example output: -# broken link from tree 7d60270e1904c30ae6cef7b465ef842a9f9f63c3 -# to tree 8b765235809eb27159657eb4c97fb37d21c29bf0 -# missing tree 8b765235809eb27159657eb4c97fb37d21c29bf0 -# dangling blob 94490b902c9bceb6f901cd0c7c25b685e3685d87 - -# Check which commit references the missing object -git log --all --oneline | head -10 - -# Inspect the broken commit -git cat-file -p <commit-hash> -# This will show which tree is missing -``` - -**Step 3: Attempt automatic repair** - -Try these in order: - -```bash -# Option A: Repack and garbage collect -git gc --aggressive --prune=now - -# Then check if corruption is fixed -git fsck --full - -# Option B: If that doesn't work, try recovering from pack files -git unpack-objects < .git/objects/pack/*.pack -git fsck --full -``` - -**Step 4: Manual reconstruction (if automatic repair fails)** - -If the missing tree object can be reconstructed from 
dangling blobs: - -```bash -# 1. Identify what should be in the missing tree -# Look at the commit message and nearby commits to understand the structure - -# 2. Find dangling blobs that might belong to the tree -git fsck --full | grep "dangling blob" - -# 3. Examine each dangling blob to identify files -git cat-file -p 94490b902c9bceb6f901cd0c7c25b685e3685d87 - -# 4. Reconstruct the tree manually -# This requires creating a new tree object with the correct structure -# Example (advanced; a literal TAB must separate each hash from its filename): -git mktree <<EOF -100644 blob <blob-hash-1>	filename1.rs -100644 blob <blob-hash-2>	filename2.rs -EOF -# This outputs a new tree hash - -# 5. Create a new commit with the fixed tree -git commit-tree <new-tree-hash> -p <parent-commit-hash> -m "Reconstructed commit message" -# This outputs a new commit hash - -# 6. Update the branch reference -git update-ref refs/heads/<branch-name> <new-commit-hash> - -# 7. Clean up -git gc --prune=now -``` - -**Step 5: Verify the fix** - -```bash -# Run fsck again - should show no errors -git fsck --full - -# Test clone locally -git clone /path/to/repo.git /tmp/test-clone - -# Test clone via HTTP -git clone https://relay.ngit.dev/npub.../repo.git /tmp/test-clone-http -``` - -**Step 6: Fix ownership and permissions** - -Ensure all repository files are owned by the correct user: - -```bash -# For ngit-relay containers, files should be owned by UID 101 (nginx user) -sudo chown -R 101:101 /persistent/relay-ngit-dev-ngit-relay/data/repos/npub.../repo.git - -# Verify -ls -la /persistent/relay-ngit-dev-ngit-relay/data/repos/npub.../repo.git -``` - -**Step 7: Replicate fix to other instances (if applicable)** - -If you have multiple relay instances (e.g., gitnostr.com and relay.ngit.dev), replicate the fix: - -```bash -# Copy the repaired pack files -sudo cp /persistent/relay-ngit-dev-ngit-relay/data/repos/npub.../repo.git/objects/pack/* \ - /persistent/gitnostr-com-ngit-relay/data/repos/npub.../repo.git/objects/pack/ - -# Update the branch reference -cd /persistent/gitnostr-com-ngit-relay/data/repos/npub.../repo.git -git update-ref refs/heads/<branch-name> <new-commit-hash> - -# Fix ownership 
-sudo chown -R 101:101 /persistent/gitnostr-com-ngit-relay/data/repos/npub.../repo.git - -# Clean up -git gc --prune=now -``` - -#### Prevention - -To prevent future corruption: - -1. **Fix permission issues first**: Ensure the permission denied errors are resolved (see previous section) -2. **Monitor for root-owned files**: Files in git repositories should be owned by UID 101, not root -3. **Check disk health**: Run `df -h` and `smartctl` to ensure disk is healthy -4. **Enable git fsck in monitoring**: Periodically run `git fsck` on repositories to catch corruption early - -```bash -# Add to monitoring/cron (example) -find /persistent/*/data/repos -name "*.git" -type d | while read repo; do - echo "Checking $repo" - git -C "$repo" fsck --full 2>&1 | grep -v "^Checking\|^dangling" -done -``` - -#### Real-World Example: axepool.git Corruption - -During the relay.ngit.dev migration, the `axepool.git` repository was corrupted: - -**Problem:** -- Commit `e84518b` referenced tree `8b765235...` (the `src` directory) -- Tree `8b765235...` was missing from the repository -- Blob `94490b90...` (mint_client.rs) existed as a dangling object but wasn't linked - -**Root cause:** -- An incomplete push operation -- Permission issues (HEAD file was owned by root) -- The commit was created but the tree object was never written - -**Solution:** -1. Identified the missing tree should contain: `lib.rs`, `main.rs`, `mint_client.rs` -2. Found the dangling blob `94490b90...` was `mint_client.rs` -3. Reconstructed the `src` tree with all three files -4. Created new commit `e12bc3cf...` with the fixed tree -5. Updated `refs/heads/add-missing-hooks` to point to the new commit -6. Ran `git gc --prune=now` to clean up -7. Replicated fix to gitnostr.com instance - -**Result:** Both relays now clone successfully with all files intact. 
- -### Additional Resources - -- **ngit-relay repository**: https://github.com/danconwaydev/ngit-relay -- **Git internals documentation**: https://git-scm.com/book/en/v2/Git-Internals-Plumbing-and-Porcelain -- **Podman documentation**: https://docs.podman.io/ diff --git a/docs/how-to/migration-scripts/01-fetch-events.sh b/docs/how-to/migration-scripts/01-fetch-events.sh deleted file mode 100755 index e0d6f26..0000000 --- a/docs/how-to/migration-scripts/01-fetch-events.sh +++ /dev/null @@ -1,206 +0,0 @@ -#!/usr/bin/env bash -# -# 01-fetch-events.sh - Fetch nostr events from a relay for migration analysis -# -# PHASE 1 of the GRASP relay to ngit-grasp migration analysis pipeline. -# Fetches kind 30618 (state), 30617 (announcement), and 5 (deletion) events. -# -# USAGE: -# ./01-fetch-events.sh -# -# EXAMPLES: -# # Fetch from production relay -# ./01-fetch-events.sh wss://relay.ngit.dev output/prod -# -# # Fetch from archive relay -# ./01-fetch-events.sh wss://archive.relay.ngit.dev output/archive -# -# # Full migration analysis setup -# mkdir -p work/migration-analysis-$(date +%Y%m%d-%H%M) -# ./01-fetch-events.sh wss://relay.ngit.dev work/migration-analysis-*/prod -# ./01-fetch-events.sh wss://archive.relay.ngit.dev work/migration-analysis-*/archive -# -# OUTPUT: -# /raw/state-events.json - kind 30618 events (one per line, JSONL) -# /raw/announcements.json - kind 30617 events (one per line, JSONL) -# /raw/deletions.json - kind 5 events (one per line, JSONL) -# -# OUTPUT FORMAT: -# Each file contains one JSON event per line (JSONL format). -# Events are the raw nostr event objects as returned by the relay. 
-# -# PREREQUISITES: -# - nak (Nostr Army Knife) - https://github.com/fiatjaf/nak -# - jq (for counting/validation) -# -# RUNTIME: ~30 seconds per relay (depends on network and event count) -# -# NOTES: -# - Uses --paginate to ensure all events are fetched (not just first page) -# - If event counts are exact multiples of 250, pagination may have failed -# - Run Phase 1 and Phase 2 back-to-back for accurate snapshot -# -# SEE ALSO: -# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide -# - -set -euo pipefail - -# Colors for output (disabled if not a terminal) -if [[ -t 1 ]]; then - RED='\033[0;31m' - GREEN='\033[0;32m' - YELLOW='\033[0;33m' - BLUE='\033[0;34m' - NC='\033[0m' # No Color -else - RED='' - GREEN='' - YELLOW='' - BLUE='' - NC='' -fi - -log_info() { - echo -e "${BLUE}[INFO]${NC} $*" >&2 -} - -log_success() { - echo -e "${GREEN}[OK]${NC} $*" >&2 -} - -log_warn() { - echo -e "${YELLOW}[WARN]${NC} $*" >&2 -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $*" >&2 -} - -usage() { - echo "Usage: $0 " - echo "" - echo "Arguments:" - echo " relay-url WebSocket URL of the relay (e.g., wss://relay.ngit.dev)" - echo " output-dir Directory to store fetched events (e.g., output/prod)" - echo "" - echo "Examples:" - echo " $0 wss://relay.ngit.dev output/prod" - echo " $0 wss://archive.relay.ngit.dev output/archive" - exit 1 -} - -# Check prerequisites -check_prerequisites() { - local missing=0 - - if ! command -v nak &> /dev/null; then - log_error "nak not found. Install from: https://github.com/fiatjaf/nak" - missing=1 - fi - - if ! command -v jq &> /dev/null; then - log_error "jq not found. Install with your package manager." - missing=1 - fi - - if [[ $missing -eq 1 ]]; then - exit 1 - fi -} - -# Fetch events of a specific kind -# Args: $1=relay, $2=kind, $3=output_file, $4=description -fetch_kind() { - local relay="$1" - local kind="$2" - local output_file="$3" - local description="$4" - - log_info "Fetching $description (kind $kind) from $relay..." 
- - local start_time - start_time=$(date +%s) - - # Use --paginate to ensure we get all events, not just first page - # nak outputs one event per line (JSONL format) - if ! nak req -k "$kind" --paginate "$relay" > "$output_file" 2>/dev/null; then - log_error "Failed to fetch $description from $relay" - return 1 - fi - - local end_time - end_time=$(date +%s) - local duration=$((end_time - start_time)) - - # Count events - local count - count=$(wc -l < "$output_file" | tr -d ' ') - - # Warn if count is suspicious (exact multiple of 250 suggests pagination issue) - if [[ $count -gt 0 ]] && [[ $((count % 250)) -eq 0 ]]; then - log_warn "$description count ($count) is exact multiple of 250 - pagination may have failed!" - fi - - log_success "Fetched $count $description in ${duration}s -> $output_file" - - echo "$count" -} - -# Main -main() { - if [[ $# -ne 2 ]]; then - usage - fi - - local relay="$1" - local output_dir="$2" - - # Validate relay URL - if [[ ! "$relay" =~ ^wss?:// ]]; then - log_error "Invalid relay URL: $relay (must start with ws:// or wss://)" - exit 1 - fi - - check_prerequisites - - log_info "Starting event fetch from $relay" - log_info "Output directory: $output_dir" - - # Create output directory structure - local raw_dir="$output_dir/raw" - mkdir -p "$raw_dir" - - local total_start - total_start=$(date +%s) - - # Fetch each event type - local state_count announcement_count deletion_count - - state_count=$(fetch_kind "$relay" 30618 "$raw_dir/state-events.json" "state events") - announcement_count=$(fetch_kind "$relay" 30617 "$raw_dir/announcements.json" "announcements") - deletion_count=$(fetch_kind "$relay" 5 "$raw_dir/deletions.json" "deletion requests") - - local total_end - total_end=$(date +%s) - local total_duration=$((total_end - total_start)) - - # Summary - echo "" - log_info "=== Fetch Summary ===" - log_info "Relay: $relay" - log_info "Output: $output_dir" - log_info "State events (30618): $state_count" - log_info "Announcements (30617): 
$announcement_count" - log_info "Deletions (5): $deletion_count" - log_info "Total time: ${total_duration}s" - echo "" - - # Output file listing for easy copy/paste - log_info "Output files:" - echo " $raw_dir/state-events.json" - echo " $raw_dir/announcements.json" - echo " $raw_dir/deletions.json" -} - -main "$@" diff --git a/docs/how-to/migration-scripts/10-check-git-sync.sh b/docs/how-to/migration-scripts/10-check-git-sync.sh deleted file mode 100755 index b4536cb..0000000 --- a/docs/how-to/migration-scripts/10-check-git-sync.sh +++ /dev/null @@ -1,564 +0,0 @@ -#!/usr/bin/env bash -# -# 10-check-git-sync.sh - Compare state events to actual git data on disk -# -# PHASE 2 of the GRASP relay to ngit-grasp migration analysis pipeline. -# Compares kind 30618 state events against actual git refs on disk. -# -# USAGE: -# ./10-check-git-sync.sh [--categorize] -# -# EXAMPLES: -# # Check source relay against source git data -# ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod -# -# # Check target relay against target git data -# ./10-check-git-sync.sh output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive -# -# # Check and categorize in one step (convenience mode) -# ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod --categorize -# -# INPUT: -# state-events.json - JSONL file from Phase 1 (01-fetch-events.sh) -# One kind 30618 event per line -# git-base-dir - Base directory containing git repos -# Structure: //.git/ -# -# OUTPUT: -# /git-sync-status.tsv - Tab-separated values: -# reponpubstate_refsgit_refsmatchesreason -# -# With --categorize flag, also outputs: -# /category1-complete-match.txt -# /category2-empty-blank.txt -# /category3-partial-match.txt -# /category4-no-match.txt -# -# CATEGORIES: -# 1. Complete Match - All refs in state event match git data perfectly -# 2. Empty/Blank - No git data available (directory missing or empty) -# 3. 
Partial Match - Some refs match, some don't -# 4. No Match - Git data exists but commit hashes don't match -# -# PREREQUISITES: -# - nak (for npub encoding) - https://github.com/fiatjaf/nak -# - jq (for JSON parsing) -# - Read access to git directories (may need sudo) -# -# RUNTIME: ~20 minutes on VPS (git operations are slow) -# -# NOTES: -# - Must run on VPS with access to git directories -# - Progress indicator updates every 10 events -# - Handles packed refs (git show-ref) and loose refs -# -# SEE ALSO: -# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide -# 01-fetch-events.sh - Phase 1 script that produces input for this script -# 20-categorize.sh - Phase 3a script that consumes output from this script -# - -set -euo pipefail - -# Colors for output (disabled if not a terminal) -if [[ -t 1 ]]; then - RED='\033[0;31m' - GREEN='\033[0;32m' - YELLOW='\033[0;33m' - BLUE='\033[0;34m' - NC='\033[0m' -else - RED='' - GREEN='' - YELLOW='' - BLUE='' - NC='' -fi - -log_info() { - echo -e "${BLUE}[INFO]${NC} $*" >&2 -} - -log_success() { - echo -e "${GREEN}[OK]${NC} $*" >&2 -} - -log_warn() { - echo -e "${YELLOW}[WARN]${NC} $*" >&2 -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $*" >&2 -} - -log_progress() { - # Overwrite current line for progress updates - echo -ne "\r${BLUE}[PROGRESS]${NC} $*" >&2 -} - -usage() { - echo "Usage: $0 [--categorize]" - echo "" - echo "Arguments:" - echo " state-events.json JSONL file from Phase 1 (kind 30618 events)" - echo " git-base-dir Base directory for git repos (e.g., /var/lib/grasp-relay/git)" - echo " output-dir Directory to store output files" - echo " --categorize Optional: also output category files (like Phase 3)" - echo "" - echo "Examples:" - echo " $0 output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod" - echo " $0 output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive" - echo "" - echo "Output:" - echo " git-sync-status.tsv - TSV with: repo, npub, state_refs, git_refs, 
matches, reason" - exit 1 -} - -# Check prerequisites -check_prerequisites() { - local missing=0 - - if ! command -v git &> /dev/null; then - log_error "git not found. Install with your package manager." - missing=1 - fi - - if ! command -v nak &> /dev/null; then - log_error "nak not found. Install from: https://github.com/fiatjaf/nak" - log_error "Or run: nix-shell -p nak jq --run \"$0 $*\"" - missing=1 - fi - - if ! command -v jq &> /dev/null; then - log_error "jq not found. Install with your package manager." - missing=1 - fi - - if [[ $missing -eq 1 ]]; then - exit 1 - fi -} - -# Convert hex pubkey to npub -# Args: $1=hex_pubkey -# Returns: npub string or empty on error -hex_to_npub() { - local hex="$1" - nak encode npub "$hex" 2>/dev/null || echo "" -} - -# Count refs in state event (only refs/heads/) -# Args: $1=event_json -# Returns: count -count_state_refs() { - local event="$1" - echo "$event" | jq '[.tags[] | select(.[0] | startswith("refs/heads/"))] | length' 2>/dev/null || echo "0" -} - -# Get git refs from disk -# Args: $1=git_dir -# Returns: count of refs/heads/ refs -count_git_refs() { - local git_dir="$1" - - if [[ ! 
-d "$git_dir" ]]; then - echo "0" - return - fi - - # Try git show-ref first (handles packed refs correctly) - # Note: We capture output separately to avoid pipefail issues - local count - if count=$(git --git-dir="$git_dir" show-ref --heads 2>/dev/null | wc -l); then - echo "$count" | tr -d ' ' - return - fi - - # Fallback: count loose refs (when git is not available or fails) - if [[ -d "$git_dir/refs/heads" ]]; then - find "$git_dir/refs/heads" -type f 2>/dev/null | wc -l | tr -d ' ' - else - echo "0" - fi -} - -# Get ref hash from git directory -# Args: $1=git_dir, $2=ref_path (e.g., refs/heads/main) -# Returns: commit hash or empty -get_git_ref_hash() { - local git_dir="$1" - local ref_path="$2" - - # Try git show-ref first (handles packed refs) - local hash - hash=$(git --git-dir="$git_dir" show-ref --hash "$ref_path" 2>/dev/null | head -1 || echo "") - - if [[ -n "$hash" ]]; then - echo "$hash" - return - fi - - # Fallback: read loose ref file - local ref_file="$git_dir/$ref_path" - if [[ -f "$ref_file" ]]; then - cat "$ref_file" 2>/dev/null | tr -d '\n' || echo "" - else - echo "" - fi -} - -# Compare state event refs to git refs -# Args: $1=event_json, $2=git_dir -# Returns: count of matching refs -count_matching_refs() { - local event="$1" - local git_dir="$2" - local matching=0 - - # Extract refs/heads/ tags and compare - while IFS= read -r ref_tag; do - [[ -z "$ref_tag" ]] && continue - - local ref_path expected_hash - ref_path=$(echo "$ref_tag" | jq -r '.[0]' 2>/dev/null || echo "") - expected_hash=$(echo "$ref_tag" | jq -r '.[1]' 2>/dev/null || echo "") - - # Skip if not a heads ref or hash is missing - [[ ! 
"$ref_path" =~ ^refs/heads/ ]] && continue - [[ -z "$expected_hash" || "$expected_hash" == "null" ]] && continue - - # Get actual hash from git - local actual_hash - actual_hash=$(get_git_ref_hash "$git_dir" "$ref_path") - - if [[ "$expected_hash" == "$actual_hash" ]]; then - matching=$((matching + 1)) - fi - done < <(echo "$event" | jq -c '.tags[] | select(.[0] | startswith("refs/heads/"))' 2>/dev/null) - - echo "$matching" -} - -# Categorize a single entry -# Args: $1=state_refs, $2=git_refs, $3=matches, $4=reason -# Returns: category number (1-4) -categorize_entry() { - local state_refs="$1" - local git_refs="$2" - local matches="$3" - local reason="$4" - - # Category 2: Empty/Blank - if [[ -n "$reason" ]] || [[ "$git_refs" -eq 0 ]]; then - echo "2" - return - fi - - # Category 1: Complete Match - if [[ "$state_refs" -gt 0 ]] && [[ "$state_refs" -eq "$git_refs" ]] && [[ "$matches" -eq "$state_refs" ]]; then - echo "1" - return - fi - - # Category 4: No Match - if [[ "$git_refs" -gt 0 ]] && [[ "$matches" -eq 0 ]]; then - echo "4" - return - fi - - # Category 3: Partial Match (default for anything else with matches > 0) - if [[ "$matches" -gt 0 ]]; then - echo "3" - return - fi - - # Fallback to category 2 - echo "2" -} - -# Format entry for category file -# Args: $1=repo, $2=npub, $3=state_refs, $4=git_refs, $5=matches, $6=reason -format_category_line() { - local repo="$1" - local npub="$2" - local state_refs="$3" - local git_refs="$4" - local matches="$5" - local reason="$6" - - if [[ -n "$reason" ]]; then - echo "$repo | $npub | state_refs=$state_refs | git_refs=$git_refs | matches=$matches | reason=$reason" - else - echo "$repo | $npub | state_refs=$state_refs | git_refs=$git_refs | matches=$matches" - fi -} - -# Process a single state event -# Args: $1=event_json, $2=git_base -# Outputs: TSV line to stdout -process_event() { - local event="$1" - local git_base="$2" - - # Extract repository identifier (d tag) - local identifier - identifier=$(echo "$event" | 
jq -r '.tags[] | select(.[0] == "d") | .[1]' 2>/dev/null | head -1 || echo "") - - if [[ -z "$identifier" ]]; then - return 1 - fi - - # Extract maintainer pubkey (hex) - local hex_pubkey - hex_pubkey=$(echo "$event" | jq -r '.pubkey' 2>/dev/null || echo "") - - if [[ -z "$hex_pubkey" ]]; then - return 1 - fi - - # Convert to npub - local npub - npub=$(hex_to_npub "$hex_pubkey") - - if [[ -z "$npub" ]]; then - return 1 - fi - - # Count state refs - local state_refs - state_refs=$(count_state_refs "$event") - - # Find git directory - local git_dir="$git_base/${npub}/${identifier}.git" - - # Check git directory status - local git_refs=0 - local matches=0 - local reason="" - - if [[ ! -d "$git_dir" ]]; then - reason="no_git_dir" - elif [[ ! -d "$git_dir/refs/heads" ]] && [[ ! -f "$git_dir/packed-refs" ]]; then - reason="empty_refs" - else - git_refs=$(count_git_refs "$git_dir") - - if [[ "$git_refs" -eq 0 ]]; then - reason="empty_refs" - elif [[ "$state_refs" -eq 0 ]]; then - reason="no_state_refs" - else - matches=$(count_matching_refs "$event" "$git_dir") - fi - fi - - # Output TSV line: repo, npub, state_refs, git_refs, matches, reason - printf '%s\t%s\t%s\t%s\t%s\t%s\n' "$identifier" "$npub" "$state_refs" "$git_refs" "$matches" "$reason" -} - -# Main -main() { - local do_categorize=0 - local args=() - - # Parse arguments - for arg in "$@"; do - if [[ "$arg" == "--categorize" ]]; then - do_categorize=1 - else - args+=("$arg") - fi - done - - if [[ ${#args[@]} -ne 3 ]]; then - usage - fi - - local state_events_file="${args[0]}" - local git_base="${args[1]}" - local output_dir="${args[2]}" - - # Validate inputs - if [[ ! -f "$state_events_file" ]]; then - log_error "State events file not found: $state_events_file" - exit 1 - fi - - if [[ ! -d "$git_base" ]]; then - log_error "Git base directory not found: $git_base" - log_error "This script must run on the VPS with access to git directories." - exit 1 - fi - - # Check read permissions - if ! 
ls "$git_base" >/dev/null 2>&1; then - log_error "Cannot read git base directory (permission denied): $git_base" - log_error "Try running with sudo or grant read permissions." - exit 1 - fi - - check_prerequisites - - log_info "=== Git State Synchronization Check ===" - log_info "State events: $state_events_file" - log_info "Git base: $git_base" - log_info "Output: $output_dir" - if [[ $do_categorize -eq 1 ]]; then - log_info "Mode: TSV + categorization" - else - log_info "Mode: TSV only (use 20-categorize.sh for categories)" - fi - log_info "Started: $(date)" - echo "" - - # Create output directory - mkdir -p "$output_dir" - - # Output files - local tsv_file="$output_dir/git-sync-status.tsv" - - # Initialize TSV with header - echo -e "repo\tnpub\tstate_refs\tgit_refs\tmatches\treason" > "$tsv_file" - - # Initialize category files if categorizing - local cat1="" cat2="" cat3="" cat4="" - if [[ $do_categorize -eq 1 ]]; then - cat1="$output_dir/category1-complete-match.txt" - cat2="$output_dir/category2-empty-blank.txt" - cat3="$output_dir/category3-partial-match.txt" - cat4="$output_dir/category4-no-match.txt" - > "$cat1" - > "$cat2" - > "$cat3" - > "$cat4" - fi - - # Count total events - local total_events - total_events=$(wc -l < "$state_events_file" | tr -d ' ') - log_info "Processing $total_events state events..." 
- echo "" - - # Process each event - local count=0 - local processed=0 - local skipped=0 - local count_cat1=0 count_cat2=0 count_cat3=0 count_cat4=0 - local start_time - start_time=$(date +%s) - - while IFS= read -r event; do - count=$((count + 1)) - - # Skip empty lines - [[ -z "$event" ]] && continue - - # Process event - local result - if result=$(process_event "$event" "$git_base"); then - processed=$((processed + 1)) - - # Write to TSV (skip header line) - echo "$result" >> "$tsv_file" - - # Categorize if requested - if [[ $do_categorize -eq 1 ]]; then - # Parse result - IFS=$'\t' read -r repo npub state_refs git_refs matches reason <<< "$result" - - local category - category=$(categorize_entry "$state_refs" "$git_refs" "$matches" "$reason") - - local cat_line - cat_line=$(format_category_line "$repo" "$npub" "$state_refs" "$git_refs" "$matches" "$reason") - - case "$category" in - 1) echo "$cat_line" >> "$cat1"; count_cat1=$((count_cat1 + 1)) ;; - 2) echo "$cat_line" >> "$cat2"; count_cat2=$((count_cat2 + 1)) ;; - 3) echo "$cat_line" >> "$cat3"; count_cat3=$((count_cat3 + 1)) ;; - 4) echo "$cat_line" >> "$cat4"; count_cat4=$((count_cat4 + 1)) ;; - esac - fi - else - skipped=$((skipped + 1)) - fi - - # Progress indicator every 10 events - if [[ $((count % 10)) -eq 0 ]]; then - local elapsed=$(($(date +%s) - start_time)) - local rate=0 - if [[ $elapsed -gt 0 ]]; then - rate=$((count / elapsed)) - fi - local eta="?" - if [[ $rate -gt 0 ]]; then - eta=$(( (total_events - count) / rate )) - fi - log_progress "Processed $count/$total_events events (~${rate}/s, ETA: ${eta}s)..." 
- fi - done < "$state_events_file" - - # Clear progress line - echo "" >&2 - - local end_time - end_time=$(date +%s) - local duration=$((end_time - start_time)) - - # Summary - echo "" - log_info "=== Analysis Complete ===" - log_info "Finished: $(date)" - log_info "Duration: ${duration}s" - log_info "Processed: $processed events" - if [[ $skipped -gt 0 ]]; then - log_warn "Skipped: $skipped events (missing identifier or pubkey)" - fi - echo "" - - if [[ $do_categorize -eq 1 ]]; then - # Calculate percentages - local total=$((count_cat1 + count_cat2 + count_cat3 + count_cat4)) - local pct1=0 pct2=0 pct3=0 pct4=0 - if [[ $total -gt 0 ]]; then - pct1=$(awk "BEGIN {printf \"%.1f\", ($count_cat1/$total)*100}") - pct2=$(awk "BEGIN {printf \"%.1f\", ($count_cat2/$total)*100}") - pct3=$(awk "BEGIN {printf \"%.1f\", ($count_cat3/$total)*100}") - pct4=$(awk "BEGIN {printf \"%.1f\", ($count_cat4/$total)*100}") - fi - - log_info "=== Category Summary ===" - log_success "Category 1 (Complete Match): $count_cat1 ($pct1%)" - log_warn "Category 2 (Empty/Blank): $count_cat2 ($pct2%)" - log_warn "Category 3 (Partial Match): $count_cat3 ($pct3%)" - log_error "Category 4 (No Match): $count_cat4 ($pct4%)" - echo "" - - # Validation warning - if [[ $count_cat2 -eq $total ]] && [[ $total -gt 0 ]]; then - log_error "WARNING: 100% of repos categorized as Empty/Blank" - log_error "This usually indicates a permission or path issue." - echo "" - log_info "Troubleshooting:" - echo " 1. Verify git data exists: sudo ls -la $git_base | head -10" - echo " 2. Check sample repo: sudo find $git_base -name '*.git' -type d | head -1" - echo " 3. 
Re-run with sudo if not already using it" - echo "" - fi - fi - - log_info "Output files:" - echo " $tsv_file" - if [[ $do_categorize -eq 1 ]]; then - echo " $cat1" - echo " $cat2" - echo " $cat3" - echo " $cat4" - else - echo "" - log_info "Next step: Run 20-categorize.sh to categorize results" - echo " ./20-categorize.sh $tsv_file $output_dir" - fi -} - -main "$@" diff --git a/docs/how-to/migration-scripts/20-categorize.sh b/docs/how-to/migration-scripts/20-categorize.sh deleted file mode 100755 index b38dc00..0000000 --- a/docs/how-to/migration-scripts/20-categorize.sh +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/env bash -# -# 20-categorize.sh - Categorize git sync status into 4 categories -# -# PHASE 3a of the GRASP relay to ngit-grasp migration analysis pipeline. -# Takes git-sync-status.tsv from Phase 2 and categorizes into 4 files. -# -# USAGE: -# ./20-categorize.sh -# -# EXAMPLES: -# ./20-categorize.sh output/prod/git-sync-status.tsv output/prod -# ./20-categorize.sh output/archive/git-sync-status.tsv output/archive -# -# INPUT FORMAT (git-sync-status.tsv): -# Tab-separated values with columns: -# reponpubstate_refsgit_refsmatchesreason -# -# Where reason is optional and can be: no_git_dir, empty_refs, no_state_refs -# -# OUTPUT: -# /category1-complete-match.txt - All refs match perfectly -# /category2-empty-blank.txt - No git data available -# /category3-partial-match.txt - Some refs match -# /category4-no-match.txt - Git exists but refs don't match -# -# OUTPUT FORMAT: -# repo | npub | state_refs=N | git_refs=N | matches=N [| reason=X] -# -# CATEGORIES: -# 1. Complete Match: state_refs == git_refs == matches (all > 0) -# 2. Empty/Blank: git_refs == 0 OR reason in (no_git_dir, empty_refs, no_state_refs) -# 3. Partial Match: matches > 0 AND matches < state_refs -# 4. 
No Match: git_refs > 0 AND matches == 0 -# -# PREREQUISITES: -# - awk (standard Unix tool) -# -# RUNTIME: < 1 second (local processing only) -# -# SEE ALSO: -# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide -# 10-check-git-sync.sh - Phase 2 script that produces input for this script -# - -set -euo pipefail - -# Colors for output (disabled if not a terminal) -if [[ -t 1 ]]; then - RED='\033[0;31m' - GREEN='\033[0;32m' - YELLOW='\033[0;33m' - BLUE='\033[0;34m' - NC='\033[0m' -else - RED='' - GREEN='' - YELLOW='' - BLUE='' - NC='' -fi - -log_info() { - echo -e "${BLUE}[INFO]${NC} $*" >&2 -} - -log_success() { - echo -e "${GREEN}[OK]${NC} $*" >&2 -} - -log_warn() { - echo -e "${YELLOW}[WARN]${NC} $*" >&2 -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $*" >&2 -} - -usage() { - echo "Usage: $0 " - echo "" - echo "Arguments:" - echo " git-sync-status.tsv TSV file from Phase 2 (10-check-git-sync.sh)" - echo " output-dir Directory to store categorized output" - echo "" - echo "Examples:" - echo " $0 output/prod/git-sync-status.tsv output/prod" - echo " $0 output/archive/git-sync-status.tsv output/archive" - echo "" - echo "Input format (TSV):" - echo " reponpubstate_refsgit_refsmatchesreason" - echo "" - echo "Output files:" - echo " category1-complete-match.txt - All refs match" - echo " category2-empty-blank.txt - No git data" - echo " category3-partial-match.txt - Some refs match" - echo " category4-no-match.txt - Git exists, refs don't match" - exit 1 -} - -# Main -main() { - if [[ $# -ne 2 ]]; then - usage - fi - - local input_file="$1" - local output_dir="$2" - - # Validate input file - if [[ ! 
-f "$input_file" ]]; then - log_error "Input file not found: $input_file" - exit 1 - fi - - log_info "Categorizing git sync status" - log_info "Input: $input_file" - log_info "Output: $output_dir" - - # Create output directory - mkdir -p "$output_dir" - - # Output files - local cat1="$output_dir/category1-complete-match.txt" - local cat2="$output_dir/category2-empty-blank.txt" - local cat3="$output_dir/category3-partial-match.txt" - local cat4="$output_dir/category4-no-match.txt" - - # Clear previous results - > "$cat1" - > "$cat2" - > "$cat3" - > "$cat4" - - # Process input file with awk - # Input: reponpubstate_refsgit_refsmatchesreason - awk -F'\t' -v cat1="$cat1" -v cat2="$cat2" -v cat3="$cat3" -v cat4="$cat4" ' - BEGIN { - count1 = 0; count2 = 0; count3 = 0; count4 = 0 - } - NR == 1 && /^repo/ { next } # Skip header if present - NF >= 5 { - repo = $1 - npub = $2 - state_refs = int($3) - git_refs = int($4) - matches = int($5) - reason = (NF >= 6) ? $6 : "" - - # Format output line - if (reason != "") { - line = repo " | " npub " | state_refs=" state_refs " | git_refs=" git_refs " | matches=" matches " | reason=" reason - } else { - line = repo " | " npub " | state_refs=" state_refs " | git_refs=" git_refs " | matches=" matches - } - - # Categorize - if (reason == "no_git_dir" || reason == "empty_refs" || reason == "no_state_refs" || git_refs == 0) { - # Category 2: Empty/Blank - print line >> cat2 - count2++ - } else if (state_refs > 0 && state_refs == git_refs && matches == state_refs) { - # Category 1: Complete Match - print line >> cat1 - count1++ - } else if (matches > 0 && matches < state_refs) { - # Category 3: Partial Match - print line >> cat3 - count3++ - } else if (git_refs > 0 && matches == 0) { - # Category 4: No Match - print line >> cat4 - count4++ - } else if (matches > 0) { - # Edge case: matches > 0 but does not fit other categories - # This can happen when git_refs > state_refs but all state refs match - # Treat as partial match - print line 
>> cat3 - count3++ - } else { - # Fallback: treat as category 2 (empty/blank) - print line >> cat2 - count2++ - } - } - END { - total = count1 + count2 + count3 + count4 - print "COUNTS:" count1 ":" count2 ":" count3 ":" count4 ":" total - } - ' "$input_file" 2>&1 | while IFS= read -r line; do - if [[ "$line" =~ ^COUNTS: ]]; then - # Parse counts from awk output - IFS=':' read -r _ c1 c2 c3 c4 total <<< "$line" - - echo "" - log_info "=== Categorization Summary ===" - log_info "Total entries: $total" - log_success "Category 1 (Complete Match): $c1" - log_warn "Category 2 (Empty/Blank): $c2" - log_warn "Category 3 (Partial Match): $c3" - log_error "Category 4 (No Match): $c4" - echo "" - log_info "Output files:" - echo " $cat1" - echo " $cat2" - echo " $cat3" - echo " $cat4" - fi - done -} - -main "$@" diff --git a/docs/how-to/migration-scripts/21-compare-relays.sh b/docs/how-to/migration-scripts/21-compare-relays.sh deleted file mode 100755 index b9c0d30..0000000 --- a/docs/how-to/migration-scripts/21-compare-relays.sh +++ /dev/null @@ -1,294 +0,0 @@ -#!/usr/bin/env bash -# -# 21-compare-relays.sh - Compare prod vs archive category files to find gaps -# -# PHASE 3b of the GRASP relay to ngit-grasp migration analysis pipeline. 
-# Compares categorized output from prod and archive to identify: -# - Repos complete in prod but missing/incomplete in archive -# - Repos in archive but not in prod -# - Status differences between relays -# -# USAGE: -# ./21-compare-relays.sh -# -# EXAMPLES: -# ./21-compare-relays.sh output/prod output/archive output/comparison -# -# INPUT: -# Both prod-dir and archive-dir must contain: -# - category1-complete-match.txt -# - category2-empty-blank.txt -# - category3-partial-match.txt -# - category4-no-match.txt -# -# OUTPUT: -# /complete-in-both.txt - Repos complete in both relays (no action) -# /complete-prod-missing-archive.txt - Complete in prod, not in archive cat1 -# /complete-prod-incomplete-archive.txt - Complete in prod, incomplete in archive -# /incomplete-in-both.txt - Incomplete in both relays -# /in-archive-not-prod.txt - In archive but not in prod -# /summary.txt - Human-readable summary -# -# OUTPUT FORMAT: -# Each file contains lines in the format: -# repo | npub | prod_status | archive_status -# -# PREREQUISITES: -# - awk, sort, comm (standard Unix tools) -# -# RUNTIME: < 1 second (local processing only) -# -# SEE ALSO: -# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide -# 20-categorize.sh - Phase 3a script that produces input for this script -# - -set -euo pipefail - -# Colors for output (disabled if not a terminal) -if [[ -t 1 ]]; then - RED='\033[0;31m' - GREEN='\033[0;32m' - YELLOW='\033[0;33m' - BLUE='\033[0;34m' - NC='\033[0m' -else - RED='' - GREEN='' - YELLOW='' - BLUE='' - NC='' -fi - -log_info() { - echo -e "${BLUE}[INFO]${NC} $*" >&2 -} - -log_success() { - echo -e "${GREEN}[OK]${NC} $*" >&2 -} - -log_warn() { - echo -e "${YELLOW}[WARN]${NC} $*" >&2 -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $*" >&2 -} - -usage() { - echo "Usage: $0 " - echo "" - echo "Arguments:" - echo " prod-dir Directory containing prod category files" - echo " archive-dir Directory containing archive category files" - echo " output-dir Directory to 
store comparison results" - echo "" - echo "Examples:" - echo " $0 output/prod output/archive output/comparison" - echo "" - echo "Required input files in each directory:" - echo " category1-complete-match.txt" - echo " category2-empty-blank.txt" - echo " category3-partial-match.txt" - echo " category4-no-match.txt" - exit 1 -} - -# Extract repo|npub key from category line -# Input: "repo | npub | state_refs=N | ..." -# Output: "repo|npub" -extract_key() { - awk -F' \\| ' '{print $1 "|" $2}' -} - -# Build lookup table from category files -# Args: $1=directory, $2=output_file -build_lookup() { - local dir="$1" - local output="$2" - - # Process all 4 category files - for cat in 1 2 3 4; do - local file="$dir/category${cat}-*.txt" - # shellcheck disable=SC2086 - if ls $file 1>/dev/null 2>&1; then - # shellcheck disable=SC2086 - cat $file | while IFS= read -r line; do - key=$(echo "$line" | extract_key) - echo "${key}|cat${cat}|${line}" - done - fi - done | sort -t'|' -k1,2 > "$output" -} - -# Main -main() { - if [[ $# -ne 3 ]]; then - usage - fi - - local prod_dir="$1" - local archive_dir="$2" - local output_dir="$3" - - # Validate input directories - for dir in "$prod_dir" "$archive_dir"; do - if [[ ! -d "$dir" ]]; then - log_error "Directory not found: $dir" - exit 1 - fi - if [[ ! -f "$dir/category1-complete-match.txt" ]]; then - log_error "Missing category1-complete-match.txt in $dir" - exit 1 - fi - done - - log_info "Comparing relay categories" - log_info "Prod: $prod_dir" - log_info "Archive: $archive_dir" - log_info "Output: $output_dir" - - # Create output directory - mkdir -p "$output_dir" - - # Create temp files for processing - local tmp_dir - tmp_dir=$(mktemp -d) - # shellcheck disable=SC2064 - trap "rm -rf '$tmp_dir'" EXIT - - log_info "Building lookup tables..." 
- - # Build lookup tables: key|category|full_line - build_lookup "$prod_dir" "$tmp_dir/prod_lookup.txt" - build_lookup "$archive_dir" "$tmp_dir/archive_lookup.txt" - - # Extract just keys for comparison - cut -d'|' -f1,2 "$tmp_dir/prod_lookup.txt" | sort -u > "$tmp_dir/prod_keys.txt" - cut -d'|' -f1,2 "$tmp_dir/archive_lookup.txt" | sort -u > "$tmp_dir/archive_keys.txt" - - log_info "Comparing categories..." - - # Initialize output files - > "$output_dir/complete-in-both.txt" - > "$output_dir/complete-prod-missing-archive.txt" - > "$output_dir/complete-prod-incomplete-archive.txt" - > "$output_dir/incomplete-in-both.txt" - > "$output_dir/in-archive-not-prod.txt" - - # Process prod category 1 (complete) entries - while IFS='|' read -r repo npub cat full_line; do - key="${repo}|${npub}" - - # Look up in archive - archive_entry=$(grep "^${key}|" "$tmp_dir/archive_lookup.txt" 2>/dev/null | head -1 || echo "") - - if [[ -z "$archive_entry" ]]; then - # Not in archive at all - echo "$repo | $npub | prod=complete | archive=missing" >> "$output_dir/complete-prod-missing-archive.txt" - else - archive_cat=$(echo "$archive_entry" | cut -d'|' -f3) - if [[ "$archive_cat" == "cat1" ]]; then - # Complete in both - echo "$repo | $npub | prod=complete | archive=complete" >> "$output_dir/complete-in-both.txt" - else - # Complete in prod, incomplete in archive - echo "$repo | $npub | prod=complete | archive=$archive_cat" >> "$output_dir/complete-prod-incomplete-archive.txt" - fi - fi - done < <(grep '|cat1|' "$tmp_dir/prod_lookup.txt" | sed 's/|cat1|/|cat1|/') - - # Process prod categories 2-4 (incomplete) entries - for cat in cat2 cat3 cat4; do - while IFS='|' read -r repo npub _ full_line; do - key="${repo}|${npub}" - - # Look up in archive - archive_entry=$(grep "^${key}|" "$tmp_dir/archive_lookup.txt" 2>/dev/null | head -1 || echo "") - - if [[ -z "$archive_entry" ]]; then - # Incomplete in prod, missing in archive - echo "$repo | $npub | prod=$cat | archive=missing" >> 
"$output_dir/incomplete-in-both.txt" - else - archive_cat=$(echo "$archive_entry" | cut -d'|' -f3) - if [[ "$archive_cat" != "cat1" ]]; then - # Incomplete in both - echo "$repo | $npub | prod=$cat | archive=$archive_cat" >> "$output_dir/incomplete-in-both.txt" - fi - # If archive is complete but prod is not, that's unusual but not an error - fi - done < <(grep "|${cat}|" "$tmp_dir/prod_lookup.txt") - done - - # Find entries in archive but not in prod - comm -23 "$tmp_dir/archive_keys.txt" "$tmp_dir/prod_keys.txt" | while IFS='|' read -r repo npub; do - key="${repo}|${npub}" - archive_entry=$(grep "^${key}|" "$tmp_dir/archive_lookup.txt" 2>/dev/null | head -1 || echo "") - archive_cat=$(echo "$archive_entry" | cut -d'|' -f3) - echo "$repo | $npub | prod=missing | archive=$archive_cat" >> "$output_dir/in-archive-not-prod.txt" - done - - # Count results - local count_both count_missing count_incomplete count_both_incomplete count_archive_only - count_both=$(wc -l < "$output_dir/complete-in-both.txt" | tr -d ' ') - count_missing=$(wc -l < "$output_dir/complete-prod-missing-archive.txt" | tr -d ' ') - count_incomplete=$(wc -l < "$output_dir/complete-prod-incomplete-archive.txt" | tr -d ' ') - count_both_incomplete=$(wc -l < "$output_dir/incomplete-in-both.txt" | tr -d ' ') - count_archive_only=$(wc -l < "$output_dir/in-archive-not-prod.txt" | tr -d ' ') - - # Generate summary - cat > "$output_dir/summary.txt" << EOF -# Relay Comparison Summary -Generated: $(date -Iseconds) - -## Input -- Prod: $prod_dir -- Archive: $archive_dir - -## Results - -### No Action Required -- Complete in both relays: $count_both - -### Action/Decision Required -- Complete in prod, MISSING from archive: $count_missing -- Complete in prod, INCOMPLETE in archive: $count_incomplete -- Incomplete in BOTH relays: $count_both_incomplete - -### For Reference -- In archive but not in prod: $count_archive_only - -## Files -- complete-in-both.txt: Repos successfully migrated (no action) -- 
complete-prod-missing-archive.txt: Need investigation - why not in archive? -- complete-prod-incomplete-archive.txt: Archive sync may still be in progress -- incomplete-in-both.txt: Git data incomplete on both relays -- in-archive-not-prod.txt: May be deleted from prod or new to archive - -## Next Steps -1. Review complete-prod-missing-archive.txt - these repos need attention -2. Check if archive sync is still running for incomplete entries -3. Cross-reference with deletion events (kind 5) from Phase 1 -4. Use Phase 4 logs to understand parse failures and purgatory expiry -EOF - - # Display summary - echo "" - log_info "=== Comparison Summary ===" - log_success "Complete in both: $count_both (no action needed)" - log_error "Complete in prod, MISSING from archive: $count_missing" - log_warn "Complete in prod, incomplete in archive: $count_incomplete" - log_warn "Incomplete in both: $count_both_incomplete" - log_info "In archive only: $count_archive_only" - echo "" - log_info "Output files:" - echo " $output_dir/complete-in-both.txt" - echo " $output_dir/complete-prod-missing-archive.txt" - echo " $output_dir/complete-prod-incomplete-archive.txt" - echo " $output_dir/incomplete-in-both.txt" - echo " $output_dir/in-archive-not-prod.txt" - echo " $output_dir/summary.txt" -} - -main "$@" diff --git a/docs/how-to/migration-scripts/22-compare-git-data.sh b/docs/how-to/migration-scripts/22-compare-git-data.sh deleted file mode 100755 index 76521d4..0000000 --- a/docs/how-to/migration-scripts/22-compare-git-data.sh +++ /dev/null @@ -1,390 +0,0 @@ -#!/usr/bin/env bash -# -# 22-compare-git-data.sh - Compare actual git data between prod and archive relays -# -# PHASE 3c of the GRASP relay to ngit-grasp migration analysis pipeline. -# Compares actual git commits between prod and archive to determine which is ahead. -# -# KEY INSIGHT: -# Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event. 
-# If archive has different/newer data than prod, it means: -# - A state event authorized those commits at some point -# - Archive is actually MORE up-to-date than prod -# - Migration should use archive data (it's already correct) -# -# USAGE: -# ./22-compare-git-data.sh -# -# EXAMPLES: -# ./22-compare-git-data.sh /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \ -# output/comparison/complete-prod-incomplete-archive.txt output/comparison -# -# INPUT: -# prod-git-base Base directory for prod git repos (e.g., /var/lib/grasp-relay/git) -# archive-git-base Base directory for archive git repos (e.g., /var/lib/ngit-grasp/git) -# repo-list File with repos to compare (format: "repo | npub | ...") -# -# OUTPUT: -# /git-ancestry.tsv - Tab-separated values: -# reponpubrelationshipdetails -# -# Relationship values: -# archive-ahead - Archive has all prod commits plus more (GOOD - use archive) -# in-sync - Both have identical commits -# prod-ahead - Prod has commits archive is missing (needs re-sync) -# diverged - Both have unique commits (manual review) -# archive-only - Only archive has git data -# prod-only - Only prod has git data -# both-empty - Neither has git data -# -# PREREQUISITES: -# - git (for ref comparison) -# - Read access to both git directories (may need sudo) -# -# RUNTIME: Depends on number of repos to compare -# -# SEE ALSO: -# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide -# 21-compare-relays.sh - Phase 3b script that identifies repos to compare -# - -set -euo pipefail - -# Colors for output (disabled if not a terminal) -if [[ -t 1 ]]; then - RED='\033[0;31m' - GREEN='\033[0;32m' - YELLOW='\033[0;33m' - BLUE='\033[0;34m' - NC='\033[0m' -else - RED='' - GREEN='' - YELLOW='' - BLUE='' - NC='' -fi - -log_info() { - echo -e "${BLUE}[INFO]${NC} $*" >&2 -} - -log_success() { - echo -e "${GREEN}[OK]${NC} $*" >&2 -} - -log_warn() { - echo -e "${YELLOW}[WARN]${NC} $*" >&2 -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $*" >&2 -} - -log_progress() { 
- echo -ne "\r${BLUE}[PROGRESS]${NC} $*" >&2 -} - -usage() { - echo "Usage: $0 " - echo "" - echo "Arguments:" - echo " prod-git-base Base directory for prod git repos" - echo " archive-git-base Base directory for archive git repos" - echo " repo-list File with repos to compare (format: 'repo | npub | ...')" - echo " output-dir Directory to store output files" - echo "" - echo "Examples:" - echo " $0 /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \\" - echo " output/comparison/complete-prod-incomplete-archive.txt output/comparison" - echo "" - echo "Output:" - echo " git-ancestry.tsv - TSV with: repo, npub, relationship, details" - exit 1 -} - -# Get all branch refs from a git directory -# Args: $1=git_dir -# Returns: sorted list of "ref_name commit_hash" lines -get_git_refs() { - local git_dir="$1" - - if [[ ! -d "$git_dir" ]]; then - return - fi - - git --git-dir="$git_dir" show-ref --heads 2>/dev/null | sort || true -} - -# Check if commit A is ancestor of commit B -# Args: $1=git_dir, $2=commit_a, $3=commit_b -# Returns: 0 if A is ancestor of B, 1 otherwise -is_ancestor() { - local git_dir="$1" - local commit_a="$2" - local commit_b="$3" - - git --git-dir="$git_dir" merge-base --is-ancestor "$commit_a" "$commit_b" 2>/dev/null -} - -# Compare git data between prod and archive for a single repo -# Args: $1=prod_git_dir, $2=archive_git_dir -# Returns: relationship string -compare_repo_git() { - local prod_git="$1" - local archive_git="$2" - - local prod_exists=false - local archive_exists=false - - [[ -d "$prod_git" ]] && prod_exists=true - [[ -d "$archive_git" ]] && archive_exists=true - - # Handle cases where one or both don't exist - if [[ "$prod_exists" == "false" && "$archive_exists" == "false" ]]; then - echo "both-empty" - return - fi - - if [[ "$prod_exists" == "false" ]]; then - echo "archive-only" - return - fi - - if [[ "$archive_exists" == "false" ]]; then - echo "prod-only" - return - fi - - # Both exist - get refs - local prod_refs archive_refs - 
prod_refs=$(get_git_refs "$prod_git") - archive_refs=$(get_git_refs "$archive_git") - - # Handle empty refs - if [[ -z "$prod_refs" && -z "$archive_refs" ]]; then - echo "both-empty" - return - fi - - if [[ -z "$prod_refs" ]]; then - echo "archive-only" - return - fi - - if [[ -z "$archive_refs" ]]; then - echo "prod-only" - return - fi - - # Compare refs - check if they're identical - if [[ "$prod_refs" == "$archive_refs" ]]; then - echo "in-sync" - return - fi - - # Refs differ - need to check ancestry - # Strategy: For each branch, check if one is ancestor of the other - # If all archive branches are ahead of or equal to prod branches, archive is ahead - # If all prod branches are ahead of or equal to archive branches, prod is ahead - # Otherwise, they've diverged - - local archive_ahead=true - local prod_ahead=true - local has_common_branch=false - - # Create temporary file to use archive as reference repo for ancestry checks - # We need a repo that has both sets of commits to check ancestry - # Use archive since it's the target and should have the superset - - # Check each prod branch against archive - while read -r prod_hash prod_ref; do - [[ -z "$prod_hash" ]] && continue - - # Get the same branch from archive - local archive_hash - archive_hash=$(echo "$archive_refs" | grep " $prod_ref$" | awk '{print $1}' || echo "") - - if [[ -z "$archive_hash" ]]; then - # Branch exists in prod but not archive - prod has something archive doesn't - # But this could be a deleted branch, so don't immediately say prod is ahead - continue - fi - - has_common_branch=true - - if [[ "$prod_hash" == "$archive_hash" ]]; then - # Same commit - neither ahead for this branch - continue - fi - - # Different commits - check ancestry - # First, try to check if prod is ancestor of archive (archive ahead) - if is_ancestor "$archive_git" "$prod_hash" "$archive_hash" 2>/dev/null; then - # Prod commit is ancestor of archive commit - archive is ahead for this branch - prod_ahead=false - elif 
is_ancestor "$archive_git" "$archive_hash" "$prod_hash" 2>/dev/null; then - # Archive commit is ancestor of prod commit - prod is ahead for this branch - archive_ahead=false - else - # Neither is ancestor - diverged - archive_ahead=false - prod_ahead=false - fi - done <<< "$prod_refs" - - # Also check for branches only in archive (archive has extra branches) - while read -r archive_hash archive_ref; do - [[ -z "$archive_hash" ]] && continue - - local prod_hash - prod_hash=$(echo "$prod_refs" | grep " $archive_ref$" | awk '{print $1}' || echo "") - - if [[ -z "$prod_hash" ]]; then - # Branch exists in archive but not prod - archive has something prod doesn't - # This means archive is ahead (has extra branches) - prod_ahead=false - fi - done <<< "$archive_refs" - - # Determine final relationship - if [[ "$has_common_branch" == "false" ]]; then - # No common branches - completely different - echo "diverged" - return - fi - - if [[ "$archive_ahead" == "true" && "$prod_ahead" == "false" ]]; then - echo "archive-ahead" - elif [[ "$prod_ahead" == "true" && "$archive_ahead" == "false" ]]; then - echo "prod-ahead" - elif [[ "$archive_ahead" == "true" && "$prod_ahead" == "true" ]]; then - # Both true means all common branches are identical - # But one might have extra branches - echo "in-sync" - else - echo "diverged" - fi -} - -# Main -main() { - if [[ $# -ne 4 ]]; then - usage - fi - - local prod_git_base="$1" - local archive_git_base="$2" - local repo_list="$3" - local output_dir="$4" - - # Validate inputs - if [[ ! -d "$prod_git_base" ]]; then - log_error "Prod git base directory not found: $prod_git_base" - exit 1 - fi - - if [[ ! -d "$archive_git_base" ]]; then - log_error "Archive git base directory not found: $archive_git_base" - exit 1 - fi - - if [[ ! 
-f "$repo_list" ]]; then - log_error "Repo list file not found: $repo_list" - exit 1 - fi - - log_info "=== Git Data Comparison ===" - log_info "Prod git base: $prod_git_base" - log_info "Archive git base: $archive_git_base" - log_info "Repo list: $repo_list" - log_info "Output: $output_dir" - log_info "Started: $(date)" - echo "" - - # Create output directory - mkdir -p "$output_dir" - - # Output file - local tsv_file="$output_dir/git-ancestry.tsv" - - # Initialize TSV with header - echo -e "repo\tnpub\trelationship\tdetails" > "$tsv_file" - - # Count repos - local total_repos - total_repos=$(grep -c -v '^#' "$repo_list" 2>/dev/null || echo "0") - log_info "Processing $total_repos repos..." - echo "" - - # Counters - local count=0 - local count_archive_ahead=0 - local count_in_sync=0 - local count_prod_ahead=0 - local count_diverged=0 - local count_archive_only=0 - local count_prod_only=0 - local count_both_empty=0 - - # Process each repo - while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do - # Skip comments and empty lines - [[ "$repo" =~ ^# ]] && continue - [[ -z "$repo" ]] && continue - - # Clean up whitespace - repo="${repo// /}" - npub="${npub// /}" - - [[ -z "$repo" || -z "$npub" ]] && continue - - count=$((count + 1)) - - # Build git paths - local prod_git="$prod_git_base/${npub}/${repo}.git" - local archive_git="$archive_git_base/${npub}/${repo}.git" - - # Compare - local relationship details="" - relationship=$(compare_repo_git "$prod_git" "$archive_git") - - # Count by relationship - case "$relationship" in - archive-ahead) count_archive_ahead=$((count_archive_ahead + 1)) ;; - in-sync) count_in_sync=$((count_in_sync + 1)) ;; - prod-ahead) count_prod_ahead=$((count_prod_ahead + 1)) ;; - diverged) count_diverged=$((count_diverged + 1)) ;; - archive-only) count_archive_only=$((count_archive_only + 1)) ;; - prod-only) count_prod_only=$((count_prod_only + 1)) ;; - both-empty) count_both_empty=$((count_both_empty + 1)) ;; - esac - - # Output TSV line 
- printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$relationship" "$details" >> "$tsv_file" - - # Progress indicator every 10 repos - if [[ $((count % 10)) -eq 0 ]]; then - log_progress "Processed $count/$total_repos repos..." - fi - done < "$repo_list" - - # Clear progress line - echo "" >&2 - - # Summary - echo "" - log_info "=== Comparison Summary ===" - log_success "Archive ahead (use archive data): $count_archive_ahead" - log_success "In sync: $count_in_sync" - log_warn "Prod ahead (needs re-sync): $count_prod_ahead" - log_error "Diverged (manual review): $count_diverged" - log_info "Archive only: $count_archive_only" - log_info "Prod only: $count_prod_only" - log_info "Both empty: $count_both_empty" - echo "" - log_info "Total: $count repos" - log_info "Output: $tsv_file" -} - -main "$@" diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh deleted file mode 100755 index d762aae..0000000 --- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh +++ /dev/null @@ -1,774 +0,0 @@ -#!/usr/bin/env bash -# -# 30-extract-parse-failures.sh - Extract parse failure events from systemd logs -# -# PHASE 4a of the GRASP relay to ngit-grasp migration analysis pipeline. -# Extracts structured [PARSE_FAIL] log entries AND "Invalid announcement" -# rejections from journalctl. 
-# -# USAGE: -# ./30-extract-parse-failures.sh [options] -# -# EXAMPLES: -# # Extract from ngit-grasp service (last 30 days, default) -# ./30-extract-parse-failures.sh ngit-grasp.service output/logs -# -# # Extract with custom time range -# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-01" -# -# # Extract from specific time window -# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22" -# -# OPTIONS: -# --since Start date for log extraction (default: 30 days ago) -# --until End date for log extraction (default: now) -# --dry-run Show what would be extracted without writing files -# -# ENRICHMENT: -# The script automatically enriches parse failures with repo/npub information -# by extracting from "Added rejected announcement" log entries which include -# pubkey and identifier fields. Hex pubkeys are converted to npub format using -# `nak encode npub ` if the nak tool is available. -# -# OUTPUT: -# /parse-failures.txt -# -# OUTPUT FORMAT (TSV): -# event_idkindreasonreponpub -# -# EXPECTED LOG FORMATS: -# The script looks for three types of log entries: -# -# 1. Structured [PARSE_FAIL] entries: -# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... -# -# 2. "Invalid announcement" rejections (write policy): -# Event rejected by write policy event_id=abc123... relay=wss://... kind=30617 reason=Invalid announcement: multiple clone tags found... -# -# 3. "Added rejected announcement" entries (for enrichment): -# Added rejected announcement to two-tier index event_id=abc123... kind=30617 identifier=myrepo pubkey=hex... -# These entries provide pubkey and identifier for enriching write policy rejections. -# -# NOTE: Builder logs ("Rejected repository announcement note1xxx:") are NOT extracted -# because they use bech32 (note1) IDs while write policy logs use hex IDs. 
Extracting -# both would cause double-counting since deduplication only works within each format. -# Write policy logs contain the same events, so we don't lose any data. -# -# Required fields: kind, event_id, reason -# Enrichment fields: repo (identifier), npub (converted from hex pubkey) -# -# DEPENDENCY: -# This script requires logging improvements in ngit-grasp to emit structured -# [PARSE_FAIL] log entries. Until those are implemented, this script will -# find no matching entries (which is handled gracefully). -# -# "Invalid announcement" rejections are logged by the write policy and -# should be present in any ngit-grasp deployment. -# -# See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section) -# -# Expected Rust logging code for [PARSE_FAIL]: -# tracing::warn!( -# target: "migration", -# "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}", -# event.kind, event.id, reason, identifier, npub -# ); -# -# PREREQUISITES: -# - journalctl (systemd) -# - grep, awk, sed (standard Unix tools) -# - Access to systemd journal (may require sudo or journal group membership) -# -# RUNTIME: Depends on log volume, typically < 30 seconds -# -# SEE ALSO: -# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide -# 31-extract-purgatory-expiry.sh - Companion script for purgatory expiry logs -# - -set -euo pipefail - -# Get script directory for sourcing helpers -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Source the service validation helper -if [[ -f "$SCRIPT_DIR/validate-service.sh" ]]; then - source "$SCRIPT_DIR/validate-service.sh" -fi - -# Colors for output (disabled if not a terminal) -if [[ -t 1 ]]; then - RED='\033[0;31m' - GREEN='\033[0;32m' - YELLOW='\033[0;33m' - BLUE='\033[0;34m' - NC='\033[0m' -else - RED='' - GREEN='' - YELLOW='' - BLUE='' - NC='' -fi - -log_info() { - echo -e "${BLUE}[INFO]${NC} $*" >&2 -} - -log_success() { - echo -e "${GREEN}[OK]${NC} $*" >&2 -} - -log_warn() { - echo -e "${YELLOW}[WARN]${NC} $*" >&2 
-} - -log_error() { - echo -e "${RED}[ERROR]${NC} $*" >&2 -} - -usage() { - echo "Usage: $0 [options]" - echo "" - echo "Arguments:" - echo " service-name Systemd service name (e.g., ngit-grasp.service)" - echo " output-dir Directory to store extracted log data" - echo "" - echo "Options:" - echo " --since Start date (default: 30 days ago)" - echo " --until End date (default: now)" - echo " --dry-run Show what would be extracted without writing" - echo "" - echo "Examples:" - echo " $0 ngit-grasp.service output/logs" - echo " $0 ngit-grasp.service output/logs --since '2026-01-01'" - echo " $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'" - echo "" - echo "Expected log formats:" - echo " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." - echo " Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ..." - echo "" - echo "Enrichment:" - echo " Parse failures are automatically enriched with repo/npub from" - echo " 'Added rejected announcement' log entries. Hex pubkeys are converted" - echo " to npub format using 'nak encode npub' if available." - exit 1 -} - -# ============================================================================= -# AWK-BASED BATCH PARSING FUNCTIONS -# ============================================================================= -# These functions use awk for efficient batch processing instead of per-line -# grep calls. This provides ~400x speedup for large log files. -# -# NOTE: parse_builder_rejection_line() was removed to fix double-counting bug. -# Builder logs use bech32 (note1) IDs while write policy logs use hex IDs. -# Since deduplication only works within each format, extracting both caused -# the same event to be counted twice. Write policy logs contain the same -# events, so we don't lose any data by only extracting from that source. 
- -# Parse [PARSE_FAIL] log lines in batch using awk -# Input: file containing log lines with [PARSE_FAIL] -# Output: TSV lines: event_idkindreasonreponpub -parse_parse_fail_batch() { - local input_file="$1" - awk ' - { - # Extract kind=VALUE - kind = "" - if (match($0, /kind=([0-9]+)/, m)) kind = m[1] - - # Extract event_id=VALUE (hex string) - event_id = "" - if (match($0, /event_id=([a-f0-9]+)/, m)) event_id = m[1] - - # Extract reason="VALUE" (quoted string) - reason = "" - if (match($0, /reason="([^"]*)"/, m)) reason = m[1] - - # Extract repo=VALUE (optional) - repo = "" - if (match($0, /repo=([^ ]+)/, m)) repo = m[1] - - # Extract npub=VALUE (optional) - npub = "" - if (match($0, /npub=([^ ]+)/, m)) npub = m[1] - - # Output if we have required fields - if (kind != "" && event_id != "" && reason != "") { - print event_id "\t" kind "\t" reason "\t" repo "\t" npub - } - } - ' "$input_file" -} - -# Parse "Invalid announcement" rejection log lines in batch using awk -# Input: file containing "Event rejected by write policy" log lines -# Output: TSV lines: event_idkindreason -parse_write_policy_rejection_batch() { - local input_file="$1" - awk ' - { - # Extract event_id=VALUE (hex string) - event_id = "" - if (match($0, /event_id=([a-f0-9]+)/, m)) event_id = m[1] - - # Extract kind=VALUE - kind = "" - if (match($0, /kind=([0-9]+)/, m)) kind = m[1] - - # Extract reason=VALUE (everything after "reason=") - reason = "" - if (match($0, /reason=(.*)$/, m)) reason = m[1] - - # Output if we have required fields (repo and npub are empty) - if (kind != "" && event_id != "" && reason != "") { - print event_id "\t" kind "\t" reason "\t\t" - } - } - ' "$input_file" -} - -# Parse "Added rejected announcement" log lines in batch using awk -# Input: file containing "Added rejected announcement to two-tier index" log lines -# Output: TSV lines: event_ididentifierpubkey_hex -parse_rejected_announcement_batch() { - local input_file="$1" - awk ' - { - # Extract event_id=VALUE (hex 
string) - event_id = "" - if (match($0, /event_id=([a-f0-9]+)/, m)) event_id = m[1] - - # Extract identifier=VALUE (repo name) - identifier = "" - if (match($0, /identifier=([^ ]+)/, m)) identifier = m[1] - - # Extract pubkey=VALUE (hex string) - pubkey = "" - if (match($0, /pubkey=([a-f0-9]+)/, m)) pubkey = m[1] - - # Output if we have all required fields - if (event_id != "" && identifier != "" && pubkey != "") { - print event_id "\t" identifier "\t" pubkey - } - } - ' "$input_file" -} - -# Enrich parse failures with repo/npub by looking up event_id in "Added rejected announcement" log entries -# This is critical because "Invalid announcement" rejections only log event_id and kind, -# not the repo name or npub. Without enrichment, Phase 5 shows event_id|kind instead -# of repo|npub in action-required.txt, making the output unusable. -# -# Arguments: -# $1 - parse failures file to enrich (modified in place) -# $2 - lookup file containing event_id -> identifier|pubkey mappings from logs -# -# The function: -# 1. Uses the lookup table built from "Added rejected announcement" log entries -# 2. For each parse failure with empty repo/npub, looks up the event_id -# 3. Populates repo and npub columns from the lookup -# 4. Converts hex pubkeys to npub format using `nak encode npub` if available -# -# OPTIMIZATION: This function uses batch processing for efficiency: -# - Uses awk for O(n) join instead of per-line grep (O(n*m)) -# - Batches all pubkey->npub conversions in a single nak call -# - This reduces runtime from minutes to seconds for large datasets -enrich_with_repo_npub() { - local parse_failures_file="$1" - local lookup_file="$2" - - # Validate lookup file exists and has content - if [[ ! -f "$lookup_file" ]] || [[ ! -s "$lookup_file" ]]; then - log_warn "No enrichment data available - repo/npub columns will remain empty" - return 0 - fi - - log_info "Enriching parse failures with repo/npub from log entries..." 
- - # Check if we have nak for pubkey->npub conversion - local can_convert_npub=false - if command -v nak &> /dev/null; then - can_convert_npub=true - log_info " Using 'nak' for pubkey->npub conversion" - else - log_warn " 'nak' not found - will use hex pubkeys instead of npub" - fi - - local lookup_count - lookup_count=$(wc -l < "$lookup_file") - lookup_count="${lookup_count//[^0-9]/}" - log_info " Lookup table has $lookup_count entries" - - # STEP 1: Extract unique pubkeys that need conversion - # Get pubkeys from lookup file (column 3), deduplicate - local unique_pubkeys_file npub_map_file - unique_pubkeys_file=$(mktemp) - npub_map_file=$(mktemp) - - cut -f3 "$lookup_file" | sort -u > "$unique_pubkeys_file" - local unique_pubkey_count - unique_pubkey_count=$(wc -l < "$unique_pubkeys_file") - unique_pubkey_count="${unique_pubkey_count//[^0-9]/}" - log_info " Converting $unique_pubkey_count unique pubkeys to npub format..." - - # STEP 2: Batch convert all pubkeys to npub in a single nak call - # nak reads hex pubkeys from stdin (one per line) and outputs npubs - if [[ "$can_convert_npub" == true && "$unique_pubkey_count" -gt 0 ]]; then - # Create mapping file: pubkey_hexnpub - # nak encode npub reads from stdin and outputs one npub per line - paste "$unique_pubkeys_file" <(nak encode npub < "$unique_pubkeys_file" 2>/dev/null) > "$npub_map_file" || { - # Fallback: if batch conversion fails, use hex pubkeys - log_warn " Batch npub conversion failed, using hex pubkeys" - awk '{print $1 "\t" $1}' "$unique_pubkeys_file" > "$npub_map_file" - } - else - # No nak available, use hex pubkeys as-is - awk '{print $1 "\t" $1}' "$unique_pubkeys_file" > "$npub_map_file" - fi - - rm -f "$unique_pubkeys_file" - - # STEP 3: Use awk for efficient join (O(n) instead of O(n*m) grep per line) - # This joins parse_failures with lookup_file on event_id, then with npub_map on pubkey - local enriched_file - enriched_file=$(mktemp) - - # Copy header lines - grep '^#' "$parse_failures_file" 
> "$enriched_file" 2>/dev/null || true - - # Use awk to perform the join efficiently - # Input files (order matters for ARGIND): - # 1. npub_map_file: pubkey_hexnpub - # 2. lookup_file: event_ididentifierpubkey_hex - # 3. parse_failures_file: event_idkindreasonreponpub - awk -F'\t' -v OFS='\t' ' - # Track which file we are processing - FNR==1 { file_num++ } - - # First file: npub_map (pubkey_hex -> npub) - file_num==1 { - npub_map[$1] = $2 - next - } - # Second file: lookup (event_id -> identifier, pubkey_hex) - file_num==2 { - lookup_repo[$1] = $2 - lookup_pubkey[$1] = $3 - next - } - # Third file: parse_failures - /^#/ { next } # Skip headers (already copied) - { - event_id = $1 - kind = $2 - reason = $3 - repo = $4 - npub = $5 - - # If repo/npub empty, try to enrich from lookup - if (repo == "" && event_id in lookup_repo) { - repo = lookup_repo[event_id] - } - if (npub == "" && event_id in lookup_pubkey) { - pubkey = lookup_pubkey[event_id] - if (pubkey in npub_map) { - npub = npub_map[pubkey] - } else { - npub = pubkey # Fallback to hex - } - } - - print event_id, kind, reason, repo, npub - } - ' "$npub_map_file" "$lookup_file" "$parse_failures_file" >> "$enriched_file" - - rm -f "$npub_map_file" - - # Count enriched entries - local enriched_count total_count - total_count=$(grep -v '^#' "$parse_failures_file" | wc -l) - total_count="${total_count//[^0-9]/}" - # Count entries that have non-empty repo AND npub after enrichment - enriched_count=$(grep -v '^#' "$enriched_file" | awk -F'\t' '$4 != "" && $5 != ""' | wc -l) - enriched_count="${enriched_count//[^0-9]/}" - - # Replace original with enriched version - mv "$enriched_file" "$parse_failures_file" - - log_info " Enriched $enriched_count of $total_count parse failures with repo/npub" - log_success "Enrichment complete" -} - -# Parse "Added rejected announcement" log entries to build enrichment lookup table -# Input: log line containing "Added rejected announcement to two-tier index" -# Output: TSV line: 
event_ididentifierpubkey_hex -parse_rejected_announcement_line() { - local line="$1" - - local event_id identifier pubkey_hex - - # Extract event_id=VALUE (hex string) - event_id=$(echo "$line" | grep -oP 'event_id=\K[a-f0-9]+' || echo "") - - # Extract identifier=VALUE (repo name) - identifier=$(echo "$line" | grep -oP 'identifier=\K[^ ]+' || echo "") - - # Extract pubkey=VALUE (hex string) - pubkey_hex=$(echo "$line" | grep -oP 'pubkey=\K[a-f0-9]+' || echo "") - - # Only output if we have all required fields - if [[ -n "$event_id" && -n "$identifier" && -n "$pubkey_hex" ]]; then - printf '%s\t%s\t%s\n' "$event_id" "$identifier" "$pubkey_hex" - fi -} - -# Main -main() { - if [[ $# -lt 2 ]]; then - usage - fi - - local service="$1" - local output_dir="$2" - shift 2 - - # Default time range: last 30 days - local since_date - since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "") - local until_date="" - local dry_run=false - - # Parse options - while [[ $# -gt 0 ]]; do - case "$1" in - --since) - since_date="$2" - shift 2 - ;; - --until) - until_date="$2" - shift 2 - ;; - --dry-run) - dry_run=true - shift - ;; - *) - log_error "Unknown option: $1" - usage - ;; - esac - done - - # Validate service name format - if [[ ! "$service" =~ \.service$ ]]; then - service="${service}.service" - fi - - # Validate service is appropriate for structured logging - # This prevents the common mistake of using ngit-relay instead of ngit-grasp - if type validate_service_for_structured_logging &>/dev/null; then - # Use non-interactive mode if not a terminal, skip log check (we'll do our own) - local interactive="true" - [[ ! -t 0 ]] && interactive="false" - - if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then - log_error "Service validation failed. Use an ngit-grasp service for structured logging." 
- exit 1 - fi - else - # Fallback validation if helper not available - if [[ "$service" == *"ngit-relay"* ]]; then - log_error "Service name appears to be ngit-relay: $service" - log_error "Structured logging ([PARSE_FAIL]) only exists in ngit-grasp services." - log_error "Please use the ngit-grasp archive service instead." - log_error "" - log_error "To find the correct service:" - log_error " systemctl list-units 'ngit-grasp*' --all" - exit 1 - fi - fi - - log_info "Extracting parse failures from systemd logs" - log_info "Service: $service" - log_info "Output: $output_dir" - log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" - - # Check if journalctl is available - if ! command -v journalctl &> /dev/null; then - log_error "journalctl not found. This script requires systemd." - exit 1 - fi - - # Validate service exists (check if journalctl can find any logs for it) - # Note: We don't require the service to be running, just that it has logs - if ! journalctl --no-pager -u "$service" -n 1 &>/dev/null; then - log_warn "Could not query logs for service: $service" - log_warn "This may indicate the service doesn't exist or you lack permissions." 
- log_warn "" - log_warn "To list available ngit-grasp services:" - log_warn " systemctl list-units 'ngit-grasp*' --all" - log_warn " journalctl --list-boots # Check if you have journal access" - log_warn "" - # Continue anyway - the service might exist but have no logs yet - fi - - # Build journalctl command - local journal_cmd="journalctl -u $service --no-pager -o short-iso" - - if [[ -n "$since_date" ]]; then - journal_cmd="$journal_cmd --since '$since_date'" - fi - - if [[ -n "$until_date" ]]; then - journal_cmd="$journal_cmd --until '$until_date'" - fi - - log_info "Running: $journal_cmd | grep '[PARSE_FAIL]' or 'Invalid announcement'" - - if [[ "$dry_run" == true ]]; then - log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt" - - # Show sample of what would be extracted - log_info "Checking for matching log entries..." - local parse_fail_count invalid_announcement_count - parse_fail_count=$(eval "$journal_cmd" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") - parse_fail_count="${parse_fail_count//[^0-9]/}" # Strip non-numeric characters - parse_fail_count="${parse_fail_count:-0}" - - invalid_announcement_count=$(eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep -c 'Invalid announcement' || echo "0") - invalid_announcement_count="${invalid_announcement_count//[^0-9]/}" - invalid_announcement_count="${invalid_announcement_count:-0}" - - log_info "Found $parse_fail_count [PARSE_FAIL] entries" - log_info "Found $invalid_announcement_count 'Invalid announcement' rejections" - - if [[ "$parse_fail_count" -eq 0 && "$invalid_announcement_count" -eq 0 ]]; then - log_warn "No matching entries found in logs." - log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." 
- log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" - fi - - exit 0 - fi - - # Create output directory - mkdir -p "$output_dir" - - local output_file="$output_dir/parse-failures.txt" - local temp_file - temp_file=$(mktemp) - - # Extract and parse log entries using streaming (avoids loading all logs into memory) - log_info "Extracting log entries..." - - # Create temp files for intermediate results - local temp_stderr temp_parse_fail temp_write_policy_rejection temp_rejected_announcement - temp_stderr=$(mktemp) - temp_parse_fail=$(mktemp) - temp_write_policy_rejection=$(mktemp) - temp_rejected_announcement=$(mktemp) - - # Extract [PARSE_FAIL] entries directly to temp file (streaming) - log_info " Searching for [PARSE_FAIL] entries..." - eval "$journal_cmd" 2>"$temp_stderr" | grep '\[PARSE_FAIL\]' > "$temp_parse_fail" || true - - local journal_stderr - journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true) - if [[ -n "$journal_stderr" ]]; then - log_warn "journalctl reported: $journal_stderr" - fi - - # Extract "Event rejected by write policy" with "Invalid announcement" (streaming) - # NOTE: We only extract from write policy logs (hex IDs), not builder logs (note1 IDs) - # to avoid double-counting. Both log sources contain the same events. - log_info " Searching for write policy rejections..." - eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep 'Invalid announcement' > "$temp_write_policy_rejection" || true - - # Extract "Added rejected announcement" entries for enrichment (streaming) - # These contain pubkey and identifier which we use to enrich write policy rejections - log_info " Searching for rejected announcement entries (for enrichment)..." 
- eval "$journal_cmd" 2>/dev/null | grep 'Added rejected announcement to two-tier index' > "$temp_rejected_announcement" || true - - rm -f "$temp_stderr" - - # Check if we found anything - local parse_fail_line_count write_policy_line_count rejected_announcement_line_count - parse_fail_line_count=$(wc -l < "$temp_parse_fail") - parse_fail_line_count="${parse_fail_line_count//[^0-9]/}" - write_policy_line_count=$(wc -l < "$temp_write_policy_rejection") - write_policy_line_count="${write_policy_line_count//[^0-9]/}" - rejected_announcement_line_count=$(wc -l < "$temp_rejected_announcement") - rejected_announcement_line_count="${rejected_announcement_line_count//[^0-9]/}" - - log_info " Found $parse_fail_line_count [PARSE_FAIL] log lines" - log_info " Found $write_policy_line_count write policy rejection log lines" - log_info " Found $rejected_announcement_line_count rejected announcement log lines (for enrichment)" - - local total_invalid_announcement_lines=$write_policy_line_count - - if [[ "$parse_fail_line_count" -eq 0 && "$total_invalid_announcement_lines" -eq 0 ]]; then - log_warn "No matching entries found in logs." - log_warn "" - log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." - log_warn "The script looks for:" - log_warn "" - log_warn " 1. [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." - log_warn " 2. Event rejected by write policy event_id=... kind=30617 reason=Invalid announcement: ..." 
- log_warn "" - log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" - log_warn "" - - # Create empty output file with header comment - { - echo "# Parse failures and invalid announcements extracted from $service" - echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" - echo "# Extracted: $(date -Iseconds)" - echo "#" - echo "# Includes:" - echo "# - [PARSE_FAIL] structured log entries" - echo "# - \"Invalid announcement\" rejections" - echo "#" - echo "# Format: event_idkindreasonreponpub" - echo "# Note: repo and npub may be empty for some entries" - echo "#" - echo "# NOTE: No matching entries found." - echo "# This is expected if ngit-grasp logging improvements are not yet deployed." - } > "$output_file" - - rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_rejected_announcement" - log_info "Created empty output file: $output_file" - exit 0 - fi - - # Write header - { - echo "# Parse failures and invalid announcements extracted from $service" - echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" - echo "# Extracted: $(date -Iseconds)" - echo "#" - echo "# Includes:" - echo "# - [PARSE_FAIL] structured log entries" - echo "# - \"Invalid announcement\" rejections" - echo "#" - echo "# Format: event_idkindreasonreponpub" - echo "# Note: repo and npub may be empty for some entries" - } > "$output_file" - - # Parse [PARSE_FAIL] entries using batch awk processing - log_info " Parsing [PARSE_FAIL] entries..." - local parse_fail_count=0 - if [[ "$parse_fail_line_count" -gt 0 ]]; then - parse_parse_fail_batch "$temp_parse_fail" >> "$output_file" - parse_fail_count=$(grep -v '^#' "$output_file" | wc -l) - parse_fail_count="${parse_fail_count//[^0-9]/}" - fi - - # Parse write policy rejection entries using batch awk processing - log_info " Parsing write policy rejection entries..." 
- local write_policy_count=0 - if [[ "$write_policy_line_count" -gt 0 ]]; then - local before_count - before_count=$(grep -v '^#' "$output_file" 2>/dev/null | wc -l || echo "0") - before_count="${before_count//[^0-9]/}" - before_count="${before_count:-0}" - parse_write_policy_rejection_batch "$temp_write_policy_rejection" >> "$output_file" - local after_count - after_count=$(grep -v '^#' "$output_file" 2>/dev/null | wc -l || echo "0") - after_count="${after_count//[^0-9]/}" - after_count="${after_count:-0}" - write_policy_count=$((after_count - before_count)) - fi - - local invalid_announcement_count=$write_policy_count - - # Build enrichment lookup table from "Added rejected announcement" entries - local enrichment_lookup_file - enrichment_lookup_file=$(mktemp) - - log_info " Building enrichment lookup table..." - if [[ "$rejected_announcement_line_count" -gt 0 ]]; then - parse_rejected_announcement_batch "$temp_rejected_announcement" > "$enrichment_lookup_file" - fi - - rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_rejected_announcement" - - # Deduplicate by event_id (first column) - keep first occurrence - log_info " Deduplicating entries..." 
- local deduped_file - deduped_file=$(mktemp) - # Preserve header lines (starting with #) and deduplicate data lines - grep '^#' "$output_file" > "$deduped_file" - grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file" - mv "$deduped_file" "$output_file" - - # Deduplicate enrichment lookup table by event_id - if [[ -s "$enrichment_lookup_file" ]]; then - sort -t$'\t' -k1,1 -u "$enrichment_lookup_file" > "$enrichment_lookup_file.deduped" - mv "$enrichment_lookup_file.deduped" "$enrichment_lookup_file" - fi - - # Enrich with repo/npub from "Added rejected announcement" log entries - # This is critical for usability - without it, action-required.txt shows - # event_id|kind instead of repo|npub, making parse failures unidentifiable - enrich_with_repo_npub "$output_file" "$enrichment_lookup_file" - - rm -f "$enrichment_lookup_file" - - # Count final entries (excluding header lines) - local count - count=$(grep -v '^#' "$output_file" | wc -l) - count="${count//[^0-9]/}" # Strip whitespace - count="${count:-0}" - - rm -f "$temp_file" - - # Summary - echo "" - log_info "=== Extraction Summary ===" - log_info "Service: $service" - log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" - log_success "Extracted $count total entries" - log_info " - [PARSE_FAIL] entries: $parse_fail_count" - log_info " - Invalid announcement rejections: $invalid_announcement_count" - echo "" - log_info "Output file: $output_file" - - if [[ $count -gt 0 ]]; then - echo "" - log_info "Sample entries (first 5):" - # Use a subshell to avoid SIGPIPE issues with set -e - # New format: event_idkindreasonreponpub - (grep -v '^#' "$output_file" | head -5 | while IFS=$'\t' read -r event_id kind reason repo npub; do - echo " kind=$kind event_id=${event_id:0:16}... 
reason=\"${reason:0:60}...\"" - done) || true - fi - - # Breakdown by kind - if [[ $count -gt 0 ]]; then - echo "" - log_info "Breakdown by event kind:" - # Use a subshell to avoid SIGPIPE issues with set -e - # kind is now column 2 - (grep -v '^#' "$output_file" | awk -F'\t' '{print $2}' | sort | uniq -c | sort -rn | while read -r cnt kind; do - echo " kind $kind: $cnt failures" - done) || true - fi - - # Breakdown by reason pattern (for invalid announcements) - if [[ $invalid_announcement_count -gt 0 ]]; then - echo "" - log_info "Breakdown by reason pattern:" - # Extract the main reason type (before the colon details) - (grep -v '^#' "$output_file" | awk -F'\t' '{print $3}' | sed 's/:.*//' | sort | uniq -c | sort -rn | head -10 | while read -r cnt reason; do - echo " $reason: $cnt" - done) || true - fi - - # Explicit success exit - exit 0 -} - -main "$@" diff --git a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh deleted file mode 100755 index a0c8ad0..0000000 --- a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh +++ /dev/null @@ -1,408 +0,0 @@ -#!/usr/bin/env bash -# -# 31-extract-purgatory-expiry.sh - Extract purgatory expiry events from systemd logs -# -# PHASE 4b of the GRASP relay to ngit-grasp migration analysis pipeline. -# Extracts structured [PURGATORY_EXPIRED] log entries from journalctl. 
-# -# USAGE: -# ./31-extract-purgatory-expiry.sh [options] -# -# EXAMPLES: -# # Extract from ngit-grasp service (last 30 days, default) -# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs -# -# # Extract with custom time range -# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-01" -# -# # Extract from specific time window -# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22" -# -# OPTIONS: -# --since Start date for log extraction (default: 30 days ago) -# --until End date for log extraction (default: now) -# --dry-run Show what would be extracted without writing files -# -# OUTPUT: -# /purgatory-expired.txt -# -# OUTPUT FORMAT (TSV): -# reponpubtimestampreason -# -# EXPECTED LOG FORMAT: -# The script looks for structured log entries in this format: -# -# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="clone URL unreachable after 7 days" -# -# Required fields: repo, npub -# Optional fields: reason (explains why purgatory expired) -# -# BACKGROUND: -# "Purgatory" is the state where ngit-grasp has received an announcement event -# but cannot yet sync the git data (e.g., clone URL unreachable, git server down). -# After a configurable timeout (default 7 days), the repository is marked as -# expired and removed from purgatory. -# -# Purgatory expiry during migration analysis indicates repositories that: -# - Had valid announcements on the production relay -# - Could not be synced to the archive relay -# - May need manual intervention or investigation -# -# DEPENDENCY: -# This script requires logging improvements in ngit-grasp to emit structured -# [PURGATORY_EXPIRED] log entries. Until those are implemented, this script -# will find no matching entries (which is handled gracefully). 
#
#   See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)
#
#   Expected Rust logging code:
#     tracing::warn!(
#         target: "migration",
#         "[PURGATORY_EXPIRED] repo={} npub={} reason=\"{}\"",
#         identifier, npub, reason
#     );
#
# PREREQUISITES:
#   - journalctl (systemd)
#   - grep, awk (standard Unix tools)
#   - Access to systemd journal (may require sudo or journal group membership)
#
# RUNTIME: Depends on log volume, typically < 30 seconds
#
# SEE ALSO:
#   docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
#   30-extract-parse-failures.sh - Companion script for parse failure logs
#

set -euo pipefail

# Get script directory for sourcing helpers
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Source the service validation helper (optional: callers fall back to their
# own checks when the helper file is not present)
if [[ -f "$SCRIPT_DIR/validate-service.sh" ]]; then
    # shellcheck source=/dev/null
    source "$SCRIPT_DIR/validate-service.sh"
fi

# Colors for log output. Every log_* helper writes to stderr, so colour is
# enabled only when stderr (fd 2) is a terminal; otherwise escape sequences
# would end up in redirected log files.
if [[ -t 2 ]]; then
    RED=$'\033[0;31m'
    GREEN=$'\033[0;32m'
    YELLOW=$'\033[0;33m'
    BLUE=$'\033[0;34m'
    NC=$'\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi

# Logging helpers. All arguments are joined into one message and written to
# stderr. The message is printed with %s so backslashes in it (for example in
# captured journalctl output) are shown verbatim rather than interpreted.
log_info() {
    printf '%s[INFO]%s %s\n' "$BLUE" "$NC" "$*" >&2
}

log_success() {
    printf '%s[OK]%s %s\n' "$GREEN" "$NC" "$*" >&2
}

log_warn() {
    printf '%s[WARN]%s %s\n' "$YELLOW" "$NC" "$*" >&2
}

log_error() {
    printf '%s[ERROR]%s %s\n' "$RED" "$NC" "$*" >&2
}

# Print the command-line help and exit with status 1.
# Only reached on invalid invocations, so the text goes to stderr.
usage() {
    cat >&2 <<EOF
Usage: $0 <service-name> <output-dir> [options]

Arguments:
  service-name    Systemd service name (e.g., ngit-grasp.service)
  output-dir      Directory to store extracted log data

Options:
  --since <date>  Start date (default: 30 days ago)
  --until <date>  End date (default: now)
  --dry-run       Show what would be extracted without writing

Examples:
  $0 ngit-grasp.service output/logs
  $0 ngit-grasp.service output/logs --since '2026-01-01'
  $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'

Expected log format:
  [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="..."
EOF
    exit 1
}
# Parse a single [PURGATORY_EXPIRED] log line and extract its fields.
#
# Arguments:
#   $1 - log line, e.g.
#        2026-01-22T10:30:45+0000 host ngit-grasp[1234]: [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="..."
# Outputs:
#   One TSV record on stdout: repo<TAB>npub<TAB>timestamp<TAB>reason
#   Nothing is printed when repo or npub is missing; timestamp and reason
#   are optional and may be empty.
#
# Fields are extracted with bash's built-in regex matching instead of
# `grep -oP`: no GNU-only dependency, no forks per line, and only the leftmost
# occurrence of each key is used. A "repo=" or "npub=" that appears inside the
# quoted reason therefore cannot add extra lines to the record.
parse_log_line() {
    local line="$1"
    local timestamp="" repo="" npub="" reason=""

    # Patterns live in variables so the regex is not subject to shell quoting.
    local re_timestamp='^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}'
    local re_repo='repo=([^[:space:]]+)'
    local re_npub='npub=([^[:space:]]+)'
    local re_reason='reason="([^"]*)'

    # ISO timestamp at the start of the line (seconds precision; any timezone
    # suffix is not part of the extracted value)
    if [[ "$line" =~ $re_timestamp ]]; then
        timestamp="${BASH_REMATCH[0]}"
    fi

    # repo=VALUE (unquoted identifier)
    if [[ "$line" =~ $re_repo ]]; then
        repo="${BASH_REMATCH[1]}"
    fi

    # npub=VALUE (npub1... format)
    if [[ "$line" =~ $re_npub ]]; then
        npub="${BASH_REMATCH[1]}"
    fi

    # reason="VALUE" (quoted string, optional)
    if [[ "$line" =~ $re_reason ]]; then
        reason="${BASH_REMATCH[1]}"
        # A literal tab inside the reason would split the TSV record
        reason="${reason//$'\t'/ }"
    fi

    # Only output if we have the required fields
    if [[ -n "$repo" && -n "$npub" ]]; then
        printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$timestamp" "$reason"
    fi
}
"$service" =~ \.service$ ]]; then - service="${service}.service" - fi - - # Validate service is appropriate for structured logging - # This prevents the common mistake of using ngit-relay instead of ngit-grasp - if type validate_service_for_structured_logging &>/dev/null; then - # Use non-interactive mode if not a terminal, skip log check (we'll do our own) - local interactive="true" - [[ ! -t 0 ]] && interactive="false" - - if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then - log_error "Service validation failed. Use an ngit-grasp service for structured logging." - exit 1 - fi - else - # Fallback validation if helper not available - if [[ "$service" == *"ngit-relay"* ]]; then - log_error "Service name appears to be ngit-relay: $service" - log_error "Structured logging ([PURGATORY_EXPIRED]) only exists in ngit-grasp services." - log_error "Please use the ngit-grasp archive service instead." - log_error "" - log_error "To find the correct service:" - log_error " systemctl list-units 'ngit-grasp*' --all" - exit 1 - fi - fi - - log_info "Extracting purgatory expiry events from systemd logs" - log_info "Service: $service" - log_info "Output: $output_dir" - log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" - - # Check if journalctl is available - if ! command -v journalctl &> /dev/null; then - log_error "journalctl not found. This script requires systemd." - exit 1 - fi - - # Validate service exists (check if journalctl can find any logs for it) - # Note: We don't require the service to be running, just that it has logs - if ! journalctl --no-pager -u "$service" -n 1 &>/dev/null; then - log_warn "Could not query logs for service: $service" - log_warn "This may indicate the service doesn't exist or you lack permissions." 
- log_warn "" - log_warn "To list available ngit-grasp services:" - log_warn " systemctl list-units 'ngit-grasp*' --all" - log_warn " journalctl --list-boots # Check if you have journal access" - log_warn "" - # Continue anyway - the service might exist but have no logs yet - fi - - # Build journalctl command - local journal_cmd="journalctl -u $service --no-pager -o short-iso" - - if [[ -n "$since_date" ]]; then - journal_cmd="$journal_cmd --since '$since_date'" - fi - - if [[ -n "$until_date" ]]; then - journal_cmd="$journal_cmd --until '$until_date'" - fi - - log_info "Running: $journal_cmd | grep '\\[PURGATORY_EXPIRED\\]'" - - if [[ "$dry_run" == true ]]; then - log_info "[DRY RUN] Would extract to: $output_dir/purgatory-expired.txt" - - # Show sample of what would be extracted - log_info "Checking for matching log entries..." - local sample_count - sample_count=$(eval "$journal_cmd" 2>/dev/null | grep -c '\[PURGATORY_EXPIRED\]' || echo "0") - sample_count="${sample_count//[^0-9]/}" # Strip non-numeric characters - sample_count="${sample_count:-0}" - log_info "Found $sample_count matching log entries" - - if [[ "$sample_count" -eq 0 ]]; then - log_warn "No [PURGATORY_EXPIRED] entries found in logs." - log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." - log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" - fi - - exit 0 - fi - - # Create output directory - mkdir -p "$output_dir" - - local output_file="$output_dir/purgatory-expired.txt" - local temp_file - temp_file=$(mktemp) - - # Extract and parse log entries - log_info "Extracting log entries..." - - # Get raw log lines containing [PURGATORY_EXPIRED] - # Capture stderr separately to detect journalctl errors - local raw_lines journal_stderr journal_exit - local temp_stderr - temp_stderr=$(mktemp) - - raw_lines=$(eval "$journal_cmd" 2>"$temp_stderr" | grep '\[PURGATORY_EXPIRED\]' || true) - journal_exit=$? 
- journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true) - rm -f "$temp_stderr" - - # Report any journalctl errors (but don't fail - empty logs are valid) - if [[ -n "$journal_stderr" ]]; then - log_warn "journalctl reported: $journal_stderr" - fi - - if [[ -z "$raw_lines" ]]; then - log_warn "No [PURGATORY_EXPIRED] entries found in logs." - log_warn "" - log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." - log_warn "The structured log format required by this script:" - log_warn "" - log_warn " [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason=\"...\"" - log_warn "" - log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" - log_warn "" - - # Create empty output file with header comment - { - echo "# Purgatory expiry events extracted from $service" - echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" - echo "# Extracted: $(date -Iseconds)" - echo "# Format: reponpubtimestampreason" - echo "#" - echo "# NOTE: No [PURGATORY_EXPIRED] entries found." - echo "# This is expected if ngit-grasp logging improvements are not yet deployed." 
- } > "$output_file" - - log_info "Created empty output file: $output_file" - exit 0 - fi - - # Write header - { - echo "# Purgatory expiry events extracted from $service" - echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" - echo "# Extracted: $(date -Iseconds)" - echo "# Format: reponpubtimestampreason" - } > "$output_file" - - # Parse each line - local count=0 - while IFS= read -r line; do - local parsed - parsed=$(parse_log_line "$line") - if [[ -n "$parsed" ]]; then - echo "$parsed" >> "$output_file" - count=$((count + 1)) - fi - done <<< "$raw_lines" - - rm -f "$temp_file" - - # Summary - echo "" - log_info "=== Extraction Summary ===" - log_info "Service: $service" - log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" - log_success "Extracted $count purgatory expiry entries" - echo "" - log_info "Output file: $output_file" - - if [[ $count -gt 0 ]]; then - echo "" - log_info "Sample entries (first 5):" - # Use a subshell to avoid SIGPIPE issues with set -e - (tail -n +5 "$output_file" | head -5 | while IFS=$'\t' read -r repo npub timestamp reason; do - echo " repo=$repo npub=${npub:0:20}... timestamp=$timestamp" - done) || true - fi - - # Show unique repos affected - if [[ $count -gt 0 ]]; then - echo "" - local unique_repos - unique_repos=$(tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort -u | wc -l) - log_info "Unique repositories affected: $unique_repos" - - echo "" - log_info "Repositories with purgatory expiry:" - # Use a subshell to avoid SIGPIPE issues with set -e - (tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort | uniq -c | sort -rn | head -10 | while read -r cnt repo; do - echo " $repo: $cnt expiry events" - done) || true - - local total_repos - total_repos=$(tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort -u | wc -l) - if [[ $total_repos -gt 10 ]]; then - echo " ... 
and $((total_repos - 10)) more repositories" - fi - fi - - # Explicit success exit - exit 0 -} - -main "$@" diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh deleted file mode 100755 index 8b61636..0000000 --- a/docs/how-to/migration-scripts/40-classify-actions.sh +++ /dev/null @@ -1,662 +0,0 @@ -#!/usr/bin/env bash -# -# 40-classify-actions.sh - Classify repos by migration action required -# -# Implements the redesigned classification system (Option B) with user feedback: -# -# Tier 1: No Action Required (ready-for-migration.txt) -# - Complete in both (prod=cat1, archive=cat1) -# - Deleted by user (kind 5 event) -# - Empty in prod (prod=cat2, any archive status) -# - Archive-only (archive=any, prod=missing) -# - Not in prod (purgatory-only, prod=missing) -# - Archive ahead (archive has newer git data than prod - GRASP enforced) -# -# Tier 2: Action Required (needs-resync.txt) -# - Complete in prod, missing from archive (with purgatory context) -# - Complete in prod, incomplete in archive AND prod is ahead (with purgatory context) -# -# Tier 3: Manual Investigation (manual-review.txt) -# - Partial in prod (prod=cat3) -# - No-match in prod (prod=cat4) -# - Parse failures -# - Conflicting states -# - Diverged git history (both have unique commits) -# -# KEY INSIGHT: -# Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event. 
-# If archive has different/newer data than prod, it means: -# - A state event authorized those commits at some point -# - Archive is actually MORE up-to-date than prod -# - Migration should use archive data (it's already correct) -# -# Usage: ./40-classify-actions.sh -# -# Output format: repo | npub | prod_status | archive_status | context | action -# - -set -euo pipefail - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -log_info() { echo -e "${BLUE}[INFO]${NC} $*"; } -log_success() { echo -e "${GREEN}[OK]${NC} $*"; } -log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } -log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; } - -# Check arguments -if [[ $# -lt 1 ]]; then - echo "Usage: $0 " - echo "Example: $0 work/migration-analysis-20260123-200701" - exit 1 -fi - -ANALYSIS_DIR="$1" - -# Validate analysis directory -if [[ ! -d "$ANALYSIS_DIR" ]]; then - log_error "Analysis directory not found: $ANALYSIS_DIR" - exit 1 -fi - -# Define paths -PROD_DIR="$ANALYSIS_DIR/prod" -ARCHIVE_DIR="$ANALYSIS_DIR/archive" -COMPARISON_DIR="$ANALYSIS_DIR/comparison" -LOGS_DIR="$ANALYSIS_DIR/logs" -RESULTS_DIR="$ANALYSIS_DIR/results" - -# Validate required directories -for dir in "$PROD_DIR" "$ARCHIVE_DIR" "$COMPARISON_DIR" "$LOGS_DIR"; do - if [[ ! 
-d "$dir" ]]; then - log_error "Required directory not found: $dir" - exit 1 - fi -done - -# Create results directory -mkdir -p "$RESULTS_DIR" - -# Output files -READY_FILE="$RESULTS_DIR/ready-for-migration.txt" -RESYNC_FILE="$RESULTS_DIR/needs-resync.txt" -REVIEW_FILE="$RESULTS_DIR/manual-review.txt" -SUMMARY_FILE="$RESULTS_DIR/summary.txt" - -# Temporary files for processing -TMP_DIR=$(mktemp -d) -trap 'rm -rf "$TMP_DIR"' EXIT - -log_info "Starting classification with revised system (Option B)" -log_info "Analysis directory: $ANALYSIS_DIR" - -# ============================================================================ -# Phase 1: Build lookup tables from source data -# ============================================================================ - -log_info "Building lookup tables..." - -# Build prod category lookup: repo|npub -> category -declare -A PROD_CAT -while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do - repo="${repo// /}" # Remove all spaces - npub="${npub// /}" # Remove all spaces - [[ -z "$repo" || -z "$npub" ]] && continue - PROD_CAT["$repo|$npub"]="cat1" -done < "$PROD_DIR/category1-complete-match.txt" - -while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do - repo="${repo// /}" - npub="${npub// /}" - [[ -z "$repo" || -z "$npub" ]] && continue - PROD_CAT["$repo|$npub"]="cat2" -done < "$PROD_DIR/category2-empty-blank.txt" - -while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do - repo="${repo// /}" - npub="${npub// /}" - [[ -z "$repo" || -z "$npub" ]] && continue - PROD_CAT["$repo|$npub"]="cat3" -done < "$PROD_DIR/category3-partial-match.txt" - -while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do - repo="${repo// /}" - npub="${npub// /}" - [[ -z "$repo" || -z "$npub" ]] && continue - PROD_CAT["$repo|$npub"]="cat4" -done < "$PROD_DIR/category4-no-match.txt" - -log_info "Loaded ${#PROD_CAT[@]} prod entries" - -# Build archive category lookup: repo|npub -> category -declare -A ARCHIVE_CAT -while IFS='|' read -r repo npub 
rest; do - repo="${repo// /}" - npub="${npub// /}" - [[ -z "$repo" || -z "$npub" ]] && continue - ARCHIVE_CAT["$repo|$npub"]="cat1" -done < "$ARCHIVE_DIR/category1-complete-match.txt" - -while IFS='|' read -r repo npub rest; do - repo="${repo// /}" - npub="${npub// /}" - [[ -z "$repo" || -z "$npub" ]] && continue - ARCHIVE_CAT["$repo|$npub"]="cat2" -done < "$ARCHIVE_DIR/category2-empty-blank.txt" - -while IFS='|' read -r repo npub rest; do - repo="${repo// /}" - npub="${npub// /}" - [[ -z "$repo" || -z "$npub" ]] && continue - ARCHIVE_CAT["$repo|$npub"]="cat3" -done < "$ARCHIVE_DIR/category3-partial-match.txt" - -while IFS='|' read -r repo npub rest; do - repo="${repo// /}" - npub="${npub// /}" - [[ -z "$repo" || -z "$npub" ]] && continue - ARCHIVE_CAT["$repo|$npub"]="cat4" -done < "$ARCHIVE_DIR/category4-no-match.txt" - -log_info "Loaded ${#ARCHIVE_CAT[@]} archive entries" - -# Build purgatory lookup: repo|npub -> 1 (if purgatory expired) -declare -A PURGATORY -PURGATORY_COUNT=0 -if [[ -f "$LOGS_DIR/purgatory-expired.txt" ]]; then - while IFS=$'\t' read -r repo npub timestamp reason || [[ -n "$repo" ]]; do - # Skip comments and empty lines - [[ "$repo" =~ ^# ]] && continue - [[ -z "$repo" || -z "$npub" ]] && continue - PURGATORY["$repo|$npub"]=1 - PURGATORY_COUNT=$((PURGATORY_COUNT + 1)) - done < "$LOGS_DIR/purgatory-expired.txt" -fi -log_info "Loaded $PURGATORY_COUNT purgatory entries" - -# Build parse failure lookup: repo|npub -> 1 (if parse failure logged) -# Parse failures file format: event_idkindreasonreponpub -declare -A PARSE_FAIL -PARSE_FAIL_COUNT=0 -if [[ -f "$LOGS_DIR/parse-failures.txt" ]]; then - while IFS=$'\t' read -r event_id kind reason repo npub || [[ -n "$event_id" ]]; do - # Skip comments and empty lines - [[ "$event_id" =~ ^# ]] && continue - [[ -z "$repo" || -z "$npub" ]] && continue - PARSE_FAIL["$repo|$npub"]=1 - PARSE_FAIL_COUNT=$((PARSE_FAIL_COUNT + 1)) - done < "$LOGS_DIR/parse-failures.txt" -fi -log_info "Loaded $PARSE_FAIL_COUNT parse 
# Load kind 5 deletion events from an NDJSON file into the global DELETED map.
#
# Globals:
#   DELETED (written) - associative array, key "repo|npub", value 1
# Arguments:
#   $1 - path to an NDJSON file of deletion events; a missing file is a no-op
# Returns:
#   0 always; problems are reported through log_warn so that one bad record
#   does not abort the whole classification run.
#
# Each event's "a" tags of the form "30617:<pubkey_hex>:<identifier>" are
# extracted. The identifier is everything after the second colon, so repo
# identifiers that contain ':' are kept intact, and tags for other kinds are
# ignored instead of being mis-split. Hex pubkeys are converted to npub format
# with `nak encode npub`, once per distinct key.
process_deletions() {
    local file="$1"
    [[ -f "$file" ]] || return 0

    # jq emits one "<pubkey_hex><TAB><identifier>" line per matching tag.
    # `[]?`, `arrays` and `strings` skip malformed events/tags without erroring.
    local filter='.tags[]? | arrays | select(.[0] == "a") | .[1] | strings
        | capture("^30617:(?<pk>[0-9a-fA-F]{64}):(?<id>.*)$")
        | "\(.pk)\t\(.id)"'

    # Capture jq's status explicitly: under `set -e` an unchecked failure here
    # would terminate the script with no message (stderr is suppressed).
    # Whatever jq managed to print before failing is still used.
    local raw
    if ! raw=$(jq -r "$filter" "$file" 2>/dev/null); then
        log_warn "jq reported errors reading $file - deletion data may be incomplete"
    fi

    # hex -> npub cache; failed conversions are cached as empty strings so
    # each distinct key is passed to nak at most once.
    local -A hex_to_npub=()
    local pubkey_hex repo npub
    local failed=0
    while IFS=$'\t' read -r pubkey_hex repo; do
        [[ -n "$pubkey_hex" && -n "$repo" ]] || continue

        if [[ -z "${hex_to_npub[$pubkey_hex]+cached}" ]]; then
            npub=$(nak encode npub "$pubkey_hex" 2>/dev/null) || npub=""
            [[ -n "$npub" ]] || failed=$((failed + 1))
            hex_to_npub["$pubkey_hex"]="$npub"
        fi

        npub="${hex_to_npub[$pubkey_hex]}"
        [[ -n "$npub" ]] || continue
        DELETED["$repo|$npub"]=1
    done <<< "$raw"

    if (( failed > 0 )); then
        log_warn "$failed pubkey(s) in $file could not be converted to npub (is nak installed?)"
    fi
    return 0
}
# This data comes from 22-compare-git-data.sh which compares actual git commits
declare -A GIT_ANCESTRY
GIT_ANCESTRY_COUNT=0

# load_git_ancestry FILE
#
# Reads a tab-separated file of "repo npub relationship details" rows and
# stores relationship under GIT_ANCESTRY["repo|npub"], bumping
# GIT_ANCESTRY_COUNT once per stored row.
#
# Globals:   GIT_ANCESTRY (written), GIT_ANCESTRY_COUNT (written)
# Arguments: $1 - path to the TSV file (must exist)
# Returns:   non-zero only if the file cannot be opened
#
# Rows are skipped when they are comments (first field starts with '#'),
# the header row, or are missing the repo or npub field.
load_git_ancestry() {
    local tsv_file="$1"
    local repo npub relationship _details

    # "|| [[ -n ... ]]" keeps a final row that has no trailing newline.
    while IFS=$'\t' read -r repo npub relationship _details || [[ -n "$repo" ]]; do
        [[ "$repo" =~ ^# ]] && continue
        # Header row. Matching on the first column alone would also drop a
        # real repository whose identifier is literally "repo", so a row is
        # only treated as the header when its second column is not a pubkey.
        # NOTE(review): assumes data rows carry bech32 pubkeys ("npub1...");
        # confirm against the output of 22-compare-git-data.sh.
        [[ "$repo" == "repo" && "$npub" != npub1* ]] && continue
        [[ -z "$repo" || -z "$npub" ]] && continue
        GIT_ANCESTRY["$repo|$npub"]="$relationship"
        GIT_ANCESTRY_COUNT=$((GIT_ANCESTRY_COUNT + 1))
    done < "$tsv_file"
}

if [[ -f "$COMPARISON_DIR/git-ancestry.tsv" ]]; then
    load_git_ancestry "$COMPARISON_DIR/git-ancestry.tsv"
    log_info "Loaded $GIT_ANCESTRY_COUNT git ancestry entries"
else
    log_warn "No git-ancestry.tsv found - will not check if archive is ahead of prod"
    log_warn "Run 22-compare-git-data.sh to enable archive-ahead detection"
fi

# ============================================================================
# Phase 2: Build unique repo list from all sources
# ============================================================================

log_info "Building unique repo list..."

# Union of every "repo|npub" key seen in prod, in the archive, or in the
# purgatory log. Keys that appear only in the deletion or parse-failure
# lookups are not added here.
declare -A ALL_REPOS
for key in "${!PROD_CAT[@]}"; do
    ALL_REPOS["$key"]=1
done
for key in "${!ARCHIVE_CAT[@]}"; do
    ALL_REPOS["$key"]=1
done
for key in "${!PURGATORY[@]}"; do
    ALL_REPOS["$key"]=1
done

log_info "Total unique repos: ${#ALL_REPOS[@]}"

# ============================================================================
# Phase 3: Classify each repo according to revised decision tree
# ============================================================================

log_info "Classifying repos..."
# Counters for summary (one per reason; keys are read again by the summary)
declare -A COUNTS=(
    [ready_complete_both]=0
    [ready_deleted]=0
    [ready_empty_prod]=0
    [ready_archive_only]=0
    [ready_not_in_prod]=0
    [ready_archive_ahead]=0
    [resync_missing_archive]=0
    [resync_incomplete_archive]=0
    [review_partial_prod]=0
    [review_nomatch_prod]=0
    [review_parse_failure]=0
    [review_conflicting]=0
    [review_diverged]=0
)

# Output arrays (one formatted line per classified repo)
declare -a READY_LINES=()
declare -a RESYNC_LINES=()
declare -a REVIEW_LINES=()

# record_result TIER COUNTER LINE
#
# Appends LINE to the output array for TIER (ready | resync | review) and
# increments COUNTS[COUNTER].
#
# Globals: READY_LINES, RESYNC_LINES, REVIEW_LINES, COUNTS (all written)
# Returns: 1 for an unknown tier, 0 otherwise
record_result() {
    local tier="$1" counter="$2" line="$3"

    case "$tier" in
        ready)  READY_LINES+=("$line") ;;
        resync) RESYNC_LINES+=("$line") ;;
        review) REVIEW_LINES+=("$line") ;;
        *)      return 1 ;;
    esac
    COUNTS[$counter]=$(( ${COUNTS[$counter]:-0} + 1 ))
}

# Helper function to get context string
#
# Arguments: $1 - "repo|npub" key
#            $2 - prod status   (as printed by cat_to_status)
#            $3 - archive status (as printed by cat_to_status)
# Globals:   PURGATORY, PARSE_FAIL (read)
# Outputs:   comma-separated list of the flags that apply, or "none":
#              purgatory-expired  - key is present in PURGATORY
#              parse-failure      - key is present in PARSE_FAIL
#              archive-has-data   - prod is "empty" while the archive is
#                                   neither "missing" nor "empty"
get_context() {
    local key="$1"
    local prod_status="$2"
    local archive_status="$3"
    local context=""

    if [[ -n "${PURGATORY[$key]:-}" ]]; then
        context="purgatory-expired"
    fi

    if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
        # ${context:+...} inserts the ", " separator only when needed.
        context="${context:+$context, }parse-failure"
    fi

    if [[ "$prod_status" == "empty" && "$archive_status" != "missing" && "$archive_status" != "empty" ]]; then
        context="${context:+$context, }archive-has-data"
    fi

    echo "${context:-none}"
}

# Helper to convert category to human-readable status.
# Unrecognised values are echoed back unchanged.
cat_to_status() {
    case "$1" in
        cat1) echo "complete" ;;
        cat2) echo "empty" ;;
        cat3) echo "partial" ;;
        cat4) echo "no-match" ;;
        missing) echo "missing" ;;
        *) echo "$1" ;;
    esac
}

# classify_repo KEY
#
# Runs the decision tree for one "repo|npub" key and records at most one
# result line via record_result. A key that is missing from prod and the
# archive and has no purgatory entry produces no line.
#
# Globals: PROD_CAT, ARCHIVE_CAT, DELETED, PURGATORY, PARSE_FAIL,
#          GIT_ANCESTRY (read); output arrays and COUNTS (written)
classify_repo() {
    local key="$1"
    local repo npub
    IFS='|' read -r repo npub <<< "$key"

    local prod_cat="${PROD_CAT[$key]:-missing}"
    local archive_cat="${ARCHIVE_CAT[$key]:-missing}"
    local prod_status archive_status context
    prod_status=$(cat_to_status "$prod_cat")
    archive_status=$(cat_to_status "$archive_cat")
    context=$(get_context "$key" "$prod_status" "$archive_status")

    # Leading columns shared by every output line.
    local row="$repo | $npub | $prod_status | $archive_status"

    # 1. Is there a kind 5 deletion event?
    if [[ -n "${DELETED[$key]:-}" ]]; then
        record_result ready ready_deleted "$row | $context | deleted by user"
        return 0
    fi

    # 2. What is the prod status?
    case "$prod_cat" in
        missing)
            if [[ "$archive_cat" != "missing" ]]; then
                # In archive but not in prod -> no action (archive-only)
                record_result ready ready_archive_only "$row | $context | archive-only (not in prod)"
            elif [[ -n "${PURGATORY[$key]:-}" ]]; then
                # Purgatory only, not in prod -> no action. The context column
                # is fixed here rather than taken from get_context.
                record_result ready ready_not_in_prod "$row | purgatory-expired | purgatory-only (not in prod)"
            fi
            # Otherwise skip (not a real repo - no data anywhere)
            ;;

        cat2)
            # Empty in prod -> ALWAYS no action required
            record_result ready ready_empty_prod "$row | $context | empty in prod (user never pushed)"
            ;;

        cat1)
            if [[ "$archive_cat" == "cat1" ]]; then
                # Complete in both -> no action
                record_result ready ready_complete_both "$row | $context | complete in both"
            elif [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
                # Complete in prod, missing/incomplete in archive, and a parse
                # failure was logged -> needs manual review
                record_result review review_parse_failure "$row | $context | complete in prod with parse failure"
            else
                # Check git ancestry to see if archive is actually ahead
                local git_relationship="${GIT_ANCESTRY[$key]:-unknown}"

                # A known relationship is appended to the context column; it
                # replaces the "none" placeholder instead of following it.
                if [[ "$git_relationship" != "unknown" ]]; then
                    if [[ "$context" == "none" ]]; then
                        context="git=$git_relationship"
                    else
                        context="$context, git=$git_relationship"
                    fi
                fi

                case "$git_relationship" in
                    archive-ahead | in-sync)
                        # Archive has newer/same git data - this is GOOD
                        record_result ready ready_archive_ahead "$row | $context | archive ahead (use archive data)"
                        ;;
                    diverged)
                        # Git histories diverged - needs manual review
                        record_result review review_diverged "$row | $context | git histories diverged (manual review)"
                        ;;
                    *)
                        # prod-ahead, archive-only, prod-only, both-empty, or unknown
                        if [[ "$archive_cat" == "missing" ]]; then
                            record_result resync resync_missing_archive "$row | $context | trigger re-sync to archive"
                        else
                            record_result resync resync_incomplete_archive "$row | $context | trigger re-sync (archive incomplete)"
                        fi
                        ;;
                esac
            fi
            ;;

        cat3)
            # Partial in prod -> ALWAYS manual investigation
            record_result review review_partial_prod "$row | $context | partial in prod (investigate git data)"
            ;;

        cat4)
            # No-match in prod -> ALWAYS manual investigation
            record_result review review_nomatch_prod "$row | $context | no-match in prod (git corruption)"
            ;;
    esac

    return 0
}

LOOP_COUNT=0
for key in "${!ALL_REPOS[@]}"; do
    LOOP_COUNT=$((LOOP_COUNT + 1))
    if (( LOOP_COUNT % 100 == 0 )); then
        log_info "Processed $LOOP_COUNT repos..."
    fi
    classify_repo "$key"
done

# ============================================================================
# Phase 4: Write output files
# ============================================================================

log_info "Writing output files..."
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S+00:00")

# write_result_file OUT_FILE TITLE LAST_COLUMN EXTRA_HEADER [LINE...]
#
# Overwrites OUT_FILE with a '#'-prefixed header followed by one LINE per row.
#
# Globals:   TIMESTAMP (read)
# Arguments: $1 - output path
#            $2 - title text for the first header line
#            $3 - name of the last column in the "# Format:" line
#            $4 - extra header text written verbatim after the standard
#                 header (may be empty; may contain newlines)
#            $5.. - data lines; zero lines is valid and yields a
#                 header-only file
write_result_file() {
    local out_file="$1" title="$2" last_column="$3" extra_header="$4"
    shift 4

    {
        printf '%s\n' \
            "# $title" \
            "# Generated: $TIMESTAMP" \
            "# Format: repo | npub | prod_status | archive_status | context | $last_column" \
            "#"
        if [[ -n "$extra_header" ]]; then
            printf '%s\n' "$extra_header"
        fi
        # printf with no arguments would still print one empty line.
        if (( $# > 0 )); then
            printf '%s\n' "$@"
        fi
    } > "$out_file"
}

# The ${ARR[@]+"${ARR[@]}"} form expands to nothing for an empty array.
# A bare "${ARR[@]}" is an "unbound variable" error under `set -u` on
# bash older than 4.4, and an empty tier is a normal outcome here.

# Write ready-for-migration.txt
write_result_file "$READY_FILE" \
    "Ready for Migration - No action required" "reason" "" \
    ${READY_LINES[@]+"${READY_LINES[@]}"}

# Write needs-resync.txt
RESYNC_CONTEXT_HELP=$'# Context meanings:\n# purgatory-expired = archive tried to sync but failed (30min timeout)\n# none = archive never tried or announcement missing\n#'
write_result_file "$RESYNC_FILE" \
    "Needs Re-sync - Action required" "action" "$RESYNC_CONTEXT_HELP" \
    ${RESYNC_LINES[@]+"${RESYNC_LINES[@]}"}

# Write manual-review.txt
write_result_file "$REVIEW_FILE" \
    "Manual Review Required - Investigation needed" "reason" "" \
    ${REVIEW_LINES[@]+"${REVIEW_LINES[@]}"}

# ============================================================================
# Phase 5: Generate summary
# ============================================================================

log_info "Generating summary..."
# count_data_lines FILE
#
# Prints the number of lines in FILE that do not start with '#'.
# Prints 0 when FILE is missing or unreadable.
#
# `grep -c` prints "0" AND exits 1 when nothing matches, so the common
# `$(grep -c ... || echo 0)` idiom emits "0" twice for a comment-only file.
# The count is captured first and the exit status is ignored.
count_data_lines() {
    local file="$1" count=""

    if [[ -r "$file" ]]; then
        count=$(grep -c -v '^#' -- "$file" 2>/dev/null) || true
    fi
    echo "${count:-0}"
}

# percent_of PART TOTAL
#
# Prints PART/TOTAL as a percentage with one decimal place, or "0.0" when
# TOTAL is not positive. Values reach awk through -v rather than being
# spliced into the program text.
percent_of() {
    local part="$1" total="$2"

    if (( total <= 0 )); then
        printf '0.0'
        return 0
    fi
    awk -v part="$part" -v total="$total" 'BEGIN { printf "%.1f", (part / total) * 100 }'
}

TOTAL_READY="${#READY_LINES[@]}"
TOTAL_RESYNC="${#RESYNC_LINES[@]}"
TOTAL_REVIEW="${#REVIEW_LINES[@]}"
TOTAL=$((TOTAL_READY + TOTAL_RESYNC + TOTAL_REVIEW))

# Calculate percentages
PCT_READY=$(percent_of "$TOTAL_READY" "$TOTAL")
PCT_RESYNC=$(percent_of "$TOTAL_RESYNC" "$TOTAL")
PCT_REVIEW=$(percent_of "$TOTAL_REVIEW" "$TOTAL")

# Markdown summary. The here-document is unquoted so variables and command
# substitutions expand; literal backticks are escaped.
cat > "$SUMMARY_FILE" <<EOF
# Migration Classification Summary
Generated: $TIMESTAMP
Analysis Directory: $ANALYSIS_DIR

## Overview

| Category | Count | Percentage |
|----------|-------|------------|
| Ready for Migration | $TOTAL_READY | $PCT_READY% |
| Needs Re-sync | $TOTAL_RESYNC | $PCT_RESYNC% |
| Manual Review | $TOTAL_REVIEW | $PCT_REVIEW% |
| **Total** | **$TOTAL** | **100%** |

## Tier 1: Ready for Migration ($TOTAL_READY repos)

These repositories are ready for migration or don't need migration:

| Reason | Count |
|--------|-------|
| complete in both prod and archive | ${COUNTS[ready_complete_both]} |
| archive ahead (has newer git data) | ${COUNTS[ready_archive_ahead]} |
| deleted by user | ${COUNTS[ready_deleted]} |
| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |
| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |
| purgatory-only (not in prod) | ${COUNTS[ready_not_in_prod]} |

## Tier 2: Needs Re-sync ($TOTAL_RESYNC repos)

These repositories need re-sync to archive before migration:

| Reason | Count | Action |
|--------|-------|--------|
| complete in prod, missing from archive | ${COUNTS[resync_missing_archive]} | trigger re-sync |
| complete in prod, incomplete in archive | ${COUNTS[resync_incomplete_archive]} | trigger re-sync |

### Purgatory Context

Repos in needs-resync.txt include purgatory context:
- **purgatory-expired**: Archive tried to sync but failed (30min timeout)
- **none**: Archive never tried or announcement missing

## Tier 3: Manual Review ($TOTAL_REVIEW repos)

These repositories require human investigation:

| Reason | Count |
|--------|-------|
| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |
| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |
| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |
| git histories diverged | ${COUNTS[review_diverged]} |

## Input Data Summary

### Prod Categories
- Category 1 (complete): $(wc -l < "$PROD_DIR/category1-complete-match.txt")
- Category 2 (empty): $(wc -l < "$PROD_DIR/category2-empty-blank.txt")
- Category 3 (partial): $(wc -l < "$PROD_DIR/category3-partial-match.txt")
- Category 4 (no match): $(wc -l < "$PROD_DIR/category4-no-match.txt")

### Archive Categories
- Category 1 (complete): $(wc -l < "$ARCHIVE_DIR/category1-complete-match.txt")
- Category 2 (empty): $(wc -l < "$ARCHIVE_DIR/category2-empty-blank.txt")
- Category 3 (partial): $(wc -l < "$ARCHIVE_DIR/category3-partial-match.txt")
- Category 4 (no match): $(wc -l < "$ARCHIVE_DIR/category4-no-match.txt")

### Logs
- Parse failures: $(count_data_lines "$LOGS_DIR/parse-failures.txt")
- Purgatory expired: $(count_data_lines "$LOGS_DIR/purgatory-expired.txt")

## Output Files

- \`results/ready-for-migration.txt\` - $TOTAL_READY repos ready for migration
- \`results/needs-resync.txt\` - $TOTAL_RESYNC repos needing re-sync
- \`results/manual-review.txt\` - $TOTAL_REVIEW repos needing investigation
- \`results/summary.txt\` - This summary file

## Recommended Next Steps

1. **Review needs-resync.txt** - Trigger re-sync for these repos
2. **Review manual-review.txt** - Investigate unusual states
3. **Verify ready-for-migration.txt** - Spot-check a few repos
4. **Plan migration window** - Schedule cutover when action items resolved
EOF

# ============================================================================
# Phase 6: Print summary to console
# ============================================================================

echo ""
log_success "Classification complete!"
cat <<EOF

=== Summary ===
Ready for Migration: $TOTAL_READY ($PCT_READY%)
 - Complete in both: ${COUNTS[ready_complete_both]}
 - Archive ahead: ${COUNTS[ready_archive_ahead]}
 - Deleted by user: ${COUNTS[ready_deleted]}
 - Empty in prod: ${COUNTS[ready_empty_prod]}
 - Archive-only: ${COUNTS[ready_archive_only]}
 - Purgatory-only: ${COUNTS[ready_not_in_prod]}

Needs Re-sync: $TOTAL_RESYNC ($PCT_RESYNC%)
 - Missing from archive: ${COUNTS[resync_missing_archive]}
 - Incomplete in archive: ${COUNTS[resync_incomplete_archive]}

Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)
 - Partial in prod: ${COUNTS[review_partial_prod]}
 - No-match in prod: ${COUNTS[review_nomatch_prod]}
 - Parse failures: ${COUNTS[review_parse_failure]}
 - Git diverged: ${COUNTS[review_diverged]}

Total: $TOTAL repos

Output files:
 $READY_FILE
 $RESYNC_FILE
 $REVIEW_FILE
 $SUMMARY_FILE
EOF
diff --git a/docs/how-to/migration-scripts/run-migration-analysis.sh
b/docs/how-to/migration-scripts/run-migration-analysis.sh deleted file mode 100755 index acc5e44..0000000 --- a/docs/how-to/migration-scripts/run-migration-analysis.sh +++ /dev/null @@ -1,779 +0,0 @@ -#!/usr/bin/env bash -# -# run-migration-analysis.sh - Orchestrate the complete GRASP relay to ngit-grasp migration analysis -# -# This script runs all 5 phases of the migration analysis pipeline in sequence, -# with proper error handling, progress reporting, and timing information. -# -# QUICK START: -# # Basic usage (local analysis only - Phases 1, 3, 5) -# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev -# -# # Full analysis including git sync check (requires VPS access) -# ./run-migration-analysis.sh \ -# --prod-relay wss://relay.ngit.dev \ -# --archive-relay wss://archive.relay.ngit.dev \ -# --prod-git /var/lib/grasp-relay/git \ -# --archive-git /var/lib/ngit-grasp/git -# -# USAGE: -# ./run-migration-analysis.sh [options] -# -# REQUIRED OPTIONS: -# --prod-relay Production relay WebSocket URL (e.g., wss://relay.ngit.dev) -# --archive-relay Archive relay WebSocket URL (e.g., wss://archive.relay.ngit.dev) -# -# OPTIONAL OPTIONS: -# --prod-git Git base directory for prod (enables Phase 2) -# --archive-git Git base directory for archive (enables Phase 2) -# --service Systemd service name for log extraction (enables Phase 4) -# --output Output directory (default: work/migration-analysis-YYYYMMDD-HHMM) -# --since Start date for log extraction (default: 30 days ago) -# --until End date for log extraction (default: now) -# -# PHASE CONTROL: -# --skip-phase-1 Skip event fetching (use existing data) -# --skip-phase-2 Skip git sync check (use existing data) -# --skip-phase-3 Skip categorization (use existing data) -# --skip-phase-4 Skip log extraction (use existing data) -# --skip-phase-5 Skip final classification -# --only-phase-N Run only phase N (1-5) -# --from-phase-N Start from phase N (skip earlier phases) -# -# 
OTHER OPTIONS: -# --dry-run Show what would be executed without running -# --continue-on-error Continue to next phase even if current phase fails -# --help Show this help message -# -# PHASES: -# Phase 1: Fetch events from both relays (~30s each, local) -# Phase 2: Check git sync status (~20 min each, requires VPS) -# Phase 3: Categorize and compare results (fast, local) -# Phase 4: Extract logs from systemd (requires VPS) -# Phase 5: Final classification (fast, local) -# -# EXAMPLES: -# # Dry run to see what would happen -# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev --dry-run -# -# # Run only Phase 1 (fetch events) -# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev --only-phase-1 -# -# # Resume from Phase 3 using existing Phase 1-2 data -# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev --from-phase-3 --output work/migration-analysis-20260122-1430 -# -# # Full analysis on VPS with all features -# ./run-migration-analysis.sh \ -# --prod-relay wss://relay.ngit.dev \ -# --archive-relay wss://archive.relay.ngit.dev \ -# --prod-git /var/lib/grasp-relay/git \ -# --archive-git /var/lib/ngit-grasp/git \ -# --service ngit-grasp.service -# -# SEE ALSO: -# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide -# - -set -euo pipefail - -# Get script directory for finding other scripts -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Colors for output (disabled if not a terminal) -if [[ -t 1 ]]; then - RED='\033[0;31m' - GREEN='\033[0;32m' - YELLOW='\033[0;33m' - BLUE='\033[0;34m' - CYAN='\033[0;36m' - BOLD='\033[1m' - NC='\033[0m' -else - RED='' - GREEN='' - YELLOW='' - BLUE='' - CYAN='' - BOLD='' - NC='' -fi - -# Logging functions -log_header() { - echo "" - echo -e "${BOLD}${CYAN}════════════════════════════════════════════════════════════════${NC}" - echo -e 
"${BOLD}${CYAN} $*${NC}" - echo -e "${BOLD}${CYAN}════════════════════════════════════════════════════════════════${NC}" - echo "" -} - -log_phase() { - echo "" - echo -e "${BOLD}${BLUE}┌──────────────────────────────────────────────────────────────┐${NC}" - echo -e "${BOLD}${BLUE}│ $*${NC}" - echo -e "${BOLD}${BLUE}└──────────────────────────────────────────────────────────────┘${NC}" -} - -log_info() { - echo -e "${BLUE}[INFO]${NC} $*" >&2 -} - -log_success() { - echo -e "${GREEN}[OK]${NC} $*" >&2 -} - -log_warn() { - echo -e "${YELLOW}[WARN]${NC} $*" >&2 -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $*" >&2 -} - -log_step() { - echo -e "${CYAN} →${NC} $*" >&2 -} - -# Default values -PROD_RELAY="" -ARCHIVE_RELAY="" -PROD_GIT="" -ARCHIVE_GIT="" -SERVICE_NAME="" -OUTPUT_DIR="" -DRY_RUN=false -CONTINUE_ON_ERROR=false -LOG_SINCE="" -LOG_UNTIL="" - -# Phase control -SKIP_PHASE_1=false -SKIP_PHASE_2=false -SKIP_PHASE_3=false -SKIP_PHASE_4=false -SKIP_PHASE_5=false -ONLY_PHASE="" -FROM_PHASE="" - -# Timing -declare -A PHASE_TIMES - -usage() { - head -73 "$0" | tail -n +3 | sed 's/^# //' | sed 's/^#//' - exit 0 -} - -# Parse command line arguments -parse_args() { - while [[ $# -gt 0 ]]; do - case "$1" in - --prod-relay) - PROD_RELAY="$2" - shift 2 - ;; - --archive-relay) - ARCHIVE_RELAY="$2" - shift 2 - ;; - --prod-git) - PROD_GIT="$2" - shift 2 - ;; - --archive-git) - ARCHIVE_GIT="$2" - shift 2 - ;; - --service) - SERVICE_NAME="$2" - shift 2 - ;; - --output) - OUTPUT_DIR="$2" - shift 2 - ;; - --skip-phase-1) - SKIP_PHASE_1=true - shift - ;; - --skip-phase-2) - SKIP_PHASE_2=true - shift - ;; - --skip-phase-3) - SKIP_PHASE_3=true - shift - ;; - --skip-phase-4) - SKIP_PHASE_4=true - shift - ;; - --skip-phase-5) - SKIP_PHASE_5=true - shift - ;; - --only-phase-1|--only-phase-2|--only-phase-3|--only-phase-4|--only-phase-5) - ONLY_PHASE="${1#--only-phase-}" - shift - ;; - --from-phase-1|--from-phase-2|--from-phase-3|--from-phase-4|--from-phase-5) - 
FROM_PHASE="${1#--from-phase-}" - shift - ;; - --dry-run) - DRY_RUN=true - shift - ;; - --continue-on-error) - CONTINUE_ON_ERROR=true - shift - ;; - --since) - LOG_SINCE="$2" - shift 2 - ;; - --until) - LOG_UNTIL="$2" - shift 2 - ;; - --help|-h) - usage - ;; - *) - log_error "Unknown option: $1" - echo "Use --help for usage information." - exit 1 - ;; - esac - done -} - -# Validate required arguments -validate_args() { - local errors=0 - - if [[ -z "$PROD_RELAY" ]]; then - log_error "Missing required option: --prod-relay" - errors=1 - fi - - if [[ -z "$ARCHIVE_RELAY" ]]; then - log_error "Missing required option: --archive-relay" - errors=1 - fi - - # Validate relay URLs - if [[ -n "$PROD_RELAY" && ! "$PROD_RELAY" =~ ^wss?:// ]]; then - log_error "Invalid prod relay URL: $PROD_RELAY (must start with ws:// or wss://)" - errors=1 - fi - - if [[ -n "$ARCHIVE_RELAY" && ! "$ARCHIVE_RELAY" =~ ^wss?:// ]]; then - log_error "Invalid archive relay URL: $ARCHIVE_RELAY (must start with ws:// or wss://)" - errors=1 - fi - - # Validate git paths if provided - if [[ -n "$PROD_GIT" && ! -d "$PROD_GIT" ]]; then - log_warn "Prod git directory not found: $PROD_GIT" - log_warn "Phase 2 will fail unless running on VPS with access to this path." - fi - - if [[ -n "$ARCHIVE_GIT" && ! -d "$ARCHIVE_GIT" ]]; then - log_warn "Archive git directory not found: $ARCHIVE_GIT" - log_warn "Phase 2 will fail unless running on VPS with access to this path." - fi - - if [[ $errors -eq 1 ]]; then - echo "" - echo "Use --help for usage information." - exit 1 - fi -} - -# Check prerequisites -check_prerequisites() { - local missing=0 - - log_info "Checking prerequisites..." 
- - # Required tools - for tool in git nak jq awk sort; do - if command -v "$tool" &> /dev/null; then - log_step "$tool: found" - else - log_error "$tool: NOT FOUND" - missing=1 - fi - done - - # Optional tools - if command -v journalctl &> /dev/null; then - log_step "journalctl: found (Phase 4 available)" - else - log_step "journalctl: not found (Phase 4 will be skipped)" - SKIP_PHASE_4=true - fi - - if [[ $missing -eq 1 ]]; then - log_error "Missing required tools. Install them and try again." - exit 1 - fi - - # Check scripts exist - for script in 01-fetch-events.sh 10-check-git-sync.sh 20-categorize.sh 21-compare-relays.sh 22-compare-git-data.sh 30-extract-parse-failures.sh 31-extract-purgatory-expiry.sh 40-classify-actions.sh; do - if [[ ! -x "$SCRIPT_DIR/$script" ]]; then - log_error "Script not found or not executable: $SCRIPT_DIR/$script" - missing=1 - fi - done - - if [[ $missing -eq 1 ]]; then - exit 1 - fi - - log_success "All prerequisites satisfied" -} - -# Determine which phases to run -determine_phases() { - # Handle --only-phase-N - if [[ -n "$ONLY_PHASE" ]]; then - for i in 1 2 3 4 5; do - if [[ "$i" != "$ONLY_PHASE" ]]; then - eval "SKIP_PHASE_$i=true" - fi - done - fi - - # Handle --from-phase-N - if [[ -n "$FROM_PHASE" ]]; then - for i in 1 2 3 4 5; do - if [[ "$i" -lt "$FROM_PHASE" ]]; then - eval "SKIP_PHASE_$i=true" - fi - done - fi - - # Auto-skip Phase 2 if git paths not provided - if [[ -z "$PROD_GIT" && -z "$ARCHIVE_GIT" ]]; then - if [[ "$SKIP_PHASE_2" != "true" ]]; then - log_warn "No git paths provided. Phase 2 (git sync check) will be skipped." - log_warn "Use --prod-git and --archive-git to enable Phase 2." - SKIP_PHASE_2=true - fi - fi - - # Auto-skip Phase 4 if service not provided - if [[ -z "$SERVICE_NAME" ]]; then - if [[ "$SKIP_PHASE_4" != "true" ]]; then - log_warn "No service name provided. Phase 4 (log extraction) will be skipped." - log_warn "Use --service to enable Phase 4." 
- SKIP_PHASE_4=true - fi - fi -} - -# Setup output directory -setup_output_dir() { - if [[ -z "$OUTPUT_DIR" ]]; then - OUTPUT_DIR="work/migration-analysis-$(date +%Y%m%d-%H%M)" - fi - - log_info "Output directory: $OUTPUT_DIR" - - if [[ "$DRY_RUN" == "true" ]]; then - log_info "[DRY RUN] Would create directory structure" - return - fi - - mkdir -p "$OUTPUT_DIR"/{prod/raw,archive/raw,comparison,logs,results} - - # Save configuration - cat > "$OUTPUT_DIR/config.txt" << EOF -# Migration Analysis Configuration -# Generated: $(date -Iseconds) - -PROD_RELAY=$PROD_RELAY -ARCHIVE_RELAY=$ARCHIVE_RELAY -PROD_GIT=$PROD_GIT -ARCHIVE_GIT=$ARCHIVE_GIT -SERVICE_NAME=$SERVICE_NAME -OUTPUT_DIR=$OUTPUT_DIR -EOF - - log_success "Created output directory structure" -} - -# Run a phase with timing and error handling -run_phase() { - local phase_num="$1" - local phase_name="$2" - shift 2 - local cmd=("$@") - - local skip_var="SKIP_PHASE_$phase_num" - if [[ "${!skip_var}" == "true" ]]; then - log_phase "Phase $phase_num: $phase_name [SKIPPED]" - return 0 - fi - - log_phase "Phase $phase_num: $phase_name" - - if [[ "$DRY_RUN" == "true" ]]; then - log_info "[DRY RUN] Would execute:" - for c in "${cmd[@]}"; do - echo " $c" - done - return 0 - fi - - local start_time - start_time=$(date +%s) - - local exit_code=0 - - # Execute the command(s) - for c in "${cmd[@]}"; do - log_step "Running: $c" - if ! 
eval "$c"; then - exit_code=1 - if [[ "$CONTINUE_ON_ERROR" == "true" ]]; then - log_warn "Command failed, continuing due to --continue-on-error" - else - log_error "Command failed" - break - fi - fi - done - - local end_time - end_time=$(date +%s) - local duration=$((end_time - start_time)) - PHASE_TIMES[$phase_num]=$duration - - if [[ $exit_code -eq 0 ]]; then - log_success "Phase $phase_num completed in ${duration}s" - else - log_error "Phase $phase_num failed after ${duration}s" - if [[ "$CONTINUE_ON_ERROR" != "true" ]]; then - return 1 - fi - fi - - return $exit_code -} - -# Phase 1: Fetch events -run_phase_1() { - local cmds=() - - # Fetch from prod relay - cmds+=("'$SCRIPT_DIR/01-fetch-events.sh' '$PROD_RELAY' '$OUTPUT_DIR/prod'") - - # Fetch from archive relay - cmds+=("'$SCRIPT_DIR/01-fetch-events.sh' '$ARCHIVE_RELAY' '$OUTPUT_DIR/archive'") - - run_phase 1 "Fetch Events (~30s each)" "${cmds[@]}" -} - -# Phase 2: Git sync check -run_phase_2() { - local cmds=() - - if [[ -n "$PROD_GIT" ]]; then - cmds+=("'$SCRIPT_DIR/10-check-git-sync.sh' '$OUTPUT_DIR/prod/raw/state-events.json' '$PROD_GIT' '$OUTPUT_DIR/prod' --categorize") - else - log_warn "Skipping prod git sync check (no --prod-git provided)" - fi - - if [[ -n "$ARCHIVE_GIT" ]]; then - cmds+=("'$SCRIPT_DIR/10-check-git-sync.sh' '$OUTPUT_DIR/archive/raw/state-events.json' '$ARCHIVE_GIT' '$OUTPUT_DIR/archive' --categorize") - else - log_warn "Skipping archive git sync check (no --archive-git provided)" - fi - - if [[ ${#cmds[@]} -eq 0 ]]; then - log_warn "No git paths provided, skipping Phase 2" - return 0 - fi - - run_phase 2 "Git Sync Check (~20 min each)" "${cmds[@]}" -} - -# Phase 3: Categorize and compare -run_phase_3() { - local cmds=() - - # Check if we have git-sync-status.tsv files (from Phase 2) - # If not, we can't run categorization - local has_prod_sync=false - local has_archive_sync=false - - if [[ -f "$OUTPUT_DIR/prod/git-sync-status.tsv" ]]; then - has_prod_sync=true - fi - - if [[ -f 
"$OUTPUT_DIR/archive/git-sync-status.tsv" ]]; then - has_archive_sync=true - fi - - # Run categorization if we have sync data but no category files - if [[ "$has_prod_sync" == "true" && ! -f "$OUTPUT_DIR/prod/category1-complete-match.txt" ]]; then - cmds+=("'$SCRIPT_DIR/20-categorize.sh' '$OUTPUT_DIR/prod/git-sync-status.tsv' '$OUTPUT_DIR/prod'") - fi - - if [[ "$has_archive_sync" == "true" && ! -f "$OUTPUT_DIR/archive/category1-complete-match.txt" ]]; then - cmds+=("'$SCRIPT_DIR/20-categorize.sh' '$OUTPUT_DIR/archive/git-sync-status.tsv' '$OUTPUT_DIR/archive'") - fi - - # Run comparison if we have category files - if [[ -f "$OUTPUT_DIR/prod/category1-complete-match.txt" && -f "$OUTPUT_DIR/archive/category1-complete-match.txt" ]]; then - cmds+=("'$SCRIPT_DIR/21-compare-relays.sh' '$OUTPUT_DIR/prod' '$OUTPUT_DIR/archive' '$OUTPUT_DIR/comparison'") - else - log_warn "Missing category files for comparison." - log_warn "Phase 2 must complete successfully before Phase 3 can compare relays." - - # Create placeholder comparison files if they don't exist - if [[ "$DRY_RUN" != "true" ]]; then - mkdir -p "$OUTPUT_DIR/comparison" - for f in complete-in-both.txt complete-prod-missing-archive.txt complete-prod-incomplete-archive.txt incomplete-in-both.txt in-archive-not-prod.txt; do - if [[ ! 
-f "$OUTPUT_DIR/comparison/$f" ]]; then - echo "# Placeholder - Phase 2 data not available" > "$OUTPUT_DIR/comparison/$f" - fi - done - echo "# Comparison not available - Phase 2 data missing" > "$OUTPUT_DIR/comparison/summary.txt" - fi - fi - - if [[ ${#cmds[@]} -eq 0 ]]; then - log_warn "No categorization or comparison needed (already done or missing input)" - return 0 - fi - - run_phase 3 "Categorize & Compare (fast)" "${cmds[@]}" - - # Phase 3c: Compare git data between relays (requires git paths) - # This determines if archive is ahead of prod for repos with mismatched state - if [[ -n "$PROD_GIT" && -n "$ARCHIVE_GIT" ]]; then - # Build list of repos to compare: those where prod=complete but archive is not - local repos_to_compare="$OUTPUT_DIR/comparison/complete-prod-incomplete-archive.txt" - if [[ -f "$repos_to_compare" ]] && [[ ! -f "$OUTPUT_DIR/comparison/git-ancestry.tsv" ]]; then - log_info "Running git ancestry comparison (Phase 3c)..." - run_phase 3 "Git Ancestry Comparison" "'$SCRIPT_DIR/22-compare-git-data.sh' '$PROD_GIT' '$ARCHIVE_GIT' '$repos_to_compare' '$OUTPUT_DIR/comparison'" - fi - else - log_warn "Git paths not provided - skipping git ancestry comparison" - log_warn "Without git comparison, repos where archive is ahead will be incorrectly flagged as needing re-sync" - fi -} - -# Phase 4: Extract logs -run_phase_4() { - if [[ -z "$SERVICE_NAME" ]]; then - log_warn "No service name provided, skipping Phase 4" - return 0 - fi - - # Validate service name before running Phase 4 - # Structured logging only exists in ngit-grasp, not ngit-relay - if [[ "$SERVICE_NAME" == *"ngit-relay"* ]]; then - log_error "SERVICE_NAME appears to be ngit-relay: $SERVICE_NAME" - log_error "" - log_error "Phase 4 requires an ngit-grasp service with structured logging." - log_error "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists" - log_error "in ngit-grasp services, NOT in ngit-relay services." 
- log_error "" - log_error "Please update --service to use the ngit-grasp archive service." - log_error "" - log_error "To find the correct service name:" - log_error " systemctl list-units 'ngit-grasp*' --all" - log_error "" - log_error "Common ngit-grasp service names:" - log_error " - ngit-grasp.service" - log_error " - ngit-grasp-relay-ngit-dev.service (NixOS multi-instance)" - log_error " - ngit-grasp-archive.service" - return 1 - fi - - # Warn if service name doesn't look like ngit-grasp - if [[ "$SERVICE_NAME" != *"ngit-grasp"* && "$SERVICE_NAME" != *"grasp"* ]]; then - log_warn "SERVICE_NAME doesn't contain 'ngit-grasp': $SERVICE_NAME" - log_warn "Structured logging only exists in ngit-grasp services." - log_warn "If this is not an ngit-grasp service, Phase 4 will find no logs." - fi - - local cmds=() - - # Build log extraction options - local log_opts="" - if [[ -n "$LOG_SINCE" ]]; then - log_opts="$log_opts --since '$LOG_SINCE'" - fi - if [[ -n "$LOG_UNTIL" ]]; then - log_opts="$log_opts --until '$LOG_UNTIL'" - fi - - cmds+=("'$SCRIPT_DIR/30-extract-parse-failures.sh' '$SERVICE_NAME' '$OUTPUT_DIR/logs' $log_opts") - cmds+=("'$SCRIPT_DIR/31-extract-purgatory-expiry.sh' '$SERVICE_NAME' '$OUTPUT_DIR/logs' $log_opts") - - run_phase 4 "Extract Logs (VPS required)" "${cmds[@]}" -} - -# Phase 5: Final classification -run_phase_5() { - # Check if we have the minimum required files - local can_run=true - - if [[ ! -d "$OUTPUT_DIR/prod" ]]; then - log_warn "Missing prod directory" - can_run=false - fi - - if [[ ! -d "$OUTPUT_DIR/archive" ]]; then - log_warn "Missing archive directory" - can_run=false - fi - - if [[ ! -d "$OUTPUT_DIR/comparison" ]]; then - log_warn "Missing comparison directory" - can_run=false - fi - - # Create logs directory with empty files if missing - if [[ "$DRY_RUN" != "true" ]]; then - mkdir -p "$OUTPUT_DIR/logs" - for f in parse-failures.txt purgatory-expired.txt; do - if [[ ! 
-f "$OUTPUT_DIR/logs/$f" ]]; then - echo "# No data - Phase 4 not run" > "$OUTPUT_DIR/logs/$f" - fi - done - fi - - if [[ "$can_run" == "false" ]]; then - log_error "Cannot run Phase 5 - missing required input directories" - return 1 - fi - - run_phase 5 "Final Classification (fast)" "'$SCRIPT_DIR/40-classify-actions.sh' '$OUTPUT_DIR'" -} - -# Display summary -display_summary() { - log_header "Migration Analysis Complete" - - echo "Output Directory: $OUTPUT_DIR" - echo "" - - # Phase timing summary - echo "Phase Timing:" - local total_time=0 - for phase in 1 2 3 4 5; do - local skip_var="SKIP_PHASE_$phase" - if [[ "${!skip_var}" == "true" ]]; then - echo " Phase $phase: SKIPPED" - elif [[ -n "${PHASE_TIMES[$phase]:-}" ]]; then - local t="${PHASE_TIMES[$phase]}" - echo " Phase $phase: ${t}s" - total_time=$((total_time + t)) - else - echo " Phase $phase: N/A" - fi - done - echo " ─────────────" - echo " Total: ${total_time}s" - echo "" - - # Results summary - if [[ -f "$OUTPUT_DIR/results/summary.txt" ]]; then - echo "Results Summary:" - echo "" - # Extract key metrics from summary - if grep -q "No Action Required" "$OUTPUT_DIR/results/summary.txt"; then - grep -A1 "No Action Required" "$OUTPUT_DIR/results/summary.txt" | head -2 - fi - if grep -q "Action Required" "$OUTPUT_DIR/results/summary.txt"; then - grep -A1 "Action Required" "$OUTPUT_DIR/results/summary.txt" | head -2 - fi - if grep -q "Manual Investigation" "$OUTPUT_DIR/results/summary.txt"; then - grep -A1 "Manual Investigation" "$OUTPUT_DIR/results/summary.txt" | head -2 - fi - echo "" - fi - - # Output files - echo "Output Files:" - echo " $OUTPUT_DIR/results/no-action-required.txt" - echo " $OUTPUT_DIR/results/action-required.txt" - echo " $OUTPUT_DIR/results/manual-investigation.txt" - echo " $OUTPUT_DIR/results/summary.txt" - echo "" - - # Next steps - echo "Next Steps:" - echo " 1. Review results/summary.txt for overview" - echo " 2. Address items in results/action-required.txt" - echo " 3. 
Investigate items in results/manual-investigation.txt" - echo " 4. Plan migration window when action items are resolved" - echo "" -} - -# Main -main() { - parse_args "$@" - - log_header "GRASP Relay to ngit-grasp Migration Analysis" - - validate_args - check_prerequisites - determine_phases - setup_output_dir - - # Show configuration - log_info "Configuration:" - log_step "Prod relay: $PROD_RELAY" - log_step "Archive relay: $ARCHIVE_RELAY" - [[ -n "$PROD_GIT" ]] && log_step "Prod git: $PROD_GIT" - [[ -n "$ARCHIVE_GIT" ]] && log_step "Archive git: $ARCHIVE_GIT" - [[ -n "$SERVICE_NAME" ]] && log_step "Service: $SERVICE_NAME" - log_step "Output: $OUTPUT_DIR" - echo "" - - # Show phase plan - log_info "Phase Plan:" - for phase in 1 2 3 4 5; do - local skip_var="SKIP_PHASE_$phase" - if [[ "${!skip_var}" == "true" ]]; then - log_step "Phase $phase: SKIP" - else - log_step "Phase $phase: RUN" - fi - done - echo "" - - if [[ "$DRY_RUN" == "true" ]]; then - log_warn "DRY RUN MODE - No changes will be made" - echo "" - fi - - # Run phases - local overall_exit=0 - - run_phase_1 || overall_exit=1 - run_phase_2 || overall_exit=1 - run_phase_3 || overall_exit=1 - run_phase_4 || overall_exit=1 - run_phase_5 || overall_exit=1 - - # Display summary - if [[ "$DRY_RUN" != "true" ]]; then - display_summary - fi - - if [[ $overall_exit -ne 0 ]]; then - log_warn "Some phases failed. Review output for details." - fi - - exit $overall_exit -} - -main "$@" diff --git a/docs/how-to/migration-scripts/validate-service.sh b/docs/how-to/migration-scripts/validate-service.sh deleted file mode 100755 index 6988af3..0000000 --- a/docs/how-to/migration-scripts/validate-service.sh +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env bash -# -# validate-service.sh - Validate service name for structured logging -# -# This helper script validates that a service name is appropriate for -# Phase 4 log extraction. 
Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) -# only exists in ngit-grasp services, NOT in ngit-relay services. -# -# USAGE: -# Source this script and call the validation function: -# -# source validate-service.sh -# validate_service_for_structured_logging "$SERVICE_NAME" || exit 1 -# -# BACKGROUND: -# Phase 4 of the migration analysis extracts structured log entries from -# journald. These log entries only exist in ngit-grasp services. If you -# accidentally specify an ngit-relay service, Phase 4 will find no logs -# and produce empty results. -# -# This validation prevents that common mistake by: -# 1. Checking if the service name contains "ngit-relay" (error) -# 2. Warning if the service name doesn't contain "ngit-grasp" -# 3. Optionally checking if structured logs actually exist -# -# SEE ALSO: -# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide -# 30-extract-parse-failures.sh - Uses this validation -# 31-extract-purgatory-expiry.sh - Uses this validation -# - -# Colors for output (disabled if not a terminal) -if [[ -t 1 ]]; then - _VS_RED='\033[0;31m' - _VS_YELLOW='\033[0;33m' - _VS_NC='\033[0m' -else - _VS_RED='' - _VS_YELLOW='' - _VS_NC='' -fi - -# Validates that the service name is appropriate for structured logging -# -# Arguments: -# $1 - service_name: The systemd service name to validate -# $2 - check_logs: Whether to check if logs actually exist (default: "true") -# $3 - interactive: Whether to prompt for confirmation (default: "true") -# -# Returns: -# 0 - Service is valid for structured logging -# 1 - Service is invalid or user declined to continue -# -# Example: -# validate_service_for_structured_logging "ngit-grasp.service" || exit 1 -# validate_service_for_structured_logging "ngit-grasp.service" "false" # Skip log check -# validate_service_for_structured_logging "ngit-grasp.service" "true" "false" # Non-interactive -# -validate_service_for_structured_logging() { - local service_name="$1" - local check_logs="${2:-true}" - local 
interactive="${3:-true}" - - # Check if service name looks like ngit-relay (ERROR - wrong service type) - if [[ "$service_name" == *"ngit-relay"* ]]; then - echo -e "${_VS_RED}ERROR: Service name appears to be ngit-relay: $service_name${_VS_NC}" >&2 - echo "" >&2 - echo "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists in" >&2 - echo "ngit-grasp services, NOT in ngit-relay services." >&2 - echo "" >&2 - echo "Please use the ngit-grasp archive service instead." >&2 - echo "" >&2 - echo "To find the correct service name:" >&2 - echo " systemctl list-units 'ngit-grasp*' --all" >&2 - echo "" >&2 - echo "Common ngit-grasp service names:" >&2 - echo " - ngit-grasp.service" >&2 - echo " - ngit-grasp-relay-ngit-dev.service (NixOS multi-instance)" >&2 - echo " - ngit-grasp-archive.service" >&2 - return 1 - fi - - # Check if service name looks like ngit-grasp (WARNING if not) - if [[ "$service_name" != *"ngit-grasp"* && "$service_name" != *"grasp"* ]]; then - echo -e "${_VS_YELLOW}WARNING: Service name doesn't contain 'ngit-grasp': $service_name${_VS_NC}" >&2 - echo "" >&2 - echo "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists in" >&2 - echo "ngit-grasp services." >&2 - echo "" >&2 - - if [[ "$interactive" == "true" ]]; then - read -p "Continue anyway? (y/N) " -n 1 -r - echo - if [[ ! $REPLY =~ ^[Yy]$ ]]; then - return 1 - fi - else - echo "Non-interactive mode: proceeding despite warning" >&2 - fi - fi - - # Optionally check if structured logs actually exist - if [[ "$check_logs" == "true" ]]; then - # Check if journalctl is available - if ! 
command -v journalctl &> /dev/null; then - echo -e "${_VS_YELLOW}WARNING: journalctl not available, cannot verify logs exist${_VS_NC}" >&2 - return 0 - fi - - # Check for structured log entries - # IMPORTANT: Use --no-pager to prevent hanging when run non-interactively (e.g., via SSH) - local has_parse_fail has_purgatory - has_parse_fail=$(journalctl --no-pager -u "$service_name" --since "7 days ago" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") - has_purgatory=$(journalctl --no-pager -u "$service_name" --since "7 days ago" 2>/dev/null | grep -c '\[PURGATORY_EXPIRED\]' || echo "0") - - # Strip any non-numeric characters (grep -c can have trailing whitespace) - has_parse_fail="${has_parse_fail//[^0-9]/}" - has_purgatory="${has_purgatory//[^0-9]/}" - has_parse_fail="${has_parse_fail:-0}" - has_purgatory="${has_purgatory:-0}" - - if [[ "$has_parse_fail" -eq 0 && "$has_purgatory" -eq 0 ]]; then - echo -e "${_VS_YELLOW}WARNING: No structured logs found in $service_name (last 7 days)${_VS_NC}" >&2 - echo "" >&2 - echo "This may indicate:" >&2 - echo " 1. Wrong service (should be ngit-grasp archive service, not ngit-relay)" >&2 - echo " 2. Structured logging not yet deployed to this ngit-grasp instance" >&2 - echo " 3. No parse failures or purgatory expiry events in the time window" >&2 - echo "" >&2 - echo "To verify you have the right service:" >&2 - echo " systemctl list-units 'ngit-grasp*' --all" >&2 - echo " journalctl -u | grep -E '\\[PARSE_FAIL\\]|\\[PURGATORY_EXPIRED\\]' | head -5" >&2 - echo "" >&2 - - if [[ "$interactive" == "true" ]]; then - read -p "Continue anyway? (y/N) " -n 1 -r - echo - if [[ ! $REPLY =~ ^[Yy]$ ]]; then - return 1 - fi - else - echo "Non-interactive mode: proceeding despite warning" >&2 - fi - fi - fi - - return 0 -} - -# Export the function so it can be used after sourcing -export -f validate_service_for_structured_logging -- cgit v1.2.3