diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-02-23 15:20:59 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-02-23 15:20:59 +0000 |
| commit | 113928aa84894ea8f65c247d9987527e792b32a9 (patch) | |
| tree | ec967d6195d9f7ec4f061449596611afe3a0950f /docs/archive/2026-01-relay-ngit-dev-migration/scripts | |
| parent | 26f608e5011b9d1ad6036da75b89272835e69695 (diff) | |
| parent | e0ad39a489b3398f8208713bf728db0cb11475b0 (diff) | |
Merge master into 3ca0-announcements-purgatory
Diffstat (limited to 'docs/archive/2026-01-relay-ngit-dev-migration/scripts')
10 files changed, 4440 insertions, 0 deletions
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/01-fetch-events.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/01-fetch-events.sh new file mode 100755 index 0000000..e0d6f26 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/01-fetch-events.sh | |||
| @@ -0,0 +1,206 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 01-fetch-events.sh - Fetch nostr events from a relay for migration analysis | ||
| 4 | # | ||
| 5 | # PHASE 1 of the GRASP relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Fetches kind 30618 (state), 30617 (announcement), and 5 (deletion) events. | ||
| 7 | # | ||
| 8 | # USAGE: | ||
| 9 | # ./01-fetch-events.sh <relay-url> <output-dir> | ||
| 10 | # | ||
| 11 | # EXAMPLES: | ||
| 12 | # # Fetch from production relay | ||
| 13 | # ./01-fetch-events.sh wss://relay.ngit.dev output/prod | ||
| 14 | # | ||
| 15 | # # Fetch from archive relay | ||
| 16 | # ./01-fetch-events.sh wss://archive.relay.ngit.dev output/archive | ||
| 17 | # | ||
| 18 | # # Full migration analysis setup | ||
| 19 | # mkdir -p work/migration-analysis-$(date +%Y%m%d-%H%M) | ||
| 20 | # ./01-fetch-events.sh wss://relay.ngit.dev work/migration-analysis-*/prod | ||
| 21 | # ./01-fetch-events.sh wss://archive.relay.ngit.dev work/migration-analysis-*/archive | ||
| 22 | # | ||
| 23 | # OUTPUT: | ||
| 24 | # <output-dir>/raw/state-events.json - kind 30618 events (one per line, JSONL) | ||
| 25 | # <output-dir>/raw/announcements.json - kind 30617 events (one per line, JSONL) | ||
| 26 | # <output-dir>/raw/deletions.json - kind 5 events (one per line, JSONL) | ||
| 27 | # | ||
| 28 | # OUTPUT FORMAT: | ||
| 29 | # Each file contains one JSON event per line (JSONL format). | ||
| 30 | # Events are the raw nostr event objects as returned by the relay. | ||
| 31 | # | ||
| 32 | # PREREQUISITES: | ||
| 33 | # - nak (Nostr Army Knife) - https://github.com/fiatjaf/nak | ||
| 34 | # - jq (for counting/validation) | ||
| 35 | # | ||
| 36 | # RUNTIME: ~30 seconds per relay (depends on network and event count) | ||
| 37 | # | ||
| 38 | # NOTES: | ||
| 39 | # - Uses --paginate to ensure all events are fetched (not just first page) | ||
| 40 | # - If event counts are exact multiples of 250, pagination may have failed | ||
| 41 | # - Run Phase 1 and Phase 2 back-to-back for accurate snapshot | ||
| 42 | # | ||
| 43 | # SEE ALSO: | ||
| 44 | # docs/how-to/migrate-to-ngit-grasp.md - Full migration guide | ||
| 45 | # | ||
| 46 | |||
set -euo pipefail

# Colors for log output.
# Every colored message is emitted by the log_* helpers, which write to
# stderr, so the terminal check looks at fd 2. Checking fd 1 would leak escape
# sequences into a redirected stderr log whenever stdout is still a terminal.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m' # No Color
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
| 63 | |||
# Logging helpers. Each one prefixes the message with a colored level tag and
# writes to stderr, leaving stdout free for data that callers capture.
# '%b' expands backslash escapes in the whole line, as 'echo -e' does.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $*" >&2
}

log_success() {
    printf '%b\n' "${GREEN}[OK]${NC} $*" >&2
}

log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2
}

log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}
| 79 | |||
# Print the command-line help on stdout and terminate the script with status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 <relay-url> <output-dir>" \
        "" \
        "Arguments:" \
        " relay-url WebSocket URL of the relay (e.g., wss://relay.ngit.dev)" \
        " output-dir Directory to store fetched events (e.g., output/prod)" \
        "" \
        "Examples:" \
        " $0 wss://relay.ngit.dev output/prod" \
        " $0 wss://archive.relay.ngit.dev output/archive"
    exit 1
}
| 92 | |||
# Check prerequisites
# Verifies that nak and jq are on PATH. Every missing tool is reported before
# the script exits with status 1, so the user sees the full list in one run.
check_prerequisites() {
    local all_present=true

    command -v nak &> /dev/null || {
        log_error "nak not found. Install from: https://github.com/fiatjaf/nak"
        all_present=false
    }

    command -v jq &> /dev/null || {
        log_error "jq not found. Install with your package manager."
        all_present=false
    }

    if ! $all_present; then
        exit 1
    fi
}
| 111 | |||
# Fetch events of a specific kind
# Args: $1=relay, $2=kind, $3=output_file, $4=description
# Output: the number of fetched events on stdout. All log messages go to
#         stderr, so callers can capture the count with $(...).
# Returns: 1 when nak exits non-zero (the tail of its stderr is logged so the
#          failure can be diagnosed), 0 otherwise.
fetch_kind() {
    local relay="$1"
    local kind="$2"
    local output_file="$3"
    local description="$4"

    log_info "Fetching $description (kind $kind) from $relay..."

    local start_time
    start_time=$(date +%s)

    # Keep nak's stderr in a scratch file instead of discarding it; it is only
    # shown when the fetch fails. Fall back to /dev/null if mktemp is unusable.
    local err_file
    err_file=$(mktemp 2>/dev/null) || err_file=/dev/null

    # Use --paginate to ensure we get all events, not just first page
    # nak outputs one event per line (JSONL format)
    local fetch_ok=1
    nak req -k "$kind" --paginate "$relay" > "$output_file" 2> "$err_file" || fetch_ok=0

    if [[ $fetch_ok -eq 0 ]]; then
        log_error "Failed to fetch $description from $relay"
        if [[ -s "$err_file" ]]; then
            log_error "nak stderr (last lines): $(tail -n 5 "$err_file")"
        fi
    fi

    # Only remove a regular file: never the /dev/null fallback.
    if [[ -f "$err_file" ]]; then
        rm -f "$err_file"
    fi

    if [[ $fetch_ok -eq 0 ]]; then
        return 1
    fi

    local end_time
    end_time=$(date +%s)
    local duration=$((end_time - start_time))

    # Count events
    local count
    count=$(wc -l < "$output_file" | tr -d ' ')

    # Warn if count is suspicious (exact multiple of 250 suggests pagination issue)
    if [[ $count -gt 0 ]] && [[ $((count % 250)) -eq 0 ]]; then
        log_warn "$description count ($count) is exact multiple of 250 - pagination may have failed!"
    fi

    log_success "Fetched $count $description in ${duration}s -> $output_file"

    echo "$count"
}
| 149 | |||
# Main
# Validates the two CLI arguments, fetches the three event kinds into
# <output-dir>/raw, then prints a summary (stderr) and the list of written
# files (stdout).
main() {
    [[ $# -eq 2 ]] || usage

    local relay="$1"
    local output_dir="$2"

    # Validate relay URL: only ws:// and wss:// are accepted
    case "$relay" in
        ws://* | wss://*) ;;
        *)
            log_error "Invalid relay URL: $relay (must start with ws:// or wss://)"
            exit 1
            ;;
    esac

    check_prerequisites

    log_info "Starting event fetch from $relay"
    log_info "Output directory: $output_dir"

    # Create output directory structure
    local raw_dir="$output_dir/raw"
    mkdir -p "$raw_dir"

    local total_start
    total_start=$(date +%s)

    # Fetch each event type; the three arrays are parallel (same index = same kind)
    local -a kinds=(30618 30617 5)
    local -a files=("state-events.json" "announcements.json" "deletions.json")
    local -a labels=("state events" "announcements" "deletion requests")
    local -a counts=()
    local idx fetched
    for idx in 0 1 2; do
        fetched=$(fetch_kind "$relay" "${kinds[idx]}" "$raw_dir/${files[idx]}" "${labels[idx]}")
        counts+=("$fetched")
    done

    local total_end
    total_end=$(date +%s)
    local total_duration=$((total_end - total_start))

    # Summary
    echo ""
    log_info "=== Fetch Summary ==="
    log_info "Relay: $relay"
    log_info "Output: $output_dir"
    log_info "State events (30618): ${counts[0]}"
    log_info "Announcements (30617): ${counts[1]}"
    log_info "Deletions (5): ${counts[2]}"
    log_info "Total time: ${total_duration}s"
    echo ""

    # Output file listing for easy copy/paste
    log_info "Output files:"
    for idx in 0 1 2; do
        echo " $raw_dir/${files[idx]}"
    done
}
| 205 | |||
| 206 | main "$@" | ||
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh new file mode 100755 index 0000000..b4536cb --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh | |||
| @@ -0,0 +1,564 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 10-check-git-sync.sh - Compare state events to actual git data on disk | ||
| 4 | # | ||
| 5 | # PHASE 2 of the GRASP relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Compares kind 30618 state events against actual git refs on disk. | ||
| 7 | # | ||
| 8 | # USAGE: | ||
| 9 | # ./10-check-git-sync.sh <state-events.json> <git-base-dir> <output-dir> [--categorize] | ||
| 10 | # | ||
| 11 | # EXAMPLES: | ||
| 12 | # # Check source relay against source git data | ||
| 13 | # ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod | ||
| 14 | # | ||
| 15 | # # Check target relay against target git data | ||
| 16 | # ./10-check-git-sync.sh output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive | ||
| 17 | # | ||
| 18 | # # Check and categorize in one step (convenience mode) | ||
| 19 | # ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod --categorize | ||
| 20 | # | ||
| 21 | # INPUT: | ||
| 22 | # state-events.json - JSONL file from Phase 1 (01-fetch-events.sh) | ||
| 23 | # One kind 30618 event per line | ||
| 24 | # git-base-dir - Base directory containing git repos | ||
| 25 | # Structure: <git-base>/<npub>/<repo>.git/ | ||
| 26 | # | ||
| 27 | # OUTPUT: | ||
| 28 | # <output-dir>/git-sync-status.tsv - Tab-separated values: | ||
| 29 | # repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason | ||
| 30 | # | ||
| 31 | # With --categorize flag, also outputs: | ||
| 32 | # <output-dir>/category1-complete-match.txt | ||
| 33 | # <output-dir>/category2-empty-blank.txt | ||
| 34 | # <output-dir>/category3-partial-match.txt | ||
| 35 | # <output-dir>/category4-no-match.txt | ||
| 36 | # | ||
| 37 | # CATEGORIES: | ||
| 38 | # 1. Complete Match - All refs in state event match git data perfectly | ||
| 39 | # 2. Empty/Blank - No git data available (directory missing or empty) | ||
| 40 | # 3. Partial Match - Some refs match, some don't | ||
| 41 | # 4. No Match - Git data exists but commit hashes don't match | ||
| 42 | # | ||
| 43 | # PREREQUISITES: | ||
| 44 | # - nak (for npub encoding) - https://github.com/fiatjaf/nak | ||
| 45 | # - jq (for JSON parsing) | ||
| 46 | # - Read access to git directories (may need sudo) | ||
| 47 | # | ||
| 48 | # RUNTIME: ~20 minutes on VPS (git operations are slow) | ||
| 49 | # | ||
| 50 | # NOTES: | ||
| 51 | # - Must run on VPS with access to git directories | ||
| 52 | # - Progress indicator updates every 10 events | ||
| 53 | # - Handles packed refs (git show-ref) and loose refs | ||
| 54 | # | ||
| 55 | # SEE ALSO: | ||
| 56 | # docs/how-to/migrate-to-ngit-grasp.md - Full migration guide | ||
| 57 | # 01-fetch-events.sh - Phase 1 script that produces input for this script | ||
| 58 | # 20-categorize.sh - Phase 3a script that consumes output from this script | ||
| 59 | # | ||
| 60 | |||
set -euo pipefail

# Colors for log output.
# Every colored message is emitted by the log_* helpers, which write to
# stderr, so the terminal check looks at fd 2. Checking fd 1 would leak escape
# sequences into a redirected stderr log whenever stdout is still a terminal.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
| 77 | |||
# Logging helpers. Each one prefixes the message with a colored level tag and
# writes to stderr, leaving stdout free for data that callers capture.
# '%b' expands backslash escapes in the whole line, as 'echo -e' does.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $*" >&2
}

log_success() {
    printf '%b\n' "${GREEN}[OK]${NC} $*" >&2
}

log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2
}

log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}

log_progress() {
    # Leading carriage return and no trailing newline: each update overwrites
    # the previous progress line.
    printf '%b' "\r${BLUE}[PROGRESS]${NC} $*" >&2
}
| 98 | |||
# Print the command-line help on stdout and terminate the script with status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 <state-events.json> <git-base-dir> <output-dir> [--categorize]" \
        "" \
        "Arguments:" \
        " state-events.json JSONL file from Phase 1 (kind 30618 events)" \
        " git-base-dir Base directory for git repos (e.g., /var/lib/grasp-relay/git)" \
        " output-dir Directory to store output files" \
        " --categorize Optional: also output category files (like Phase 3)" \
        "" \
        "Examples:" \
        " $0 output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod" \
        " $0 output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive" \
        "" \
        "Output:" \
        " git-sync-status.tsv - TSV with: repo, npub, state_refs, git_refs, matches, reason"
    exit 1
}
| 116 | |||
# Check prerequisites
# Verifies that git, nak and jq are on PATH. Every missing tool is reported
# before the script exits with status 1.
# Args: any arguments given here are echoed in the nix-shell hint; "$*" below
#       expands to THIS function's arguments, not the script's.
check_prerequisites() {
    local all_present=true

    command -v git &> /dev/null || {
        log_error "git not found. Install with your package manager."
        all_present=false
    }

    command -v nak &> /dev/null || {
        log_error "nak not found. Install from: https://github.com/fiatjaf/nak"
        log_error "Or run: nix-shell -p nak jq --run \"$0 $*\""
        all_present=false
    }

    command -v jq &> /dev/null || {
        log_error "jq not found. Install with your package manager."
        all_present=false
    }

    if ! $all_present; then
        exit 1
    fi
}
| 141 | |||
# Convert hex pubkey to npub
# Args: $1=hex_pubkey
# Output: whatever `nak encode npub` prints; an empty line is added when nak
#         exits non-zero. nak's stderr is discarded. Always returns 0.
hex_to_npub() {
    local hex_key="$1"

    if ! nak encode npub "$hex_key" 2>/dev/null; then
        echo ""
    fi
}
| 149 | |||
# Count refs in state event (only refs/heads/)
# Args: $1=event_json
# Output: number of tags whose first element is a string starting with
#         "refs/heads/"; "0" when jq fails (e.g. the input is not valid JSON).
count_state_refs() {
    local event="$1"

    # `.[0]? | strings` skips tags that are empty, not arrays, or whose name is
    # not a string. Without that guard one malformed tag aborts jq and the
    # whole event is reported as having 0 refs.
    printf '%s\n' "$event" \
        | jq '[.tags[]? | select(.[0]? | strings | startswith("refs/heads/"))] | length' 2>/dev/null \
        || echo "0"
}
| 157 | |||
# Get git refs from disk
# Args: $1=git_dir
# Output: count of refs/heads/ refs ("0" when the directory does not exist)
count_git_refs() {
    local git_dir="$1"

    [[ -d "$git_dir" ]] || {
        echo "0"
        return
    }

    # Preferred: git show-ref, which also sees packed refs. The pipeline only
    # counts as successful when git itself succeeds if `pipefail` is active
    # (the script enables it); otherwise control falls through to the fallback.
    local n_heads
    if n_heads=$(git --git-dir="$git_dir" show-ref --heads 2>/dev/null | wc -l); then
        echo "${n_heads// /}"
        return
    fi

    # Fallback: count loose ref files only (packed refs are not seen here)
    if [[ ! -d "$git_dir/refs/heads" ]]; then
        echo "0"
    else
        find "$git_dir/refs/heads" -type f 2>/dev/null | wc -l | tr -d ' '
    fi
}
| 184 | |||
# Get ref hash from git directory
# Args: $1=git_dir, $2=ref_path (e.g., refs/heads/main)
# Output: commit hash, or an empty line when the ref cannot be resolved
get_git_ref_hash() {
    local git_dir="$1"
    local ref_path="$2"

    # Try git show-ref first (handles packed refs).
    # --verify demands the exact ref path. Without it show-ref treats the
    # argument as a pattern matched from the END of each ref name, so
    # "refs/heads/main" could also resolve e.g. "refs/x/refs/heads/main".
    local hash
    hash=$(git --git-dir="$git_dir" show-ref --verify --hash -- "$ref_path" 2>/dev/null | head -1 || echo "")

    if [[ -n "$hash" ]]; then
        echo "$hash"
        return
    fi

    # Fallback: read loose ref file
    local ref_file="$git_dir/$ref_path"
    if [[ -f "$ref_file" ]]; then
        cat "$ref_file" 2>/dev/null | tr -d '\n' || echo ""
    else
        echo ""
    fi
}
| 209 | |||
# Compare state event refs to git refs
# Args: $1=event_json, $2=git_dir
# Output: count of refs/heads/ tags whose hash equals the hash on disk
count_matching_refs() {
    local event="$1"
    local git_dir="$2"
    local matching=0
    local ref_path expected_hash actual_hash

    # A single jq pass emits "ref<TAB>hash" for every usable refs/heads/ tag.
    # The `strings` guards drop malformed tags (empty, non-array, non-string
    # name or hash) instead of letting one of them abort jq mid-stream, and the
    # single pass avoids spawning two jq processes per ref.
    while IFS=$'\t' read -r ref_path expected_hash; do
        [[ -z "$ref_path" || -z "$expected_hash" ]] && continue

        # Get actual hash from git
        actual_hash=$(get_git_ref_hash "$git_dir" "$ref_path")

        if [[ "$expected_hash" == "$actual_hash" ]]; then
            matching=$((matching + 1))
        fi
    done < <(printf '%s\n' "$event" | jq -r '
        .tags[]?
        | select(.[0]? | strings | startswith("refs/heads/"))
        | select(.[1]? | strings | length > 0)
        | [.[0], .[1]]
        | @tsv' 2>/dev/null)

    echo "$matching"
}
| 241 | |||
# Categorize a single entry
# Args: $1=state_refs, $2=git_refs, $3=matches, $4=reason
# Output: category number (1-4)
#   2 = Empty/Blank    (a reason is set, or git has no refs)
#   1 = Complete Match (state_refs > 0 and state_refs == git_refs == matches)
#   4 = No Match       (git has refs but nothing matches)
#   3 = Partial Match  (anything else with at least one match)
# The checks are evaluated in exactly that order; the first hit wins.
categorize_entry() {
    local n_state="$1"
    local n_git="$2"
    local n_match="$3"
    local why="$4"
    local verdict

    if [[ -n "$why" || "$n_git" -eq 0 ]]; then
        verdict=2
    elif [[ "$n_state" -gt 0 && "$n_state" -eq "$n_git" && "$n_match" -eq "$n_state" ]]; then
        verdict=1
    elif [[ "$n_git" -gt 0 && "$n_match" -eq 0 ]]; then
        verdict=4
    elif [[ "$n_match" -gt 0 ]]; then
        verdict=3
    else
        # Fallback to category 2
        verdict=2
    fi

    echo "$verdict"
}
| 278 | |||
# Format entry for category file
# Args: $1=repo, $2=npub, $3=state_refs, $4=git_refs, $5=matches, $6=reason
# Output: "repo | npub | state_refs=N | git_refs=N | matches=N", with
#         " | reason=X" appended only when a reason is given.
format_category_line() {
    local line="$1 | $2 | state_refs=$3 | git_refs=$4 | matches=$5"
    local why="$6"

    if [[ -n "$why" ]]; then
        line+=" | reason=$why"
    fi

    printf '%s\n' "$line"
}
| 295 | |||
# Process a single state event
# Args: $1=event_json, $2=git_base
# Output: one TSV line on stdout: repo, npub, state_refs, git_refs, matches, reason
# Returns: 1 (and prints nothing) when the event has no usable "d" identifier,
#          no pubkey, or the pubkey cannot be encoded as an npub.
process_event() {
    local event="$1"
    local git_base="$2"

    # Extract repository identifier (d tag).
    # `strings` drops a missing or non-string value; with plain `jq -r` a JSON
    # null would be printed as the literal text "null" and be treated as a
    # repository named "null".
    local identifier
    identifier=$(printf '%s\n' "$event" | jq -r '.tags[]? | select(.[0]? == "d") | .[1]? | strings' 2>/dev/null | head -1 || echo "")

    if [[ -z "$identifier" ]]; then
        return 1
    fi

    # Extract maintainer pubkey (hex); same null handling as above
    local hex_pubkey
    hex_pubkey=$(printf '%s\n' "$event" | jq -r '.pubkey | strings' 2>/dev/null || echo "")

    if [[ -z "$hex_pubkey" ]]; then
        return 1
    fi

    # Convert to npub
    local npub
    npub=$(hex_to_npub "$hex_pubkey")

    if [[ -z "$npub" ]]; then
        return 1
    fi

    # Count state refs
    local state_refs
    state_refs=$(count_state_refs "$event")

    # Find git directory
    # NOTE(review): identifier comes straight from the event and is not checked
    # for "/" or ".." before being used as a path component - confirm whether
    # such identifiers should be rejected.
    local git_dir="$git_base/${npub}/${identifier}.git"

    # Check git directory status
    local git_refs=0
    local matches=0
    local reason=""

    if [[ ! -d "$git_dir" ]]; then
        reason="no_git_dir"
    elif [[ ! -d "$git_dir/refs/heads" ]] && [[ ! -f "$git_dir/packed-refs" ]]; then
        reason="empty_refs"
    else
        git_refs=$(count_git_refs "$git_dir")

        if [[ "$git_refs" -eq 0 ]]; then
            reason="empty_refs"
        elif [[ "$state_refs" -eq 0 ]]; then
            reason="no_state_refs"
        else
            matches=$(count_matching_refs "$event" "$git_dir")
        fi
    fi

    # Output TSV line: repo, npub, state_refs, git_refs, matches, reason
    printf '%s\t%s\t%s\t%s\t%s\t%s\n' "$identifier" "$npub" "$state_refs" "$git_refs" "$matches" "$reason"
}
| 358 | |||
# Main
# Parses the CLI (three positional arguments plus an optional --categorize
# flag in any position), validates the inputs, then processes the state-events
# file line by line: every event that process_event accepts is appended to
# <output-dir>/git-sync-status.tsv and, with --categorize, to one of the four
# category files. Progress and the summary go to stderr; the list of output
# files goes to stdout.
main() {
    local do_categorize=0
    local args=()
    local arg

    # Parse arguments
    for arg in "$@"; do
        if [[ "$arg" == "--categorize" ]]; then
            do_categorize=1
        else
            args+=("$arg")
        fi
    done

    if [[ ${#args[@]} -ne 3 ]]; then
        usage
    fi

    local state_events_file="${args[0]}"
    local git_base="${args[1]}"
    local output_dir="${args[2]}"

    # Validate inputs
    if [[ ! -f "$state_events_file" ]]; then
        log_error "State events file not found: $state_events_file"
        exit 1
    fi

    if [[ ! -d "$git_base" ]]; then
        log_error "Git base directory not found: $git_base"
        log_error "This script must run on the VPS with access to git directories."
        exit 1
    fi

    # Check read permissions
    if ! ls "$git_base" >/dev/null 2>&1; then
        log_error "Cannot read git base directory (permission denied): $git_base"
        log_error "Try running with sudo or grant read permissions."
        exit 1
    fi

    # Forward the original command line: the nix-shell hint printed when nak is
    # missing interpolates the arguments it is given.
    check_prerequisites "$@"

    log_info "=== Git State Synchronization Check ==="
    log_info "State events: $state_events_file"
    log_info "Git base: $git_base"
    log_info "Output: $output_dir"
    if [[ $do_categorize -eq 1 ]]; then
        log_info "Mode: TSV + categorization"
    else
        log_info "Mode: TSV only (use 20-categorize.sh for categories)"
    fi
    log_info "Started: $(date)"
    echo ""

    # Create output directory
    mkdir -p "$output_dir"

    # Output files
    local tsv_file="$output_dir/git-sync-status.tsv"

    # Initialize TSV with header
    echo -e "repo\tnpub\tstate_refs\tgit_refs\tmatches\treason" > "$tsv_file"

    # Initialize (truncate) category files if categorizing
    local cat1="" cat2="" cat3="" cat4=""
    if [[ $do_categorize -eq 1 ]]; then
        cat1="$output_dir/category1-complete-match.txt"
        cat2="$output_dir/category2-empty-blank.txt"
        cat3="$output_dir/category3-partial-match.txt"
        cat4="$output_dir/category4-no-match.txt"
        > "$cat1"
        > "$cat2"
        > "$cat3"
        > "$cat4"
    fi

    # Count total events
    local total_events
    total_events=$(wc -l < "$state_events_file" | tr -d ' ')
    log_info "Processing $total_events state events..."
    echo ""

    # Process each event
    local count=0
    local processed=0
    local skipped=0
    local count_cat1=0 count_cat2=0 count_cat3=0 count_cat4=0
    local start_time
    start_time=$(date +%s)

    # Fields parsed back out of each TSV row; local so they do not leak as globals
    local event result category cat_line
    local repo npub state_refs git_refs matches reason

    # `|| [[ -n "$event" ]]` keeps a final line that lacks a trailing newline;
    # plain `read` returns non-zero for it and the event would be dropped.
    while IFS= read -r event || [[ -n "$event" ]]; do
        count=$((count + 1))

        # Skip empty lines
        [[ -z "$event" ]] && continue

        # Process event
        if result=$(process_event "$event" "$git_base"); then
            processed=$((processed + 1))

            # Append the data row (the header was written above)
            echo "$result" >> "$tsv_file"

            # Categorize if requested
            if [[ $do_categorize -eq 1 ]]; then
                # Parse result
                IFS=$'\t' read -r repo npub state_refs git_refs matches reason <<< "$result"

                category=$(categorize_entry "$state_refs" "$git_refs" "$matches" "$reason")
                cat_line=$(format_category_line "$repo" "$npub" "$state_refs" "$git_refs" "$matches" "$reason")

                case "$category" in
                    1) echo "$cat_line" >> "$cat1"; count_cat1=$((count_cat1 + 1)) ;;
                    2) echo "$cat_line" >> "$cat2"; count_cat2=$((count_cat2 + 1)) ;;
                    3) echo "$cat_line" >> "$cat3"; count_cat3=$((count_cat3 + 1)) ;;
                    4) echo "$cat_line" >> "$cat4"; count_cat4=$((count_cat4 + 1)) ;;
                esac
            fi
        else
            skipped=$((skipped + 1))
        fi

        # Progress indicator every 10 events
        if [[ $((count % 10)) -eq 0 ]]; then
            local elapsed=$(($(date +%s) - start_time))
            local rate=0
            if [[ $elapsed -gt 0 ]]; then
                rate=$((count / elapsed))
            fi
            local eta="?"
            if [[ $rate -gt 0 ]]; then
                eta=$(( (total_events - count) / rate ))
            fi
            log_progress "Processed $count/$total_events events (~${rate}/s, ETA: ${eta}s)..."
        fi
    done < "$state_events_file"

    # Terminate the progress line
    echo "" >&2

    local end_time
    end_time=$(date +%s)
    local duration=$((end_time - start_time))

    # Summary
    echo ""
    log_info "=== Analysis Complete ==="
    log_info "Finished: $(date)"
    log_info "Duration: ${duration}s"
    log_info "Processed: $processed events"
    if [[ $skipped -gt 0 ]]; then
        log_warn "Skipped: $skipped events (missing identifier or pubkey)"
    fi
    echo ""

    if [[ $do_categorize -eq 1 ]]; then
        # Calculate percentages
        local total=$((count_cat1 + count_cat2 + count_cat3 + count_cat4))
        local pct1=0 pct2=0 pct3=0 pct4=0
        if [[ $total -gt 0 ]]; then
            pct1=$(awk "BEGIN {printf \"%.1f\", ($count_cat1/$total)*100}")
            pct2=$(awk "BEGIN {printf \"%.1f\", ($count_cat2/$total)*100}")
            pct3=$(awk "BEGIN {printf \"%.1f\", ($count_cat3/$total)*100}")
            pct4=$(awk "BEGIN {printf \"%.1f\", ($count_cat4/$total)*100}")
        fi

        log_info "=== Category Summary ==="
        log_success "Category 1 (Complete Match): $count_cat1 ($pct1%)"
        log_warn "Category 2 (Empty/Blank): $count_cat2 ($pct2%)"
        log_warn "Category 3 (Partial Match): $count_cat3 ($pct3%)"
        log_error "Category 4 (No Match): $count_cat4 ($pct4%)"
        echo ""

        # Validation warning
        if [[ $count_cat2 -eq $total ]] && [[ $total -gt 0 ]]; then
            log_error "WARNING: 100% of repos categorized as Empty/Blank"
            log_error "This usually indicates a permission or path issue."
            echo ""
            log_info "Troubleshooting:"
            echo " 1. Verify git data exists: sudo ls -la $git_base | head -10"
            echo " 2. Check sample repo: sudo find $git_base -name '*.git' -type d | head -1"
            echo " 3. Re-run with sudo if not already using it"
            echo ""
        fi
    fi

    log_info "Output files:"
    echo " $tsv_file"
    if [[ $do_categorize -eq 1 ]]; then
        echo " $cat1"
        echo " $cat2"
        echo " $cat3"
        echo " $cat4"
    else
        echo ""
        log_info "Next step: Run 20-categorize.sh to categorize results"
        echo " ./20-categorize.sh $tsv_file $output_dir"
    fi
}
| 563 | |||
| 564 | main "$@" | ||
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/20-categorize.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/20-categorize.sh new file mode 100755 index 0000000..b38dc00 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/20-categorize.sh | |||
| @@ -0,0 +1,212 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 20-categorize.sh - Categorize git sync status into 4 categories | ||
| 4 | # | ||
| 5 | # PHASE 3a of the GRASP relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Takes git-sync-status.tsv from Phase 2 and categorizes into 4 files. | ||
| 7 | # | ||
| 8 | # USAGE: | ||
| 9 | # ./20-categorize.sh <git-sync-status.tsv> <output-dir> | ||
| 10 | # | ||
| 11 | # EXAMPLES: | ||
| 12 | # ./20-categorize.sh output/prod/git-sync-status.tsv output/prod | ||
| 13 | # ./20-categorize.sh output/archive/git-sync-status.tsv output/archive | ||
| 14 | # | ||
| 15 | # INPUT FORMAT (git-sync-status.tsv): | ||
| 16 | # Tab-separated values with columns: | ||
| 17 | # repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason | ||
| 18 | # | ||
| 19 | # Where reason is optional and can be: no_git_dir, empty_refs, no_state_refs | ||
| 20 | # | ||
| 21 | # OUTPUT: | ||
| 22 | # <output-dir>/category1-complete-match.txt - All refs match perfectly | ||
| 23 | # <output-dir>/category2-empty-blank.txt - No git data available | ||
| 24 | # <output-dir>/category3-partial-match.txt - Some refs match | ||
| 25 | # <output-dir>/category4-no-match.txt - Git exists but refs don't match | ||
| 26 | # | ||
| 27 | # OUTPUT FORMAT: | ||
| 28 | # repo | npub | state_refs=N | git_refs=N | matches=N [| reason=X] | ||
| 29 | # | ||
| 30 | # CATEGORIES: | ||
| 31 | # 1. Complete Match: state_refs == git_refs == matches (all > 0) | ||
| 32 | # 2. Empty/Blank: git_refs == 0 OR reason in (no_git_dir, empty_refs, no_state_refs) | ||
| 33 | # 3. Partial Match: matches > 0 but not a complete match (matches < state_refs, or git has extra refs) | ||
| 34 | # 4. No Match: git_refs > 0 AND matches == 0 | ||
| 35 | # | ||
| 36 | # PREREQUISITES: | ||
| 37 | # - awk (standard Unix tool) | ||
| 38 | # | ||
| 39 | # RUNTIME: < 1 second (local processing only) | ||
| 40 | # | ||
| 41 | # SEE ALSO: | ||
| 42 | # docs/how-to/migrate-to-ngit-grasp.md - Full migration guide | ||
| 43 | # 10-check-git-sync.sh - Phase 2 script that produces input for this script | ||
| 44 | # | ||
| 45 | |||
| 46 | set -euo pipefail | ||
| 47 | |||
# Colors for log output. Every log_* helper writes to stderr, so the terminal
# check is made on fd 2: redirecting stderr to a file yields plain text, and
# piping stdout elsewhere does not strip colors from an interactive stderr.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
| 62 | |||
# Write an informational message to stderr, prefixed with a blue [INFO] tag.
# Backslash escapes in the message are expanded.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $*" >&2
}
| 66 | |||
# Write a success message to stderr, prefixed with a green [OK] tag.
# Backslash escapes in the message are expanded.
log_success() {
    printf '%b\n' "${GREEN}[OK]${NC} $*" >&2
}
| 70 | |||
# Write a warning to stderr, prefixed with a yellow [WARN] tag.
# Backslash escapes in the message are expanded.
log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2
}
| 74 | |||
# Write an error message to stderr, prefixed with a red [ERROR] tag.
# Backslash escapes in the message are expanded.
log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}
| 78 | |||
# Print the command-line help to stdout and terminate with exit status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 <git-sync-status.tsv> <output-dir>" \
        "" \
        "Arguments:" \
        " git-sync-status.tsv TSV file from Phase 2 (10-check-git-sync.sh)" \
        " output-dir Directory to store categorized output" \
        "" \
        "Examples:" \
        " $0 output/prod/git-sync-status.tsv output/prod" \
        " $0 output/archive/git-sync-status.tsv output/archive" \
        "" \
        "Input format (TSV):" \
        " repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason" \
        "" \
        "Output files:" \
        " category1-complete-match.txt - All refs match" \
        " category2-empty-blank.txt - No git data" \
        " category3-partial-match.txt - Some refs match" \
        " category4-no-match.txt - Git exists, refs don't match"
    exit 1
}
| 100 | |||
# Main: split the Phase 2 TSV into the four category files and print a summary.
#
# Args: $1 = git-sync-status.tsv path, $2 = output directory (created if absent)
# Exits 1 (via usage) on a wrong argument count, or when the input file does
# not exist. Existing category files in the output directory are overwritten.
main() {
    if [[ $# -ne 2 ]]; then
        usage
    fi

    local input_file="$1"
    local output_dir="$2"

    # Validate input file
    if [[ ! -f "$input_file" ]]; then
        log_error "Input file not found: $input_file"
        exit 1
    fi

    log_info "Categorizing git sync status"
    log_info "Input: $input_file"
    log_info "Output: $output_dir"

    # Create output directory
    mkdir -p "$output_dir"

    # Output files
    local cat1="$output_dir/category1-complete-match.txt"
    local cat2="$output_dir/category2-empty-blank.txt"
    local cat3="$output_dir/category3-partial-match.txt"
    local cat4="$output_dir/category4-no-match.txt"

    # Clear previous results (awk appends to these files)
    : > "$cat1"
    : > "$cat2"
    : > "$cat3"
    : > "$cat4"

    local line c1 c2 c3 c4 total skipped

    # Process input file with awk
    # Input: repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason
    # awk writes category rows straight to the files; its only stdout line is
    # the trailing COUNTS record parsed by the loop below.
    awk -F'\t' -v cat1="$cat1" -v cat2="$cat2" -v cat3="$cat3" -v cat4="$cat4" '
    BEGIN {
        count1 = 0; count2 = 0; count3 = 0; count4 = 0; skipped = 0
    }
    # Skip the header row. The column names are matched exactly so that a
    # first data row for a repository whose name merely starts with "repo"
    # is still categorized.
    NR == 1 && $1 == "repo" && $2 == "npub" { next }
    NF >= 5 {
        repo = $1
        npub = $2
        state_refs = int($3)
        git_refs = int($4)
        matches = int($5)
        reason = (NF >= 6) ? $6 : ""

        # Format output line
        if (reason != "") {
            line = repo " | " npub " | state_refs=" state_refs " | git_refs=" git_refs " | matches=" matches " | reason=" reason
        } else {
            line = repo " | " npub " | state_refs=" state_refs " | git_refs=" git_refs " | matches=" matches
        }

        # Categorize
        if (reason == "no_git_dir" || reason == "empty_refs" || reason == "no_state_refs" || git_refs == 0) {
            # Category 2: Empty/Blank
            print line >> cat2
            count2++
        } else if (state_refs > 0 && state_refs == git_refs && matches == state_refs) {
            # Category 1: Complete Match
            print line >> cat1
            count1++
        } else if (matches > 0 && matches < state_refs) {
            # Category 3: Partial Match
            print line >> cat3
            count3++
        } else if (git_refs > 0 && matches == 0) {
            # Category 4: No Match
            print line >> cat4
            count4++
        } else if (matches > 0) {
            # Edge case: matches > 0 but does not fit other categories
            # This can happen when git_refs > state_refs but all state refs match
            # Treat as partial match
            print line >> cat3
            count3++
        } else {
            # Fallback: treat as category 2 (empty/blank)
            print line >> cat2
            count2++
        }
        next
    }
    # Non-blank rows with fewer than 5 columns cannot be categorized; count
    # them so the summary can report that input was dropped.
    NF > 0 { skipped++ }
    END {
        total = count1 + count2 + count3 + count4
        print "COUNTS:" count1 ":" count2 ":" count3 ":" count4 ":" total ":" skipped
    }
    ' "$input_file" 2>&1 | while IFS= read -r line; do
        if [[ "$line" =~ ^COUNTS: ]]; then
            # Parse counts from awk output
            IFS=':' read -r _ c1 c2 c3 c4 total skipped <<< "$line"

            echo ""
            log_info "=== Categorization Summary ==="
            log_info "Total entries: $total"
            log_success "Category 1 (Complete Match): $c1"
            log_warn "Category 2 (Empty/Blank): $c2"
            log_warn "Category 3 (Partial Match): $c3"
            log_error "Category 4 (No Match): $c4"
            if [[ "${skipped:-0}" -gt 0 ]]; then
                log_warn "Skipped malformed rows (fewer than 5 columns): $skipped"
            fi
            echo ""
            log_info "Output files:"
            echo " $cat1"
            echo " $cat2"
            echo " $cat3"
            echo " $cat4"
        else
            # Anything else is an awk diagnostic merged in by 2>&1; surface it
            # instead of silently discarding it.
            echo "$line" >&2
        fi
    done
}
| 211 | |||
| 212 | main "$@" | ||
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh new file mode 100755 index 0000000..b9c0d30 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh | |||
| @@ -0,0 +1,294 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 21-compare-relays.sh - Compare prod vs archive category files to find gaps | ||
| 4 | # | ||
| 5 | # PHASE 3b of the GRASP relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Compares categorized output from prod and archive to identify: | ||
| 7 | # - Repos complete in prod but missing/incomplete in archive | ||
| 8 | # - Repos in archive but not in prod | ||
| 9 | # - Status differences between relays | ||
| 10 | # | ||
| 11 | # USAGE: | ||
| 12 | # ./21-compare-relays.sh <prod-dir> <archive-dir> <output-dir> | ||
| 13 | # | ||
| 14 | # EXAMPLES: | ||
| 15 | # ./21-compare-relays.sh output/prod output/archive output/comparison | ||
| 16 | # | ||
| 17 | # INPUT: | ||
| 18 | # Both prod-dir and archive-dir must contain: | ||
| 19 | # - category1-complete-match.txt | ||
| 20 | # - category2-empty-blank.txt | ||
| 21 | # - category3-partial-match.txt | ||
| 22 | # - category4-no-match.txt | ||
| 23 | # | ||
| 24 | # OUTPUT: | ||
| 25 | # <output-dir>/complete-in-both.txt - Repos complete in both relays (no action) | ||
| 26 | # <output-dir>/complete-prod-missing-archive.txt - Complete in prod, absent from archive (no category) | ||
| 27 | # <output-dir>/complete-prod-incomplete-archive.txt - Complete in prod, incomplete in archive | ||
| 28 | # <output-dir>/incomplete-in-both.txt - Incomplete in both relays | ||
| 29 | # <output-dir>/in-archive-not-prod.txt - In archive but not in prod | ||
| 30 | # <output-dir>/summary.txt - Human-readable summary | ||
| 31 | # | ||
| 32 | # OUTPUT FORMAT: | ||
| 33 | # Each file contains lines in the format: | ||
| 34 | # repo | npub | prod_status | archive_status | ||
| 35 | # | ||
| 36 | # PREREQUISITES: | ||
| 37 | # - awk, sort, comm (standard Unix tools) | ||
| 38 | # | ||
| 39 | # RUNTIME: < 1 second (local processing only) | ||
| 40 | # | ||
| 41 | # SEE ALSO: | ||
| 42 | # docs/how-to/migrate-to-ngit-grasp.md - Full migration guide | ||
| 43 | # 20-categorize.sh - Phase 3a script that produces input for this script | ||
| 44 | # | ||
| 45 | |||
| 46 | set -euo pipefail | ||
| 47 | |||
# Colors for log output. Every log_* helper writes to stderr, so the terminal
# check is made on fd 2: redirecting stderr to a file yields plain text, and
# piping stdout elsewhere does not strip colors from an interactive stderr.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
| 62 | |||
# Write an informational message to stderr, prefixed with a blue [INFO] tag.
# Backslash escapes in the message are expanded.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $*" >&2
}
| 66 | |||
# Write a success message to stderr, prefixed with a green [OK] tag.
# Backslash escapes in the message are expanded.
log_success() {
    printf '%b\n' "${GREEN}[OK]${NC} $*" >&2
}
| 70 | |||
# Write a warning to stderr, prefixed with a yellow [WARN] tag.
# Backslash escapes in the message are expanded.
log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2
}
| 74 | |||
# Write an error message to stderr, prefixed with a red [ERROR] tag.
# Backslash escapes in the message are expanded.
log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}
| 78 | |||
# Print the command-line help to stdout and terminate with exit status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 <prod-dir> <archive-dir> <output-dir>" \
        "" \
        "Arguments:" \
        " prod-dir Directory containing prod category files" \
        " archive-dir Directory containing archive category files" \
        " output-dir Directory to store comparison results" \
        "" \
        "Examples:" \
        " $0 output/prod output/archive output/comparison" \
        "" \
        "Required input files in each directory:" \
        " category1-complete-match.txt" \
        " category2-empty-blank.txt" \
        " category3-partial-match.txt" \
        " category4-no-match.txt"
    exit 1
}
| 97 | |||
# Reduce category lines read from stdin to their "repo|npub" key.
# Input:  "repo | npub | state_refs=N | ..."
# Output: "repo|npub"
# Fields are split on the literal " | " separator used by the category files.
extract_key() {
    awk 'BEGIN { FS = " [|] " } { printf "%s|%s\n", $1, $2 }'
}
| 104 | |||
# Build a sorted lookup table from the category files in a directory.
# Args: $1=directory, $2=output_file
#
# Each output line is "repo|npub|catN|<original category line>", where N is
# the number of the category file the line came from. Category files that do
# not exist are skipped. The result is sorted on the repo and npub fields.
#
# Each file is processed by a single awk pass instead of one subshell per
# line. The glob is expanded with the directory quoted, so directories that
# contain spaces work, and a final line without a trailing newline is kept.
build_lookup() {
    local dir="$1"
    local output="$2"
    local cat file

    # Process all 4 category files
    for cat in 1 2 3 4; do
        for file in "$dir"/category"${cat}"-*.txt; do
            # An unmatched glob is left as a literal pattern; skip it.
            [[ -f "$file" ]] || continue
            # Key extraction splits on the literal " | " separator, the same
            # rule extract_key applies.
            awk -v tag="cat${cat}" '
                BEGIN { FS = " [|] " }
                { print $1 "|" $2 "|" tag "|" $0 }
            ' "$file"
        done
    done | sort -t'|' -k1,2 > "$output"
}
| 124 | |||
# Print the category tag (cat1..cat4) stored for a repo/npub pair in a lookup
# file produced by build_lookup, or nothing when the pair is absent.
# Args: $1=lookup_file, $2=repo, $3=npub
# The pair is passed through the environment and compared as literal strings
# (never as a regex or a number), so a repo name such as "a.b" cannot match
# "axb". Only the first matching line is used.
_lookup_category() {
    local lookup_file="$1"

    LOOKUP_REPO="$2" LOOKUP_NPUB="$3" awk -F'|' '
        ($1 "") == (ENVIRON["LOOKUP_REPO"] "") && ($2 "") == (ENVIRON["LOOKUP_NPUB"] "") {
            print $3
            exit
        }
    ' "$lookup_file"
}

# Main: compare prod and archive category files and write the comparison
# reports plus summary.txt into the output directory.
#
# Args: $1 = prod dir, $2 = archive dir, $3 = output dir (created if absent)
# Exits 1 (via usage) on a wrong argument count, or when an input directory or
# its category1-complete-match.txt is missing. Missing category 2-4 files only
# produce a warning and are treated as empty.
main() {
    if [[ $# -ne 3 ]]; then
        usage
    fi

    local prod_dir="$1"
    local archive_dir="$2"
    local output_dir="$3"
    local dir name

    # Validate input directories
    for dir in "$prod_dir" "$archive_dir"; do
        if [[ ! -d "$dir" ]]; then
            log_error "Directory not found: $dir"
            exit 1
        fi
        if [[ ! -f "$dir/category1-complete-match.txt" ]]; then
            log_error "Missing category1-complete-match.txt in $dir"
            exit 1
        fi
        for name in category2-empty-blank.txt category3-partial-match.txt category4-no-match.txt; do
            if [[ ! -f "$dir/$name" ]]; then
                log_warn "Missing $name in $dir (treated as empty)"
            fi
        done
    done

    log_info "Comparing relay categories"
    log_info "Prod: $prod_dir"
    log_info "Archive: $archive_dir"
    log_info "Output: $output_dir"

    # Create output directory
    mkdir -p "$output_dir"

    # Create temp files for processing
    local tmp_dir
    tmp_dir=$(mktemp -d)
    # The path is expanded now on purpose: tmp_dir is local to main and is no
    # longer in scope when the EXIT trap fires.
    # shellcheck disable=SC2064
    trap "rm -rf '$tmp_dir'" EXIT

    log_info "Building lookup tables..."

    # Build lookup tables: key|category|full_line
    build_lookup "$prod_dir" "$tmp_dir/prod_lookup.txt"
    build_lookup "$archive_dir" "$tmp_dir/archive_lookup.txt"

    # Extract just keys for comparison
    cut -d'|' -f1,2 "$tmp_dir/prod_lookup.txt" | sort -u > "$tmp_dir/prod_keys.txt"
    cut -d'|' -f1,2 "$tmp_dir/archive_lookup.txt" | sort -u > "$tmp_dir/archive_keys.txt"

    log_info "Comparing categories..."

    # Initialize output files
    : > "$output_dir/complete-in-both.txt"
    : > "$output_dir/complete-prod-missing-archive.txt"
    : > "$output_dir/complete-prod-incomplete-archive.txt"
    : > "$output_dir/incomplete-in-both.txt"
    : > "$output_dir/in-archive-not-prod.txt"

    local repo npub prod_cat archive_cat

    # Process prod category 1 (complete) entries. Rows are selected on the
    # category field itself rather than on a substring of the whole line.
    while IFS='|' read -r repo npub _; do
        archive_cat=$(_lookup_category "$tmp_dir/archive_lookup.txt" "$repo" "$npub")

        if [[ -z "$archive_cat" ]]; then
            # Not in archive at all
            echo "$repo | $npub | prod=complete | archive=missing" >> "$output_dir/complete-prod-missing-archive.txt"
        elif [[ "$archive_cat" == "cat1" ]]; then
            # Complete in both
            echo "$repo | $npub | prod=complete | archive=complete" >> "$output_dir/complete-in-both.txt"
        else
            # Complete in prod, incomplete in archive
            echo "$repo | $npub | prod=complete | archive=$archive_cat" >> "$output_dir/complete-prod-incomplete-archive.txt"
        fi
    done < <(awk -F'|' '$3 == "cat1"' "$tmp_dir/prod_lookup.txt")

    # Process prod categories 2-4 (incomplete) entries, one category at a time
    # so incomplete-in-both.txt stays grouped by prod category.
    for prod_cat in cat2 cat3 cat4; do
        while IFS='|' read -r repo npub _; do
            archive_cat=$(_lookup_category "$tmp_dir/archive_lookup.txt" "$repo" "$npub")

            if [[ -z "$archive_cat" ]]; then
                # Incomplete in prod, missing in archive
                echo "$repo | $npub | prod=$prod_cat | archive=missing" >> "$output_dir/incomplete-in-both.txt"
            elif [[ "$archive_cat" != "cat1" ]]; then
                # Incomplete in both
                echo "$repo | $npub | prod=$prod_cat | archive=$archive_cat" >> "$output_dir/incomplete-in-both.txt"
            fi
            # If archive is complete but prod is not, that's unusual but not an
            # error; such entries are not written to any report.
        done < <(awk -F'|' -v want="$prod_cat" '$3 == want' "$tmp_dir/prod_lookup.txt")
    done

    # Find entries in archive but not in prod
    comm -23 "$tmp_dir/archive_keys.txt" "$tmp_dir/prod_keys.txt" | while IFS='|' read -r repo npub; do
        archive_cat=$(_lookup_category "$tmp_dir/archive_lookup.txt" "$repo" "$npub")
        echo "$repo | $npub | prod=missing | archive=$archive_cat" >> "$output_dir/in-archive-not-prod.txt"
    done

    # Count results
    local count_both count_missing count_incomplete count_both_incomplete count_archive_only
    count_both=$(wc -l < "$output_dir/complete-in-both.txt" | tr -d ' ')
    count_missing=$(wc -l < "$output_dir/complete-prod-missing-archive.txt" | tr -d ' ')
    count_incomplete=$(wc -l < "$output_dir/complete-prod-incomplete-archive.txt" | tr -d ' ')
    count_both_incomplete=$(wc -l < "$output_dir/incomplete-in-both.txt" | tr -d ' ')
    count_archive_only=$(wc -l < "$output_dir/in-archive-not-prod.txt" | tr -d ' ')

    # Generate summary
    cat > "$output_dir/summary.txt" << EOF
# Relay Comparison Summary
Generated: $(date -Iseconds)

## Input
- Prod: $prod_dir
- Archive: $archive_dir

## Results

### No Action Required
- Complete in both relays: $count_both

### Action/Decision Required
- Complete in prod, MISSING from archive: $count_missing
- Complete in prod, INCOMPLETE in archive: $count_incomplete
- Incomplete in BOTH relays: $count_both_incomplete

### For Reference
- In archive but not in prod: $count_archive_only

## Files
- complete-in-both.txt: Repos successfully migrated (no action)
- complete-prod-missing-archive.txt: Need investigation - why not in archive?
- complete-prod-incomplete-archive.txt: Archive sync may still be in progress
- incomplete-in-both.txt: Git data incomplete on both relays
- in-archive-not-prod.txt: May be deleted from prod or new to archive

## Next Steps
1. Review complete-prod-missing-archive.txt - these repos need attention
2. Check if archive sync is still running for incomplete entries
3. Cross-reference with deletion events (kind 5) from Phase 1
4. Use Phase 4 logs to understand parse failures and purgatory expiry
EOF

    # Display summary
    echo ""
    log_info "=== Comparison Summary ==="
    log_success "Complete in both: $count_both (no action needed)"
    log_error "Complete in prod, MISSING from archive: $count_missing"
    log_warn "Complete in prod, incomplete in archive: $count_incomplete"
    log_warn "Incomplete in both: $count_both_incomplete"
    log_info "In archive only: $count_archive_only"
    echo ""
    log_info "Output files:"
    echo " $output_dir/complete-in-both.txt"
    echo " $output_dir/complete-prod-missing-archive.txt"
    echo " $output_dir/complete-prod-incomplete-archive.txt"
    echo " $output_dir/incomplete-in-both.txt"
    echo " $output_dir/in-archive-not-prod.txt"
    echo " $output_dir/summary.txt"
}
| 293 | |||
| 294 | main "$@" | ||
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/22-compare-git-data.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/22-compare-git-data.sh new file mode 100755 index 0000000..76521d4 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/22-compare-git-data.sh | |||
| @@ -0,0 +1,390 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 22-compare-git-data.sh - Compare actual git data between prod and archive relays | ||
| 4 | # | ||
| 5 | # PHASE 3c of the GRASP relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Compares actual git commits between prod and archive to determine which is ahead. | ||
| 7 | # | ||
| 8 | # KEY INSIGHT: | ||
| 9 | # Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event. | ||
| 10 | # If archive has different/newer data than prod, it means: | ||
| 11 | # - A state event authorized those commits at some point | ||
| 12 | # - Archive is actually MORE up-to-date than prod | ||
| 13 | # - Migration should use archive data (it's already correct) | ||
| 14 | # | ||
| 15 | # USAGE: | ||
| 16 | # ./22-compare-git-data.sh <prod-git-base> <archive-git-base> <repo-list> <output-dir> | ||
| 17 | # | ||
| 18 | # EXAMPLES: | ||
| 19 | # ./22-compare-git-data.sh /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \ | ||
| 20 | # output/comparison/complete-prod-incomplete-archive.txt output/comparison | ||
| 21 | # | ||
| 22 | # INPUT: | ||
| 23 | # prod-git-base Base directory for prod git repos (e.g., /var/lib/grasp-relay/git) | ||
| 24 | # archive-git-base Base directory for archive git repos (e.g., /var/lib/ngit-grasp/git) | ||
| 25 | # repo-list File with repos to compare (format: "repo | npub | ...") | ||
| 26 | # | ||
| 27 | # OUTPUT: | ||
| 28 | # <output-dir>/git-ancestry.tsv - Tab-separated values: | ||
| 29 | # repo<TAB>npub<TAB>relationship<TAB>details | ||
| 30 | # | ||
| 31 | # Relationship values: | ||
| 32 | # archive-ahead - Archive has all prod commits plus more (GOOD - use archive) | ||
| 33 | # in-sync - Both have identical commits | ||
| 34 | # prod-ahead - Prod has commits archive is missing (needs re-sync) | ||
| 35 | # diverged - Both have unique commits (manual review) | ||
| 36 | # archive-only - Only archive has git data | ||
| 37 | # prod-only - Only prod has git data | ||
| 38 | # both-empty - Neither has git data | ||
| 39 | # | ||
| 40 | # PREREQUISITES: | ||
| 41 | # - git (for ref comparison) | ||
| 42 | # - Read access to both git directories (may need sudo) | ||
| 43 | # | ||
| 44 | # RUNTIME: Depends on number of repos to compare | ||
| 45 | # | ||
| 46 | # SEE ALSO: | ||
| 47 | # docs/how-to/migrate-to-ngit-grasp.md - Full migration guide | ||
| 48 | # 21-compare-relays.sh - Phase 3b script that identifies repos to compare | ||
| 49 | # | ||
| 50 | |||
| 51 | set -euo pipefail | ||
| 52 | |||
# Colors for log output. Every log_* helper writes to stderr, so the terminal
# check is made on fd 2: redirecting stderr to a file yields plain text, and
# piping stdout elsewhere does not strip colors from an interactive stderr.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
| 67 | |||
# Write an informational message to stderr, prefixed with a blue [INFO] tag.
# Backslash escapes in the message are expanded.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $*" >&2
}
| 71 | |||
# Write a success message to stderr, prefixed with a green [OK] tag.
# Backslash escapes in the message are expanded.
log_success() {
    printf '%b\n' "${GREEN}[OK]${NC} $*" >&2
}
| 75 | |||
# Write a warning to stderr, prefixed with a yellow [WARN] tag.
# Backslash escapes in the message are expanded.
log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2
}
| 79 | |||
# Write an error message to stderr, prefixed with a red [ERROR] tag.
# Backslash escapes in the message are expanded.
log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}
| 83 | |||
# Write a progress message to stderr with a blue [PROGRESS] tag. The message
# starts with a carriage return and has no trailing newline, so successive
# calls overwrite the same terminal line.
log_progress() {
    printf '%b' "\r${BLUE}[PROGRESS]${NC} $*" >&2
}
| 87 | |||
# Print the command-line help to stdout and terminate with exit status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 <prod-git-base> <archive-git-base> <repo-list> <output-dir>" \
        "" \
        "Arguments:" \
        " prod-git-base Base directory for prod git repos" \
        " archive-git-base Base directory for archive git repos" \
        " repo-list File with repos to compare (format: 'repo | npub | ...')" \
        " output-dir Directory to store output files" \
        "" \
        "Examples:" \
        " $0 /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \\" \
        " output/comparison/complete-prod-incomplete-archive.txt output/comparison" \
        "" \
        "Output:" \
        " git-ancestry.tsv - TSV with: repo, npub, relationship, details"
    exit 1
}
| 105 | |||
# List the branch heads of a git directory.
# Args: $1=git_dir
# Output: sorted "commit_hash ref_name" lines as printed by
#   `git show-ref --heads` (hash first, then the full ref name).
# Prints nothing and still returns 0 when the directory does not exist, is not
# a readable git directory, or has no branch heads.
get_git_refs() {
    local git_dir="$1"

    if [[ -d "$git_dir" ]]; then
        # git exits non-zero for a repo without heads; that is not an error here.
        git --git-dir="$git_dir" show-ref --heads 2>/dev/null | sort || true
    fi
}
| 118 | |||
# Check if commit A is an ancestor of commit B within one git directory.
# Args: $1=git_dir, $2=commit_a, $3=commit_b
# Returns: 0 if A is an ancestor of B; non-zero otherwise, including when git
#   cannot resolve either commit in git_dir. git's own messages are suppressed.
is_ancestor() {
    local repo_dir="$1" older="$2" newer="$3"

    git --git-dir="$repo_dir" merge-base --is-ancestor "$older" "$newer" 2>/dev/null
}
| 129 | |||
# Compare git data between prod and archive for a single repo.
# Args: $1=prod_git_dir, $2=archive_git_dir
# Output (stdout): one relationship string:
#   both-empty     neither side has a git directory with branch heads
#   archive-only   only archive has branch heads
#   prod-only      only prod has branch heads
#   in-sync        branch lists are identical, or every shared branch points
#                  at the same commit and archive has no extra branch
#   archive-ahead  no shared branch is ahead in prod, and archive is ahead on
#                  one or has a branch prod lacks
#   prod-ahead     prod is ahead on a shared branch and archive on none
#   diverged       no shared branch, a branch with unrelated tips, or each
#                  side ahead on a different branch
#
# Each ancestry question is asked in the repository that owns the newer
# commit: "is prod's tip an ancestor of archive's tip?" runs in the archive
# repo, and the reverse runs in the prod repo. A repo that is behind does not
# contain the other side's newer commit, so asking it would always fail and
# misreport prod-ahead as diverged.
#
# NOTE(review): a branch that exists only in prod is ignored, as the original
# code did on the grounds that it may have been deleted. Such a repo can be
# reported as in-sync or archive-ahead - confirm this is intended.
compare_repo_git() {
    local prod_git="$1"
    local archive_git="$2"

    local prod_exists=false
    local archive_exists=false

    [[ -d "$prod_git" ]] && prod_exists=true
    [[ -d "$archive_git" ]] && archive_exists=true

    # Handle cases where one or both don't exist
    if [[ "$prod_exists" == "false" && "$archive_exists" == "false" ]]; then
        echo "both-empty"
        return
    fi

    if [[ "$prod_exists" == "false" ]]; then
        echo "archive-only"
        return
    fi

    if [[ "$archive_exists" == "false" ]]; then
        echo "prod-only"
        return
    fi

    # Both exist - get refs ("commit_hash ref_name" lines)
    local prod_refs archive_refs
    prod_refs=$(get_git_refs "$prod_git")
    archive_refs=$(get_git_refs "$archive_git")

    # Handle empty refs
    if [[ -z "$prod_refs" && -z "$archive_refs" ]]; then
        echo "both-empty"
        return
    fi

    if [[ -z "$prod_refs" ]]; then
        echo "archive-only"
        return
    fi

    if [[ -z "$archive_refs" ]]; then
        echo "prod-only"
        return
    fi

    # Compare refs - check if they're identical
    if [[ "$prod_refs" == "$archive_refs" ]]; then
        echo "in-sync"
        return
    fi

    # Refs differ - check ancestry branch by branch.
    # archive_ahead stays true while no shared branch is ahead in prod;
    # prod_ahead stays true while no shared branch is ahead in archive and
    # archive has no branch that prod lacks.
    local archive_ahead=true
    local prod_ahead=true
    local has_common_branch=false
    local prod_hash prod_ref archive_hash archive_ref

    # Check each prod branch against archive
    while read -r prod_hash prod_ref; do
        [[ -z "$prod_hash" ]] && continue

        # Get the same branch from archive. The ref name is compared as a
        # literal string so names containing "." or other regex
        # metacharacters only match themselves.
        archive_hash=$(awk -v ref="$prod_ref" '($2 "") == (ref "") { print $1; exit }' <<< "$archive_refs")

        if [[ -z "$archive_hash" ]]; then
            # Branch exists in prod but not archive. This could be a deleted
            # branch, so it does not count as prod being ahead.
            continue
        fi

        has_common_branch=true

        if [[ "$prod_hash" == "$archive_hash" ]]; then
            # Same commit - neither ahead for this branch
            continue
        fi

        # Different commits - check ancestry
        if is_ancestor "$archive_git" "$prod_hash" "$archive_hash"; then
            # Prod commit is ancestor of archive commit - archive is ahead for this branch
            prod_ahead=false
        elif is_ancestor "$prod_git" "$archive_hash" "$prod_hash"; then
            # Archive commit is ancestor of prod commit - prod is ahead for this branch
            archive_ahead=false
        else
            # Neither is ancestor - diverged
            archive_ahead=false
            prod_ahead=false
        fi
    done <<< "$prod_refs"

    # Also check for branches only in archive (archive has extra branches)
    while read -r archive_hash archive_ref; do
        [[ -z "$archive_hash" ]] && continue

        prod_hash=$(awk -v ref="$archive_ref" '($2 "") == (ref "") { print $1; exit }' <<< "$prod_refs")

        if [[ -z "$prod_hash" ]]; then
            # Branch exists in archive but not prod - archive has something prod doesn't
            prod_ahead=false
        fi
    done <<< "$archive_refs"

    # Determine final relationship
    if [[ "$has_common_branch" == "false" ]]; then
        # No common branches - completely different
        echo "diverged"
        return
    fi

    if [[ "$archive_ahead" == "true" && "$prod_ahead" == "false" ]]; then
        echo "archive-ahead"
    elif [[ "$prod_ahead" == "true" && "$archive_ahead" == "false" ]]; then
        echo "prod-ahead"
    elif [[ "$archive_ahead" == "true" && "$prod_ahead" == "true" ]]; then
        # All common branches are identical and archive has no extra branch;
        # prod may still have branches that archive lacks.
        echo "in-sync"
    else
        echo "diverged"
    fi
}
| 269 | |||
# Main entry point: compare prod vs. archive git data for every listed repo.
#
# Arguments:
#   $1 - prod git base directory    (repos expected at <base>/<npub>/<repo>.git)
#   $2 - archive git base directory (same layout)
#   $3 - repo list file, one "repo|npub[|...]" entry per line; lines starting
#        with '#' and empty lines are ignored
#   $4 - output directory (created if missing)
#
# Output:
#   <output-dir>/git-ancestry.tsv with columns repo, npub, relationship, details
#   (details is currently always empty). A per-relationship summary is logged.
#
# Exits 1 if either base directory or the repo list is missing.
main() {
    if [[ $# -ne 4 ]]; then
        usage
    fi

    local prod_git_base="$1"
    local archive_git_base="$2"
    local repo_list="$3"
    local output_dir="$4"

    # Validate inputs
    if [[ ! -d "$prod_git_base" ]]; then
        log_error "Prod git base directory not found: $prod_git_base"
        exit 1
    fi

    if [[ ! -d "$archive_git_base" ]]; then
        log_error "Archive git base directory not found: $archive_git_base"
        exit 1
    fi

    if [[ ! -f "$repo_list" ]]; then
        log_error "Repo list file not found: $repo_list"
        exit 1
    fi

    log_info "=== Git Data Comparison ==="
    log_info "Prod git base: $prod_git_base"
    log_info "Archive git base: $archive_git_base"
    log_info "Repo list: $repo_list"
    log_info "Output: $output_dir"
    log_info "Started: $(date)"
    echo ""

    # Create output directory
    mkdir -p "$output_dir"

    # Output file
    local tsv_file="$output_dir/git-ancestry.tsv"

    # Initialize TSV with header
    echo -e "repo\tnpub\trelationship\tdetails" > "$tsv_file"

    # Count repos.
    # grep -c already prints "0" when nothing matches, but it also exits 1 in
    # that case. Appending `|| echo "0"` would therefore produce "0<newline>0",
    # so swallow the exit status instead and only default when grep printed
    # nothing at all (e.g. the file became unreadable).
    local total_repos
    total_repos=$(grep -c -v '^#' "$repo_list" 2>/dev/null || true)
    total_repos="${total_repos:-0}"
    log_info "Processing $total_repos repos..."
    echo ""

    # Counters
    local count=0
    local count_archive_ahead=0
    local count_in_sync=0
    local count_prod_ahead=0
    local count_diverged=0
    local count_archive_only=0
    local count_prod_only=0
    local count_both_empty=0

    # Process each repo. The `|| [[ -n "$repo" ]]` keeps a final line that has
    # no trailing newline from being dropped.
    while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do
        # Skip comments and empty lines
        [[ "$repo" =~ ^# ]] && continue
        [[ -z "$repo" ]] && continue

        # Clean up whitespace
        repo="${repo// /}"
        npub="${npub// /}"

        [[ -z "$repo" || -z "$npub" ]] && continue

        count=$((count + 1))

        # Build git paths
        local prod_git="$prod_git_base/${npub}/${repo}.git"
        local archive_git="$archive_git_base/${npub}/${repo}.git"

        # Compare
        local relationship details=""
        relationship=$(compare_repo_git "$prod_git" "$archive_git")

        # Count by relationship
        case "$relationship" in
            archive-ahead) count_archive_ahead=$((count_archive_ahead + 1)) ;;
            in-sync) count_in_sync=$((count_in_sync + 1)) ;;
            prod-ahead) count_prod_ahead=$((count_prod_ahead + 1)) ;;
            diverged) count_diverged=$((count_diverged + 1)) ;;
            archive-only) count_archive_only=$((count_archive_only + 1)) ;;
            prod-only) count_prod_only=$((count_prod_only + 1)) ;;
            both-empty) count_both_empty=$((count_both_empty + 1)) ;;
        esac

        # Output TSV line
        printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$relationship" "$details" >> "$tsv_file"

        # Progress indicator every 10 repos
        if [[ $((count % 10)) -eq 0 ]]; then
            log_progress "Processed $count/$total_repos repos..."
        fi
    done < "$repo_list"

    # Clear progress line
    echo "" >&2

    # Summary
    echo ""
    log_info "=== Comparison Summary ==="
    log_success "Archive ahead (use archive data): $count_archive_ahead"
    log_success "In sync: $count_in_sync"
    log_warn "Prod ahead (needs re-sync): $count_prod_ahead"
    log_error "Diverged (manual review): $count_diverged"
    log_info "Archive only: $count_archive_only"
    log_info "Prod only: $count_prod_only"
    log_info "Both empty: $count_both_empty"
    echo ""
    log_info "Total: $count repos"
    log_info "Output: $tsv_file"
}
| 389 | |||
| 390 | main "$@" | ||
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/30-extract-parse-failures.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/30-extract-parse-failures.sh new file mode 100755 index 0000000..d762aae --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/30-extract-parse-failures.sh | |||
| @@ -0,0 +1,774 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 30-extract-parse-failures.sh - Extract parse failure events from systemd logs | ||
| 4 | # | ||
| 5 | # PHASE 4a of the GRASP relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Extracts structured [PARSE_FAIL] log entries AND "Invalid announcement" | ||
| 7 | # rejections from journalctl. | ||
| 8 | # | ||
| 9 | # USAGE: | ||
| 10 | # ./30-extract-parse-failures.sh <service-name> <output-dir> [options] | ||
| 11 | # | ||
| 12 | # EXAMPLES: | ||
| 13 | # # Extract from ngit-grasp service (last 30 days, default) | ||
| 14 | # ./30-extract-parse-failures.sh ngit-grasp.service output/logs | ||
| 15 | # | ||
| 16 | # # Extract with custom time range | ||
| 17 | # ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-01" | ||
| 18 | # | ||
| 19 | # # Extract from specific time window | ||
| 20 | # ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22" | ||
| 21 | # | ||
| 22 | # OPTIONS: | ||
| 23 | # --since <date> Start date for log extraction (default: 30 days ago) | ||
| 24 | # --until <date> End date for log extraction (default: now) | ||
| 25 | # --dry-run Show what would be extracted without writing files | ||
| 26 | # | ||
| 27 | # ENRICHMENT: | ||
| 28 | # The script automatically enriches parse failures with repo/npub information | ||
| 29 | # by extracting from "Added rejected announcement" log entries which include | ||
| 30 | # pubkey and identifier fields. Hex pubkeys are converted to npub format using | ||
| 31 | # `nak encode npub <hex-pubkey>` if the nak tool is available. | ||
| 32 | # | ||
| 33 | # OUTPUT: | ||
| 34 | # <output-dir>/parse-failures.txt | ||
| 35 | # | ||
| 36 | # OUTPUT FORMAT (TSV): | ||
| 37 | # event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub | ||
| 38 | # | ||
| 39 | # EXPECTED LOG FORMATS: | ||
| 40 | # The script looks for three types of log entries: | ||
| 41 | # | ||
| 42 | # 1. Structured [PARSE_FAIL] entries: | ||
| 43 | # 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... | ||
| 44 | # | ||
| 45 | # 2. "Invalid announcement" rejections (write policy): | ||
| 46 | # Event rejected by write policy event_id=abc123... relay=wss://... kind=30617 reason=Invalid announcement: multiple clone tags found... | ||
| 47 | # | ||
| 48 | # 3. "Added rejected announcement" entries (for enrichment): | ||
| 49 | # Added rejected announcement to two-tier index event_id=abc123... kind=30617 identifier=myrepo pubkey=hex... | ||
| 50 | # These entries provide pubkey and identifier for enriching write policy rejections. | ||
| 51 | # | ||
| 52 | # NOTE: Builder logs ("Rejected repository announcement note1xxx:") are NOT extracted | ||
| 53 | # because they use bech32 (note1) IDs while write policy logs use hex IDs. Extracting | ||
| 54 | # both would cause double-counting since deduplication only works within each format. | ||
| 55 | # Write policy logs contain the same events, so we don't lose any data. | ||
| 56 | # | ||
| 57 | # Required fields: kind, event_id, reason | ||
| 58 | # Enrichment fields: repo (identifier), npub (converted from hex pubkey) | ||
| 59 | # | ||
| 60 | # DEPENDENCY: | ||
| 61 | # This script requires logging improvements in ngit-grasp to emit structured | ||
| 62 | # [PARSE_FAIL] log entries. Until those are implemented, this script will | ||
| 63 | # find no matching entries (which is handled gracefully). | ||
| 64 | # | ||
| 65 | # "Invalid announcement" rejections are logged by the write policy and | ||
| 66 | # should be present in any ngit-grasp deployment. | ||
| 67 | # | ||
| 68 | # See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section) | ||
| 69 | # | ||
| 70 | # Expected Rust logging code for [PARSE_FAIL]: | ||
| 71 | # tracing::warn!( | ||
| 72 | # target: "migration", | ||
| 73 | # "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}", | ||
| 74 | # event.kind, event.id, reason, identifier, npub | ||
| 75 | # ); | ||
| 76 | # | ||
| 77 | # PREREQUISITES: | ||
| 78 | # - journalctl (systemd) | ||
| 79 | # - grep, awk, sed (standard Unix tools) | ||
| 80 | # - Access to systemd journal (may require sudo or journal group membership) | ||
| 81 | # | ||
| 82 | # RUNTIME: Depends on log volume, typically < 30 seconds | ||
| 83 | # | ||
| 84 | # SEE ALSO: | ||
| 85 | # docs/how-to/migrate-to-ngit-grasp.md - Full migration guide | ||
| 86 | # 31-extract-purgatory-expiry.sh - Companion script for purgatory expiry logs | ||
| 87 | # | ||
| 88 | |||
set -euo pipefail

# Absolute directory containing this script; used to locate sibling helpers
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Load the service validation helper when it is shipped next to this script
if [[ -f "$SCRIPT_DIR/validate-service.sh" ]]; then
    source "$SCRIPT_DIR/validate-service.sh"
fi

# Color escapes for log output: start with none, then enable them only when
# stdout is attached to a terminal
RED=''
GREEN=''
YELLOW=''
BLUE=''
NC=''
if [[ -t 1 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
fi
| 113 | |||
# Write an informational message (all arguments joined by spaces) to stderr,
# prefixed with a blue [INFO] tag.
log_info() {
    local tag="${BLUE}[INFO]${NC}"
    >&2 echo -e "$tag $*"
}
| 117 | |||
# Write a success message (all arguments joined by spaces) to stderr,
# prefixed with a green [OK] tag.
log_success() {
    local tag="${GREEN}[OK]${NC}"
    >&2 echo -e "$tag $*"
}
| 121 | |||
# Write a warning message (all arguments joined by spaces) to stderr,
# prefixed with a yellow [WARN] tag.
log_warn() {
    local tag="${YELLOW}[WARN]${NC}"
    >&2 echo -e "$tag $*"
}
| 125 | |||
# Write an error message (all arguments joined by spaces) to stderr,
# prefixed with a red [ERROR] tag. Does not exit; callers decide that.
log_error() {
    local tag="${RED}[ERROR]${NC}"
    >&2 echo -e "$tag $*"
}
| 129 | |||
# Print the command-line help text to stdout and terminate with status 1.
# The heredoc is unquoted on purpose so that $0 expands to the invoked name.
usage() {
    cat <<EOF
Usage: $0 <service-name> <output-dir> [options]

Arguments:
 service-name Systemd service name (e.g., ngit-grasp.service)
 output-dir Directory to store extracted log data

Options:
 --since <date> Start date (default: 30 days ago)
 --until <date> End date (default: now)
 --dry-run Show what would be extracted without writing

Examples:
 $0 ngit-grasp.service output/logs
 $0 ngit-grasp.service output/logs --since '2026-01-01'
 $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'

Expected log formats:
 [PARSE_FAIL] kind=30618 event_id=abc123 reason="..." repo=myrepo npub=npub1...
 Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ...

Enrichment:
 Parse failures are automatically enriched with repo/npub from
 'Added rejected announcement' log entries. Hex pubkeys are converted
 to npub format using 'nak encode npub' if available.
EOF
    exit 1
}
| 157 | |||
| 158 | # ============================================================================= | ||
| 159 | # AWK-BASED BATCH PARSING FUNCTIONS | ||
| 160 | # ============================================================================= | ||
| 161 | # These functions use awk for efficient batch processing instead of per-line | ||
| 162 | # grep calls. This provides ~400x speedup for large log files. | ||
| 163 | # | ||
| 164 | # NOTE: parse_builder_rejection_line() was removed to fix double-counting bug. | ||
| 165 | # Builder logs use bech32 (note1) IDs while write policy logs use hex IDs. | ||
| 166 | # Since deduplication only works within each format, extracting both caused | ||
| 167 | # the same event to be counted twice. Write policy logs contain the same | ||
| 168 | # events, so we don't lose any data by only extracting from that source. | ||
| 169 | |||
# Parse [PARSE_FAIL] log lines in batch using awk
#
# Arguments:
#   $1 - file containing log lines with [PARSE_FAIL]
# Output (stdout):
#   One TSV line per parsable entry: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
#   Lines missing kind, event_id, or a non-empty quoted reason are dropped;
#   repo and npub are optional and may be empty.
#
# Portability: uses only POSIX awk (two-argument match with RSTART/RLENGTH).
# The three-argument match(str, re, arr) form is a gawk extension and is a
# syntax error under mawk and BSD awk.
parse_parse_fail_batch() {
    local input_file="$1"
    awk '
    # Return the text matched by regex re in the current line with its first
    # skip characters (the "key=" prefix) removed, or "" when re does not match.
    function grab(re, skip) {
        if (match($0, re)) return substr($0, RSTART + skip, RLENGTH - skip)
        return ""
    }
    {
        kind     = grab("kind=[0-9]+", 5)
        event_id = grab("event_id=[a-f0-9]+", 9)

        # reason is a quoted string: drop the 8-char reason=" prefix and the
        # closing quote (9 characters of the match in total)
        reason = ""
        if (match($0, /reason="[^"]*"/)) reason = substr($0, RSTART + 8, RLENGTH - 9)

        # Optional fields
        repo = grab("repo=[^ ]+", 5)
        npub = grab("npub=[^ ]+", 5)

        # Output if we have required fields
        if (kind != "" && event_id != "" && reason != "") {
            print event_id "\t" kind "\t" reason "\t" repo "\t" npub
        }
    }
    ' "$input_file"
}
| 204 | |||
# Parse "Invalid announcement" rejection log lines in batch using awk
#
# Arguments:
#   $1 - file containing "Event rejected by write policy" log lines
# Output (stdout):
#   One TSV line per parsable entry: event_id<TAB>kind<TAB>reason<TAB><empty><TAB><empty>
#   reason is everything after the first "reason=" up to end of line. Lines
#   missing event_id, kind, or a non-empty reason are dropped.
#
# Portability: uses only POSIX awk (two-argument match with RSTART/RLENGTH).
# The three-argument match(str, re, arr) form is a gawk extension and is a
# syntax error under mawk and BSD awk.
parse_write_policy_rejection_batch() {
    local input_file="$1"
    awk '
    # Return the text matched by regex re in the current line with its first
    # skip characters (the "key=" prefix) removed, or "" when re does not match.
    function grab(re, skip) {
        if (match($0, re)) return substr($0, RSTART + skip, RLENGTH - skip)
        return ""
    }
    {
        event_id = grab("event_id=[a-f0-9]+", 9)
        kind     = grab("kind=[0-9]+", 5)

        # Unquoted reason: take the remainder of the line after "reason="
        reason   = grab("reason=.*$", 7)

        # Output if we have required fields (repo and npub are empty)
        if (kind != "" && event_id != "" && reason != "") {
            print event_id "\t" kind "\t" reason "\t\t"
        }
    }
    ' "$input_file"
}
| 231 | |||
# Parse "Added rejected announcement" log lines in batch using awk
#
# Arguments:
#   $1 - file containing "Added rejected announcement to two-tier index" log lines
# Output (stdout):
#   One TSV line per parsable entry: event_id<TAB>identifier<TAB>pubkey_hex
#   Lines missing any of the three fields are dropped.
#
# Portability: uses only POSIX awk (two-argument match with RSTART/RLENGTH).
# The three-argument match(str, re, arr) form is a gawk extension and is a
# syntax error under mawk and BSD awk.
parse_rejected_announcement_batch() {
    local input_file="$1"
    awk '
    # Return the text matched by regex re in the current line with its first
    # skip characters (the "key=" prefix) removed, or "" when re does not match.
    function grab(re, skip) {
        if (match($0, re)) return substr($0, RSTART + skip, RLENGTH - skip)
        return ""
    }
    {
        event_id   = grab("event_id=[a-f0-9]+", 9)
        identifier = grab("identifier=[^ ]+", 11)
        pubkey     = grab("pubkey=[a-f0-9]+", 7)

        # Output if we have all required fields
        if (event_id != "" && identifier != "" && pubkey != "") {
            print event_id "\t" identifier "\t" pubkey
        }
    }
    ' "$input_file"
}
| 258 | |||
# Enrich parse failures with repo/npub by looking up event_id in "Added rejected announcement" log entries
# This is critical because "Invalid announcement" rejections only log event_id and kind,
# not the repo name or npub. Without enrichment, Phase 5 shows event_id|kind instead
# of repo|npub in action-required.txt, making the output unusable.
#
# Arguments:
#   $1 - parse failures file to enrich (replaced in place);
#        TSV: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub, '#' lines are headers
#   $2 - lookup file built from the logs;
#        TSV: event_id<TAB>identifier<TAB>pubkey_hex
#
# Behavior:
#   1. Returns 0 without touching $1 when the lookup file is missing or empty.
#   2. Converts the unique hex pubkeys to npub with a single `nak encode npub`
#      call (hex pubkeys on stdin, one npub per output line is assumed).
#   3. Falls back to hex pubkeys when nak is unavailable, exits non-zero, or
#      returns a different number of lines than it was given (which would
#      otherwise pair pubkeys with the wrong npub).
#   4. Fills empty repo/npub columns via an awk hash join (one pass per file).
#
# Safe under `set -euo pipefail`: counting is done with awk, because a
# `grep -v ... | wc -l` pipeline fails (and aborts the script) whenever the
# file contains only header lines.
enrich_with_repo_npub() {
    local parse_failures_file="$1"
    local lookup_file="$2"

    # Validate lookup file exists and has content
    if [[ ! -f "$lookup_file" ]] || [[ ! -s "$lookup_file" ]]; then
        log_warn "No enrichment data available - repo/npub columns will remain empty"
        return 0
    fi

    log_info "Enriching parse failures with repo/npub from log entries..."

    # Check if we have nak for pubkey->npub conversion
    local can_convert_npub=false
    if command -v nak &> /dev/null; then
        can_convert_npub=true
        log_info " Using 'nak' for pubkey->npub conversion"
    else
        log_warn " 'nak' not found - will use hex pubkeys instead of npub"
    fi

    local lookup_count
    lookup_count=$(wc -l < "$lookup_file")
    lookup_count="${lookup_count//[^0-9]/}"
    log_info " Lookup table has $lookup_count entries"

    # STEP 1: Extract unique pubkeys (lookup column 3) that need conversion
    local unique_pubkeys_file npub_map_file
    unique_pubkeys_file=$(mktemp)
    npub_map_file=$(mktemp)

    cut -f3 "$lookup_file" | sort -u > "$unique_pubkeys_file"
    local unique_pubkey_count
    unique_pubkey_count=$(wc -l < "$unique_pubkeys_file")
    unique_pubkey_count="${unique_pubkey_count//[^0-9]/}"
    log_info " Converting $unique_pubkey_count unique pubkeys to npub format..."

    # STEP 2: Batch convert all pubkeys to npub in a single nak call.
    # nak is run directly (not inside a process substitution) so that its exit
    # status is visible; the output is only trusted when it has exactly one
    # line per input pubkey.
    local converted=false
    if [[ "$can_convert_npub" == true && "$unique_pubkey_count" -gt 0 ]]; then
        local npub_list_file npub_line_count
        npub_list_file=$(mktemp)
        if nak encode npub < "$unique_pubkeys_file" > "$npub_list_file" 2>/dev/null; then
            npub_line_count=$(awk 'END { print NR }' "$npub_list_file")
            if [[ "$npub_line_count" -eq "$unique_pubkey_count" ]]; then
                # Mapping file: pubkey_hex<TAB>npub
                paste "$unique_pubkeys_file" "$npub_list_file" > "$npub_map_file"
                converted=true
            fi
        fi
        rm -f "$npub_list_file"
        if [[ "$converted" != true ]]; then
            log_warn " Batch npub conversion failed, using hex pubkeys"
        fi
    fi

    if [[ "$converted" != true ]]; then
        # No usable nak output: map every hex pubkey to itself
        awk '{print $1 "\t" $1}' "$unique_pubkeys_file" > "$npub_map_file"
    fi

    rm -f "$unique_pubkeys_file"

    # STEP 3: Join parse failures with the lookup table on event_id, then with
    # the npub map on pubkey
    local enriched_file
    enriched_file=$(mktemp)

    # Copy header lines
    grep '^#' "$parse_failures_file" > "$enriched_file" 2>/dev/null || true

    # Input files (order matters, each is identified by its FNR==1 reset):
    #   1. npub_map_file:        pubkey_hex<TAB>npub
    #   2. lookup_file:          event_id<TAB>identifier<TAB>pubkey_hex
    #   3. parse_failures_file:  event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
    # Files 1 and 2 are never empty at this point, so the counter stays aligned.
    awk -F'\t' -v OFS='\t' '
    # Track which file we are processing
    FNR==1 { file_num++ }

    # First file: npub_map (pubkey_hex -> npub)
    file_num==1 {
        npub_map[$1] = $2
        next
    }
    # Second file: lookup (event_id -> identifier, pubkey_hex)
    file_num==2 {
        lookup_repo[$1] = $2
        lookup_pubkey[$1] = $3
        next
    }
    # Third file: parse_failures
    /^#/ { next } # Skip headers (already copied)
    {
        event_id = $1
        kind = $2
        reason = $3
        repo = $4
        npub = $5

        # If repo/npub empty, try to enrich from lookup
        if (repo == "" && event_id in lookup_repo) {
            repo = lookup_repo[event_id]
        }
        if (npub == "" && event_id in lookup_pubkey) {
            pubkey = lookup_pubkey[event_id]
            if ((pubkey in npub_map) && npub_map[pubkey] != "") {
                npub = npub_map[pubkey]
            } else {
                npub = pubkey # Fallback to hex
            }
        }

        print event_id, kind, reason, repo, npub
    }
    ' "$npub_map_file" "$lookup_file" "$parse_failures_file" >> "$enriched_file"

    rm -f "$npub_map_file"

    # Count data lines, and those with non-empty repo AND npub after enrichment
    local enriched_count total_count
    total_count=$(awk '!/^#/ { n++ } END { print n + 0 }' "$parse_failures_file")
    enriched_count=$(awk -F'\t' '!/^#/ && $4 != "" && $5 != "" { n++ } END { print n + 0 }' "$enriched_file")

    # Replace original with enriched version
    mv "$enriched_file" "$parse_failures_file"

    log_info " Enriched $enriched_count of $total_count parse failures with repo/npub"
    log_success "Enrichment complete"
}
| 403 | |||
# Parse one "Added rejected announcement" log entry for the enrichment lookup table
#
# Arguments:
#   $1 - log line containing "Added rejected announcement to two-tier index"
# Output (stdout):
#   A single TSV line: event_id<TAB>identifier<TAB>pubkey_hex
#   Nothing is printed when any of the three fields is missing.
#
# Uses bash's built-in regex matching rather than `echo | grep -oP`:
#   - only the first occurrence of each key is taken, so a repeated key can no
#     longer yield a multi-line value that corrupts the TSV
#   - no dependency on GNU grep's PCRE support, and no subprocesses per line
parse_rejected_announcement_line() {
    local line="$1"

    local event_id="" identifier="" pubkey_hex=""

    # Patterns live in variables so the space inside [^ ] needs no quoting
    local re_event_id='event_id=([a-f0-9]+)'
    local re_identifier='identifier=([^ ]+)'
    local re_pubkey='pubkey=([a-f0-9]+)'

    # Extract event_id=VALUE (hex string)
    if [[ "$line" =~ $re_event_id ]]; then
        event_id="${BASH_REMATCH[1]}"
    fi

    # Extract identifier=VALUE (repo name)
    if [[ "$line" =~ $re_identifier ]]; then
        identifier="${BASH_REMATCH[1]}"
    fi

    # Extract pubkey=VALUE (hex string)
    if [[ "$line" =~ $re_pubkey ]]; then
        pubkey_hex="${BASH_REMATCH[1]}"
    fi

    # Only output if we have all required fields
    if [[ -n "$event_id" && -n "$identifier" && -n "$pubkey_hex" ]]; then
        printf '%s\t%s\t%s\n' "$event_id" "$identifier" "$pubkey_hex"
    fi
}
| 426 | |||
| 427 | # Main | ||
| 428 | main() { | ||
| 429 | if [[ $# -lt 2 ]]; then | ||
| 430 | usage | ||
| 431 | fi | ||
| 432 | |||
| 433 | local service="$1" | ||
| 434 | local output_dir="$2" | ||
| 435 | shift 2 | ||
| 436 | |||
| 437 | # Default time range: last 30 days | ||
| 438 | local since_date | ||
| 439 | since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "") | ||
| 440 | local until_date="" | ||
| 441 | local dry_run=false | ||
| 442 | |||
| 443 | # Parse options | ||
| 444 | while [[ $# -gt 0 ]]; do | ||
| 445 | case "$1" in | ||
| 446 | --since) | ||
| 447 | since_date="$2" | ||
| 448 | shift 2 | ||
| 449 | ;; | ||
| 450 | --until) | ||
| 451 | until_date="$2" | ||
| 452 | shift 2 | ||
| 453 | ;; | ||
| 454 | --dry-run) | ||
| 455 | dry_run=true | ||
| 456 | shift | ||
| 457 | ;; | ||
| 458 | *) | ||
| 459 | log_error "Unknown option: $1" | ||
| 460 | usage | ||
| 461 | ;; | ||
| 462 | esac | ||
| 463 | done | ||
| 464 | |||
| 465 | # Validate service name format | ||
| 466 | if [[ ! "$service" =~ \.service$ ]]; then | ||
| 467 | service="${service}.service" | ||
| 468 | fi | ||
| 469 | |||
| 470 | # Validate service is appropriate for structured logging | ||
| 471 | # This prevents the common mistake of using ngit-relay instead of ngit-grasp | ||
| 472 | if type validate_service_for_structured_logging &>/dev/null; then | ||
| 473 | # Use non-interactive mode if not a terminal, skip log check (we'll do our own) | ||
| 474 | local interactive="true" | ||
| 475 | [[ ! -t 0 ]] && interactive="false" | ||
| 476 | |||
| 477 | if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then | ||
| 478 | log_error "Service validation failed. Use an ngit-grasp service for structured logging." | ||
| 479 | exit 1 | ||
| 480 | fi | ||
| 481 | else | ||
| 482 | # Fallback validation if helper not available | ||
| 483 | if [[ "$service" == *"ngit-relay"* ]]; then | ||
| 484 | log_error "Service name appears to be ngit-relay: $service" | ||
| 485 | log_error "Structured logging ([PARSE_FAIL]) only exists in ngit-grasp services." | ||
| 486 | log_error "Please use the ngit-grasp archive service instead." | ||
| 487 | log_error "" | ||
| 488 | log_error "To find the correct service:" | ||
| 489 | log_error " systemctl list-units 'ngit-grasp*' --all" | ||
| 490 | exit 1 | ||
| 491 | fi | ||
| 492 | fi | ||
| 493 | |||
| 494 | log_info "Extracting parse failures from systemd logs" | ||
| 495 | log_info "Service: $service" | ||
| 496 | log_info "Output: $output_dir" | ||
| 497 | log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" | ||
| 498 | |||
| 499 | # Check if journalctl is available | ||
| 500 | if ! command -v journalctl &> /dev/null; then | ||
| 501 | log_error "journalctl not found. This script requires systemd." | ||
| 502 | exit 1 | ||
| 503 | fi | ||
| 504 | |||
| 505 | # Validate service exists (check if journalctl can find any logs for it) | ||
| 506 | # Note: We don't require the service to be running, just that it has logs | ||
| 507 | if ! journalctl --no-pager -u "$service" -n 1 &>/dev/null; then | ||
| 508 | log_warn "Could not query logs for service: $service" | ||
| 509 | log_warn "This may indicate the service doesn't exist or you lack permissions." | ||
| 510 | log_warn "" | ||
| 511 | log_warn "To list available ngit-grasp services:" | ||
| 512 | log_warn " systemctl list-units 'ngit-grasp*' --all" | ||
| 513 | log_warn " journalctl --list-boots # Check if you have journal access" | ||
| 514 | log_warn "" | ||
| 515 | # Continue anyway - the service might exist but have no logs yet | ||
| 516 | fi | ||
| 517 | |||
| 518 | # Build journalctl command | ||
| 519 | local journal_cmd="journalctl -u $service --no-pager -o short-iso" | ||
| 520 | |||
| 521 | if [[ -n "$since_date" ]]; then | ||
| 522 | journal_cmd="$journal_cmd --since '$since_date'" | ||
| 523 | fi | ||
| 524 | |||
| 525 | if [[ -n "$until_date" ]]; then | ||
| 526 | journal_cmd="$journal_cmd --until '$until_date'" | ||
| 527 | fi | ||
| 528 | |||
| 529 | log_info "Running: $journal_cmd | grep '[PARSE_FAIL]' or 'Invalid announcement'" | ||
| 530 | |||
| 531 | if [[ "$dry_run" == true ]]; then | ||
| 532 | log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt" | ||
| 533 | |||
| 534 | # Show sample of what would be extracted | ||
| 535 | log_info "Checking for matching log entries..." | ||
| 536 | local parse_fail_count invalid_announcement_count | ||
| 537 | parse_fail_count=$(eval "$journal_cmd" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") | ||
| 538 | parse_fail_count="${parse_fail_count//[^0-9]/}" # Strip non-numeric characters | ||
| 539 | parse_fail_count="${parse_fail_count:-0}" | ||
| 540 | |||
| 541 | invalid_announcement_count=$(eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep -c 'Invalid announcement' || echo "0") | ||
| 542 | invalid_announcement_count="${invalid_announcement_count//[^0-9]/}" | ||
| 543 | invalid_announcement_count="${invalid_announcement_count:-0}" | ||
| 544 | |||
| 545 | log_info "Found $parse_fail_count [PARSE_FAIL] entries" | ||
| 546 | log_info "Found $invalid_announcement_count 'Invalid announcement' rejections" | ||
| 547 | |||
| 548 | if [[ "$parse_fail_count" -eq 0 && "$invalid_announcement_count" -eq 0 ]]; then | ||
| 549 | log_warn "No matching entries found in logs." | ||
| 550 | log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." | ||
| 551 | log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" | ||
| 552 | fi | ||
| 553 | |||
| 554 | exit 0 | ||
| 555 | fi | ||
| 556 | |||
| 557 | # Create output directory | ||
| 558 | mkdir -p "$output_dir" | ||
| 559 | |||
| 560 | local output_file="$output_dir/parse-failures.txt" | ||
| 561 | local temp_file | ||
| 562 | temp_file=$(mktemp) | ||
| 563 | |||
| 564 | # Extract and parse log entries using streaming (avoids loading all logs into memory) | ||
| 565 | log_info "Extracting log entries..." | ||
| 566 | |||
| 567 | # Create temp files for intermediate results | ||
| 568 | local temp_stderr temp_parse_fail temp_write_policy_rejection temp_rejected_announcement | ||
| 569 | temp_stderr=$(mktemp) | ||
| 570 | temp_parse_fail=$(mktemp) | ||
| 571 | temp_write_policy_rejection=$(mktemp) | ||
| 572 | temp_rejected_announcement=$(mktemp) | ||
| 573 | |||
| 574 | # Extract [PARSE_FAIL] entries directly to temp file (streaming) | ||
| 575 | log_info " Searching for [PARSE_FAIL] entries..." | ||
| 576 | eval "$journal_cmd" 2>"$temp_stderr" | grep '\[PARSE_FAIL\]' > "$temp_parse_fail" || true | ||
| 577 | |||
| 578 | local journal_stderr | ||
| 579 | journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true) | ||
| 580 | if [[ -n "$journal_stderr" ]]; then | ||
| 581 | log_warn "journalctl reported: $journal_stderr" | ||
| 582 | fi | ||
| 583 | |||
| 584 | # Extract "Event rejected by write policy" with "Invalid announcement" (streaming) | ||
| 585 | # NOTE: We only extract from write policy logs (hex IDs), not builder logs (note1 IDs) | ||
| 586 | # to avoid double-counting. Both log sources contain the same events. | ||
| 587 | log_info " Searching for write policy rejections..." | ||
| 588 | eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep 'Invalid announcement' > "$temp_write_policy_rejection" || true | ||
| 589 | |||
| 590 | # Extract "Added rejected announcement" entries for enrichment (streaming) | ||
| 591 | # These contain pubkey and identifier which we use to enrich write policy rejections | ||
| 592 | log_info " Searching for rejected announcement entries (for enrichment)..." | ||
| 593 | eval "$journal_cmd" 2>/dev/null | grep 'Added rejected announcement to two-tier index' > "$temp_rejected_announcement" || true | ||
| 594 | |||
| 595 | rm -f "$temp_stderr" | ||
| 596 | |||
| 597 | # Check if we found anything | ||
| 598 | local parse_fail_line_count write_policy_line_count rejected_announcement_line_count | ||
| 599 | parse_fail_line_count=$(wc -l < "$temp_parse_fail") | ||
| 600 | parse_fail_line_count="${parse_fail_line_count//[^0-9]/}" | ||
| 601 | write_policy_line_count=$(wc -l < "$temp_write_policy_rejection") | ||
| 602 | write_policy_line_count="${write_policy_line_count//[^0-9]/}" | ||
| 603 | rejected_announcement_line_count=$(wc -l < "$temp_rejected_announcement") | ||
| 604 | rejected_announcement_line_count="${rejected_announcement_line_count//[^0-9]/}" | ||
| 605 | |||
| 606 | log_info " Found $parse_fail_line_count [PARSE_FAIL] log lines" | ||
| 607 | log_info " Found $write_policy_line_count write policy rejection log lines" | ||
| 608 | log_info " Found $rejected_announcement_line_count rejected announcement log lines (for enrichment)" | ||
| 609 | |||
| 610 | local total_invalid_announcement_lines=$write_policy_line_count | ||
| 611 | |||
| 612 | if [[ "$parse_fail_line_count" -eq 0 && "$total_invalid_announcement_lines" -eq 0 ]]; then | ||
| 613 | log_warn "No matching entries found in logs." | ||
| 614 | log_warn "" | ||
| 615 | log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." | ||
| 616 | log_warn "The script looks for:" | ||
| 617 | log_warn "" | ||
| 618 | log_warn " 1. [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." | ||
| 619 | log_warn " 2. Event rejected by write policy event_id=... kind=30617 reason=Invalid announcement: ..." | ||
| 620 | log_warn "" | ||
| 621 | log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" | ||
| 622 | log_warn "" | ||
| 623 | |||
| 624 | # Create empty output file with header comment | ||
| 625 | { | ||
| 626 | echo "# Parse failures and invalid announcements extracted from $service" | ||
| 627 | echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" | ||
| 628 | echo "# Extracted: $(date -Iseconds)" | ||
| 629 | echo "#" | ||
| 630 | echo "# Includes:" | ||
| 631 | echo "# - [PARSE_FAIL] structured log entries" | ||
| 632 | echo "# - \"Invalid announcement\" rejections" | ||
| 633 | echo "#" | ||
| 634 | echo "# Format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub" | ||
| 635 | echo "# Note: repo and npub may be empty for some entries" | ||
| 636 | echo "#" | ||
| 637 | echo "# NOTE: No matching entries found." | ||
| 638 | echo "# This is expected if ngit-grasp logging improvements are not yet deployed." | ||
| 639 | } > "$output_file" | ||
| 640 | |||
| 641 | rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_rejected_announcement" | ||
| 642 | log_info "Created empty output file: $output_file" | ||
| 643 | exit 0 | ||
| 644 | fi | ||
| 645 | |||
| 646 | # Write header | ||
| 647 | { | ||
| 648 | echo "# Parse failures and invalid announcements extracted from $service" | ||
| 649 | echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" | ||
| 650 | echo "# Extracted: $(date -Iseconds)" | ||
| 651 | echo "#" | ||
| 652 | echo "# Includes:" | ||
| 653 | echo "# - [PARSE_FAIL] structured log entries" | ||
| 654 | echo "# - \"Invalid announcement\" rejections" | ||
| 655 | echo "#" | ||
| 656 | echo "# Format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub" | ||
| 657 | echo "# Note: repo and npub may be empty for some entries" | ||
| 658 | } > "$output_file" | ||
| 659 | |||
| 660 | # Parse [PARSE_FAIL] entries using batch awk processing | ||
| 661 | log_info " Parsing [PARSE_FAIL] entries..." | ||
| 662 | local parse_fail_count=0 | ||
| 663 | if [[ "$parse_fail_line_count" -gt 0 ]]; then | ||
| 664 | parse_parse_fail_batch "$temp_parse_fail" >> "$output_file" | ||
| 665 | parse_fail_count=$(grep -v '^#' "$output_file" | wc -l) | ||
| 666 | parse_fail_count="${parse_fail_count//[^0-9]/}" | ||
| 667 | fi | ||
| 668 | |||
| 669 | # Parse write policy rejection entries using batch awk processing | ||
| 670 | log_info " Parsing write policy rejection entries..." | ||
| 671 | local write_policy_count=0 | ||
| 672 | if [[ "$write_policy_line_count" -gt 0 ]]; then | ||
| 673 | local before_count | ||
| 674 | before_count=$(grep -v '^#' "$output_file" 2>/dev/null | wc -l || echo "0") | ||
| 675 | before_count="${before_count//[^0-9]/}" | ||
| 676 | before_count="${before_count:-0}" | ||
| 677 | parse_write_policy_rejection_batch "$temp_write_policy_rejection" >> "$output_file" | ||
| 678 | local after_count | ||
| 679 | after_count=$(grep -v '^#' "$output_file" 2>/dev/null | wc -l || echo "0") | ||
| 680 | after_count="${after_count//[^0-9]/}" | ||
| 681 | after_count="${after_count:-0}" | ||
| 682 | write_policy_count=$((after_count - before_count)) | ||
| 683 | fi | ||
| 684 | |||
| 685 | local invalid_announcement_count=$write_policy_count | ||
| 686 | |||
| 687 | # Build enrichment lookup table from "Added rejected announcement" entries | ||
| 688 | local enrichment_lookup_file | ||
| 689 | enrichment_lookup_file=$(mktemp) | ||
| 690 | |||
| 691 | log_info " Building enrichment lookup table..." | ||
| 692 | if [[ "$rejected_announcement_line_count" -gt 0 ]]; then | ||
| 693 | parse_rejected_announcement_batch "$temp_rejected_announcement" > "$enrichment_lookup_file" | ||
| 694 | fi | ||
| 695 | |||
| 696 | rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_rejected_announcement" | ||
| 697 | |||
| 698 | # Deduplicate by event_id (first column) - keep first occurrence | ||
| 699 | log_info " Deduplicating entries..." | ||
| 700 | local deduped_file | ||
| 701 | deduped_file=$(mktemp) | ||
| 702 | # Preserve header lines (starting with #) and deduplicate data lines | ||
| 703 | grep '^#' "$output_file" > "$deduped_file" | ||
| 704 | grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file" | ||
| 705 | mv "$deduped_file" "$output_file" | ||
| 706 | |||
| 707 | # Deduplicate enrichment lookup table by event_id | ||
| 708 | if [[ -s "$enrichment_lookup_file" ]]; then | ||
| 709 | sort -t$'\t' -k1,1 -u "$enrichment_lookup_file" > "$enrichment_lookup_file.deduped" | ||
| 710 | mv "$enrichment_lookup_file.deduped" "$enrichment_lookup_file" | ||
| 711 | fi | ||
| 712 | |||
| 713 | # Enrich with repo/npub from "Added rejected announcement" log entries | ||
| 714 | # This is critical for usability - without it, action-required.txt shows | ||
| 715 | # event_id|kind instead of repo|npub, making parse failures unidentifiable | ||
| 716 | enrich_with_repo_npub "$output_file" "$enrichment_lookup_file" | ||
| 717 | |||
| 718 | rm -f "$enrichment_lookup_file" | ||
| 719 | |||
| 720 | # Count final entries (excluding header lines) | ||
| 721 | local count | ||
| 722 | count=$(grep -v '^#' "$output_file" | wc -l) | ||
| 723 | count="${count//[^0-9]/}" # Strip whitespace | ||
| 724 | count="${count:-0}" | ||
| 725 | |||
| 726 | rm -f "$temp_file" | ||
| 727 | |||
| 728 | # Summary | ||
| 729 | echo "" | ||
| 730 | log_info "=== Extraction Summary ===" | ||
| 731 | log_info "Service: $service" | ||
| 732 | log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" | ||
| 733 | log_success "Extracted $count total entries" | ||
| 734 | log_info " - [PARSE_FAIL] entries: $parse_fail_count" | ||
| 735 | log_info " - Invalid announcement rejections: $invalid_announcement_count" | ||
| 736 | echo "" | ||
| 737 | log_info "Output file: $output_file" | ||
| 738 | |||
| 739 | if [[ $count -gt 0 ]]; then | ||
| 740 | echo "" | ||
| 741 | log_info "Sample entries (first 5):" | ||
| 742 | # Use a subshell to avoid SIGPIPE issues with set -e | ||
| 743 | # New format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub | ||
| 744 | (grep -v '^#' "$output_file" | head -5 | while IFS=$'\t' read -r event_id kind reason repo npub; do | ||
| 745 | echo " kind=$kind event_id=${event_id:0:16}... reason=\"${reason:0:60}...\"" | ||
| 746 | done) || true | ||
| 747 | fi | ||
| 748 | |||
| 749 | # Breakdown by kind | ||
| 750 | if [[ $count -gt 0 ]]; then | ||
| 751 | echo "" | ||
| 752 | log_info "Breakdown by event kind:" | ||
| 753 | # Use a subshell to avoid SIGPIPE issues with set -e | ||
| 754 | # kind is now column 2 | ||
| 755 | (grep -v '^#' "$output_file" | awk -F'\t' '{print $2}' | sort | uniq -c | sort -rn | while read -r cnt kind; do | ||
| 756 | echo " kind $kind: $cnt failures" | ||
| 757 | done) || true | ||
| 758 | fi | ||
| 759 | |||
| 760 | # Breakdown by reason pattern (for invalid announcements) | ||
| 761 | if [[ $invalid_announcement_count -gt 0 ]]; then | ||
| 762 | echo "" | ||
| 763 | log_info "Breakdown by reason pattern:" | ||
| 764 | # Extract the main reason type (before the colon details) | ||
| 765 | (grep -v '^#' "$output_file" | awk -F'\t' '{print $3}' | sed 's/:.*//' | sort | uniq -c | sort -rn | head -10 | while read -r cnt reason; do | ||
| 766 | echo " $reason: $cnt" | ||
| 767 | done) || true | ||
| 768 | fi | ||
| 769 | |||
| 770 | # Explicit success exit | ||
| 771 | exit 0 | ||
| 772 | } | ||
| 773 | |||
| 774 | main "$@" | ||
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/31-extract-purgatory-expiry.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/31-extract-purgatory-expiry.sh new file mode 100755 index 0000000..a0c8ad0 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/31-extract-purgatory-expiry.sh | |||
| @@ -0,0 +1,408 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 31-extract-purgatory-expiry.sh - Extract purgatory expiry events from systemd logs | ||
| 4 | # | ||
| 5 | # PHASE 4b of the GRASP relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Extracts structured [PURGATORY_EXPIRED] log entries from journalctl. | ||
| 7 | # | ||
| 8 | # USAGE: | ||
| 9 | # ./31-extract-purgatory-expiry.sh <service-name> <output-dir> [options] | ||
| 10 | # | ||
| 11 | # EXAMPLES: | ||
| 12 | # # Extract from ngit-grasp service (last 30 days, default) | ||
| 13 | # ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs | ||
| 14 | # | ||
| 15 | # # Extract with custom time range | ||
| 16 | # ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-01" | ||
| 17 | # | ||
| 18 | # # Extract from specific time window | ||
| 19 | # ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22" | ||
| 20 | # | ||
| 21 | # OPTIONS: | ||
| 22 | # --since <date> Start date for log extraction (default: 30 days ago) | ||
| 23 | # --until <date> End date for log extraction (default: now) | ||
| 24 | # --dry-run Show what would be extracted without writing files | ||
| 25 | # | ||
| 26 | # OUTPUT: | ||
| 27 | # <output-dir>/purgatory-expired.txt | ||
| 28 | # | ||
| 29 | # OUTPUT FORMAT (TSV): | ||
| 30 | # repo<TAB>npub<TAB>timestamp<TAB>reason | ||
| 31 | # | ||
| 32 | # EXPECTED LOG FORMAT: | ||
| 33 | # The script looks for structured log entries in this format: | ||
| 34 | # | ||
| 35 | # 2026-01-22T10:30:45+0000 myhost ngit-grasp[1234]: [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="clone URL unreachable after 7 days" | ||
| 36 | # | ||
| 37 | # Required fields: repo, npub | ||
| 38 | # Optional fields: reason (explains why purgatory expired) | ||
| 39 | # | ||
| 40 | # BACKGROUND: | ||
| 41 | # "Purgatory" is the state where ngit-grasp has received an announcement event | ||
| 42 | # but cannot yet sync the git data (e.g., clone URL unreachable, git server down). | ||
| 43 | # After a configurable timeout (default 7 days), the repository is marked as | ||
| 44 | # expired and removed from purgatory. | ||
| 45 | # | ||
| 46 | # Purgatory expiry during migration analysis indicates repositories that: | ||
| 47 | # - Had valid announcements on the production relay | ||
| 48 | # - Could not be synced to the archive relay | ||
| 49 | # - May need manual intervention or investigation | ||
| 50 | # | ||
| 51 | # DEPENDENCY: | ||
| 52 | # This script requires logging improvements in ngit-grasp to emit structured | ||
| 53 | # [PURGATORY_EXPIRED] log entries. Until those are implemented, this script | ||
| 54 | # will find no matching entries (which is handled gracefully). | ||
| 55 | # | ||
| 56 | # See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section) | ||
| 57 | # | ||
| 58 | # Expected Rust logging code: | ||
| 59 | # tracing::warn!( | ||
| 60 | # target: "migration", | ||
| 61 | # "[PURGATORY_EXPIRED] repo={} npub={} reason=\"{}\"", | ||
| 62 | # identifier, npub, reason | ||
| 63 | # ); | ||
| 64 | # | ||
| 65 | # PREREQUISITES: | ||
| 66 | # - journalctl (systemd) | ||
| 67 | # - grep, awk (standard Unix tools) | ||
| 68 | # - Access to systemd journal (may require sudo or journal group membership) | ||
| 69 | # | ||
| 70 | # RUNTIME: Depends on log volume, typically < 30 seconds | ||
| 71 | # | ||
| 72 | # SEE ALSO: | ||
| 73 | # docs/how-to/migrate-to-ngit-grasp.md - Full migration guide | ||
| 74 | # 30-extract-parse-failures.sh - Companion script for parse failure logs | ||
| 75 | # | ||
| 76 | |||
set -euo pipefail

# Absolute directory of this script, used to locate sibling helper scripts.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Load the optional service validation helper when it sits next to this script.
if [[ -f "${SCRIPT_DIR}/validate-service.sh" ]]; then
    . "${SCRIPT_DIR}/validate-service.sh"
fi

# Colour escape sequences used by the log helpers. They default to empty
# strings and are only populated when stdout is a terminal.
RED=''
GREEN=''
YELLOW=''
BLUE=''
NC=''
if [[ -t 1 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
fi
| 101 | |||
# Logging helpers. Each one prints a single tagged line to stderr; the
# message is all arguments joined by spaces, and backslash escapes in the
# line (including the colour codes) are interpreted.

# Informational message, tagged [INFO].
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $*" >&2
}

# Success message, tagged [OK].
log_success() {
    printf '%b\n' "${GREEN}[OK]${NC} $*" >&2
}

# Warning message, tagged [WARN].
log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2
}

# Error message, tagged [ERROR].
log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}
| 117 | |||
# Print the command-line help text to stdout and terminate with status 1.
usage() {
    cat <<EOF
Usage: $0 <service-name> <output-dir> [options]

Arguments:
 service-name Systemd service name (e.g., ngit-grasp.service)
 output-dir Directory to store extracted log data

Options:
 --since <date> Start date (default: 30 days ago)
 --until <date> End date (default: now)
 --dry-run Show what would be extracted without writing

Examples:
 $0 ngit-grasp.service output/logs
 $0 ngit-grasp.service output/logs --since '2026-01-01'
 $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'

Expected log format:
 [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="..."
EOF
    exit 1
}
| 139 | |||
# Parse a single log line and extract its fields.
#
# Arguments:
#   $1  log line containing [PURGATORY_EXPIRED] (journalctl short-iso output)
# Output (stdout):
#   TSV line: repo<TAB>npub<TAB>timestamp<TAB>reason
#   Nothing is printed when the required repo= or npub= field is missing.
#
# Fields are matched with bash's built-in regex engine and only the FIRST
# occurrence of each key is used. The previous grep -o based extraction
# printed every match, so a "repo=" or "npub=" token inside the quoted
# reason text injected newlines and stray values into the TSV row. Using
# [[ =~ ]] also avoids forking several processes per log line.
parse_log_line() {
    local line="$1"
    local timestamp="" repo="" npub="" reason=""

    # Patterns are kept in variables so that spaces and quotes need no
    # escaping inside [[ ... =~ ... ]].
    local re_timestamp='^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}'
    local re_repo='repo=([^ ]+)'
    local re_npub='npub=([^ ]+)'
    local re_reason='reason="([^"]*)'

    # ISO timestamp at the start of the line (timezone suffix is dropped)
    if [[ "$line" =~ $re_timestamp ]]; then
        timestamp="${BASH_REMATCH[0]}"
    fi

    # repo=VALUE (unquoted identifier, ends at the next space)
    if [[ "$line" =~ $re_repo ]]; then
        repo="${BASH_REMATCH[1]}"
    fi

    # npub=VALUE (npub1... format, ends at the next space)
    if [[ "$line" =~ $re_npub ]]; then
        npub="${BASH_REMATCH[1]}"
    fi

    # reason="VALUE" (quoted string, optional)
    if [[ "$line" =~ $re_reason ]]; then
        reason="${BASH_REMATCH[1]}"
    fi

    # Only output if we have the required fields
    if [[ -n "$repo" && -n "$npub" ]]; then
        printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$timestamp" "$reason"
    fi
}
| 167 | |||
# Main entry point.
#
# Parses the command line, queries journalctl for the given service and
# writes every parseable [PURGATORY_EXPIRED] entry as a TSV row to
# <output-dir>/purgatory-expired.txt. Log messages go through the log_*
# helpers; blank separator lines and the sample/breakdown listings are
# written to stdout.
#
# Arguments:
#   $1    systemd service name (".service" is appended when missing)
#   $2    output directory (created if needed)
#   $3..  options: --since <date>, --until <date>, --dry-run
#
# Exit status:
#   0  success, including dry runs and "no entries found"
#   1  usage error, failed service validation or journalctl not installed
main() {
    if [[ $# -lt 2 ]]; then
        usage
    fi

    local service="$1"
    local output_dir="$2"
    shift 2

    # Default time range: last 30 days (GNU date syntax first, BSD fallback)
    local since_date
    since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "")
    local until_date=""
    local dry_run=false

    # Parse options
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --since|--until)
                # Under `set -u`, reading a missing "$2" would abort with a
                # cryptic "unbound variable" error; report it properly.
                if [[ $# -lt 2 ]]; then
                    log_error "Option $1 requires a value"
                    usage
                fi
                if [[ "$1" == "--since" ]]; then
                    since_date="$2"
                else
                    until_date="$2"
                fi
                shift 2
                ;;
            --dry-run)
                dry_run=true
                shift
                ;;
            *)
                log_error "Unknown option: $1"
                usage
                ;;
        esac
    done

    # Normalise the service name so it always carries the .service suffix
    if [[ ! "$service" =~ \.service$ ]]; then
        service="${service}.service"
    fi

    # Validate service is appropriate for structured logging
    # This prevents the common mistake of using ngit-relay instead of ngit-grasp
    if type validate_service_for_structured_logging &>/dev/null; then
        # Use non-interactive mode if not a terminal, skip log check (we'll do our own)
        local interactive="true"
        if [[ ! -t 0 ]]; then
            interactive="false"
        fi

        if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then
            log_error "Service validation failed. Use an ngit-grasp service for structured logging."
            exit 1
        fi
    else
        # Fallback validation if helper not available
        if [[ "$service" == *"ngit-relay"* ]]; then
            log_error "Service name appears to be ngit-relay: $service"
            log_error "Structured logging ([PURGATORY_EXPIRED]) only exists in ngit-grasp services."
            log_error "Please use the ngit-grasp archive service instead."
            log_error ""
            log_error "To find the correct service:"
            log_error " systemctl list-units 'ngit-grasp*' --all"
            exit 1
        fi
    fi

    log_info "Extracting purgatory expiry events from systemd logs"
    log_info "Service: $service"
    log_info "Output: $output_dir"
    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"

    # Check if journalctl is available
    if ! command -v journalctl &> /dev/null; then
        log_error "journalctl not found. This script requires systemd."
        exit 1
    fi

    # Validate service exists (check if journalctl can find any logs for it)
    # Note: We don't require the service to be running, just that it has logs
    if ! journalctl --no-pager -u "$service" -n 1 &>/dev/null; then
        log_warn "Could not query logs for service: $service"
        log_warn "This may indicate the service doesn't exist or you lack permissions."
        log_warn ""
        log_warn "To list available ngit-grasp services:"
        log_warn " systemctl list-units 'ngit-grasp*' --all"
        log_warn " journalctl --list-boots # Check if you have journal access"
        log_warn ""
        # Continue anyway - the service might exist but have no logs yet
    fi

    # Build the journalctl invocation as an argument array so the service
    # name and the user-supplied dates are passed verbatim: no eval, no word
    # splitting and no shell injection through --since/--until values.
    # A printable copy is kept for the "Running:" log message.
    local -a journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)
    local journal_cmd_display="journalctl -u $service --no-pager -o short-iso"

    if [[ -n "$since_date" ]]; then
        journal_cmd+=(--since "$since_date")
        journal_cmd_display="$journal_cmd_display --since '$since_date'"
    fi

    if [[ -n "$until_date" ]]; then
        journal_cmd+=(--until "$until_date")
        journal_cmd_display="$journal_cmd_display --until '$until_date'"
    fi

    log_info "Running: $journal_cmd_display | grep '\\[PURGATORY_EXPIRED\\]'"

    if [[ "$dry_run" == true ]]; then
        log_info "[DRY RUN] Would extract to: $output_dir/purgatory-expired.txt"

        # Show sample of what would be extracted
        log_info "Checking for matching log entries..."
        local sample_count
        # grep -c already prints "0" (and exits 1) when nothing matches, so
        # only the exit status is neutralised here. Echoing a second "0"
        # would turn the captured count into "00".
        sample_count=$("${journal_cmd[@]}" 2>/dev/null | grep -c '\[PURGATORY_EXPIRED\]' || true)
        sample_count="${sample_count//[^0-9]/}"  # Strip non-numeric characters
        sample_count="${sample_count:-0}"
        log_info "Found $sample_count matching log entries"

        if [[ "$sample_count" -eq 0 ]]; then
            log_warn "No [PURGATORY_EXPIRED] entries found in logs."
            log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
            log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)"
        fi

        exit 0
    fi

    # Create output directory
    mkdir -p "$output_dir"

    local output_file="$output_dir/purgatory-expired.txt"

    # Extract and parse log entries
    log_info "Extracting log entries..."

    # Get raw log lines containing [PURGATORY_EXPIRED]
    # stderr is captured separately so journalctl problems can be reported;
    # the temp file is removed immediately so no exit path leaks it.
    local raw_lines journal_stderr temp_stderr
    temp_stderr=$(mktemp)
    raw_lines=$("${journal_cmd[@]}" 2>"$temp_stderr" | grep '\[PURGATORY_EXPIRED\]' || true)
    journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true)
    rm -f "$temp_stderr"

    # Report any journalctl errors (but don't fail - empty logs are valid)
    if [[ -n "$journal_stderr" ]]; then
        log_warn "journalctl reported: $journal_stderr"
    fi

    # Write the 4-line header shared by the empty and non-empty outputs.
    # The statistics below rely on data rows starting at line 5.
    local first_data_line=5
    {
        echo "# Purgatory expiry events extracted from $service"
        echo "# Time range: ${since_date:-beginning} to ${until_date:-now}"
        echo "# Extracted: $(date -Iseconds)"
        echo "# Format: repo<TAB>npub<TAB>timestamp<TAB>reason"
    } > "$output_file"

    if [[ -z "$raw_lines" ]]; then
        log_warn "No [PURGATORY_EXPIRED] entries found in logs."
        log_warn ""
        log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
        log_warn "The structured log format required by this script:"
        log_warn ""
        log_warn " [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason=\"...\""
        log_warn ""
        log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)"
        log_warn ""

        # Mark the header-only output file as intentionally empty
        {
            echo "#"
            echo "# NOTE: No [PURGATORY_EXPIRED] entries found."
            echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
        } >> "$output_file"

        log_info "Created empty output file: $output_file"
        exit 0
    fi

    # Parse each line; lines missing required fields produce no output
    local count=0
    local line parsed
    while IFS= read -r line; do
        parsed=$(parse_log_line "$line")
        if [[ -n "$parsed" ]]; then
            printf '%s\n' "$parsed" >> "$output_file"
            count=$((count + 1))
        fi
    done <<< "$raw_lines"

    # Summary
    echo ""
    log_info "=== Extraction Summary ==="
    log_info "Service: $service"
    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
    log_success "Extracted $count purgatory expiry entries"
    echo ""
    log_info "Output file: $output_file"

    if [[ $count -gt 0 ]]; then
        echo ""
        log_info "Sample entries (first 5):"
        # Use a subshell to avoid SIGPIPE issues with set -e
        (tail -n "+$first_data_line" "$output_file" | head -5 | while IFS=$'\t' read -r repo npub timestamp reason; do
            echo " repo=$repo npub=${npub:0:20}... timestamp=$timestamp"
        done) || true

        # Show unique repos affected (computed once, reused for the
        # "... and N more" line; wc padding is stripped for portability)
        echo ""
        local unique_repos
        unique_repos=$(tail -n "+$first_data_line" "$output_file" | awk -F'\t' '{print $1}' | sort -u | wc -l)
        unique_repos="${unique_repos//[^0-9]/}"
        unique_repos="${unique_repos:-0}"
        log_info "Unique repositories affected: $unique_repos"

        echo ""
        log_info "Repositories with purgatory expiry:"
        # Use a subshell to avoid SIGPIPE issues with set -e
        (tail -n "+$first_data_line" "$output_file" | awk -F'\t' '{print $1}' | sort | uniq -c | sort -rn | head -10 | while read -r cnt repo; do
            echo " $repo: $cnt expiry events"
        done) || true

        if [[ $unique_repos -gt 10 ]]; then
            echo " ... and $((unique_repos - 10)) more repositories"
        fi
    fi

    # Explicit success exit
    exit 0
}
| 407 | |||
| 408 | main "$@" | ||
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh new file mode 100755 index 0000000..8b61636 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh | |||
| @@ -0,0 +1,662 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 40-classify-actions.sh - Classify repos by migration action required | ||
| 4 | # | ||
| 5 | # Implements the redesigned classification system (Option B) with user feedback: | ||
| 6 | # | ||
| 7 | # Tier 1: No Action Required (ready-for-migration.txt) | ||
| 8 | # - Complete in both (prod=cat1, archive=cat1) | ||
| 9 | # - Deleted by user (kind 5 event) | ||
| 10 | # - Empty in prod (prod=cat2, any archive status) | ||
| 11 | # - Archive-only (archive=any, prod=missing) | ||
| 12 | # - Not in prod (purgatory-only, prod=missing) | ||
| 13 | # - Archive ahead (archive has newer git data than prod - GRASP enforced) | ||
| 14 | # | ||
| 15 | # Tier 2: Action Required (needs-resync.txt) | ||
| 16 | # - Complete in prod, missing from archive (with purgatory context) | ||
| 17 | # - Complete in prod, incomplete in archive AND prod is ahead (with purgatory context) | ||
| 18 | # | ||
| 19 | # Tier 3: Manual Investigation (manual-review.txt) | ||
| 20 | # - Partial in prod (prod=cat3) | ||
| 21 | # - No-match in prod (prod=cat4) | ||
| 22 | # - Parse failures | ||
| 23 | # - Conflicting states | ||
| 24 | # - Diverged git history (both have unique commits) | ||
| 25 | # | ||
| 26 | # KEY INSIGHT: | ||
| 27 | # Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event. | ||
| 28 | # If archive has different/newer data than prod, it means: | ||
| 29 | # - A state event authorized those commits at some point | ||
| 30 | # - Archive is actually MORE up-to-date than prod | ||
| 31 | # - Migration should use archive data (it's already correct) | ||
| 32 | # | ||
| 33 | # Usage: ./40-classify-actions.sh <analysis-dir> | ||
| 34 | # | ||
| 35 | # Output format: repo | npub | prod_status | archive_status | context | action | ||
| 36 | # | ||
| 37 | |||
| 38 | set -euo pipefail | ||
| 39 | |||
# Colors for output. They are disabled when stdout is not a terminal so
# that piped or redirected output stays free of raw escape sequences.
if [[ -t 1 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[1;33m'
    BLUE='\033[0;34m'
    NC='\033[0m' # No Color
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi

# Logging helpers: each prints one tagged line built from all arguments.
# Info, success and warning messages go to stdout; errors go to stderr.
log_info() { echo -e "${BLUE}[INFO]${NC} $*"; }
log_success() { echo -e "${GREEN}[OK]${NC} $*"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
| 51 | |||
# The analysis directory is the single required argument; without it,
# print usage to stdout and stop with status 1.
if (( $# < 1 )); then
    printf '%s\n' \
        "Usage: $0 <analysis-dir>" \
        "Example: $0 work/migration-analysis-20260123-200701"
    exit 1
fi

ANALYSIS_DIR="$1"

# The analysis directory must already exist.
[[ -d "$ANALYSIS_DIR" ]] || {
    log_error "Analysis directory not found: $ANALYSIS_DIR"
    exit 1
}
| 66 | |||
# Input and output locations inside the analysis directory
PROD_DIR="${ANALYSIS_DIR}/prod"
ARCHIVE_DIR="${ANALYSIS_DIR}/archive"
COMPARISON_DIR="${ANALYSIS_DIR}/comparison"
LOGS_DIR="${ANALYSIS_DIR}/logs"
RESULTS_DIR="${ANALYSIS_DIR}/results"

# Every input directory must exist before classification can start;
# stop at the first one that is missing.
for dir in "$PROD_DIR" "$ARCHIVE_DIR" "$COMPARISON_DIR" "$LOGS_DIR"; do
    [[ -d "$dir" ]] && continue
    log_error "Required directory not found: $dir"
    exit 1
done

# The results directory is created on demand
mkdir -p "$RESULTS_DIR"

# Files written under the results directory
READY_FILE="${RESULTS_DIR}/ready-for-migration.txt"
RESYNC_FILE="${RESULTS_DIR}/needs-resync.txt"
REVIEW_FILE="${RESULTS_DIR}/manual-review.txt"
SUMMARY_FILE="${RESULTS_DIR}/summary.txt"

# Scratch directory for intermediate files, removed when the script exits
TMP_DIR=$(mktemp -d)
trap 'rm -rf "$TMP_DIR"' EXIT

log_info "Starting classification with revised system (Option B)"
log_info "Analysis directory: $ANALYSIS_DIR"

# ============================================================================
# Phase 1: Build lookup tables from source data
# ============================================================================

log_info "Building lookup tables..."
| 103 | |||
| 104 | # Build prod category lookup: repo|npub -> category | ||
| 105 | declare -A PROD_CAT | ||
| 106 | while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do | ||
| 107 | repo="${repo// /}" # Remove all spaces | ||
| 108 | npub="${npub// /}" # Remove all spaces | ||
| 109 | [[ -z "$repo" || -z "$npub" ]] && continue | ||
| 110 | PROD_CAT["$repo|$npub"]="cat1" | ||
| 111 | done < "$PROD_DIR/category1-complete-match.txt" | ||
| 112 | |||
| 113 | while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do | ||
| 114 | repo="${repo// /}" | ||
| 115 | npub="${npub// /}" | ||
| 116 | [[ -z "$repo" || -z "$npub" ]] && continue | ||
| 117 | PROD_CAT["$repo|$npub"]="cat2" | ||
| 118 | done < "$PROD_DIR/category2-empty-blank.txt" | ||
| 119 | |||
| 120 | while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do | ||
| 121 | repo="${repo// /}" | ||
| 122 | npub="${npub// /}" | ||
| 123 | [[ -z "$repo" || -z "$npub" ]] && continue | ||
| 124 | PROD_CAT["$repo|$npub"]="cat3" | ||
| 125 | done < "$PROD_DIR/category3-partial-match.txt" | ||
| 126 | |||
| 127 | while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do | ||
| 128 | repo="${repo// /}" | ||
| 129 | npub="${npub// /}" | ||
| 130 | [[ -z "$repo" || -z "$npub" ]] && continue | ||
| 131 | PROD_CAT["$repo|$npub"]="cat4" | ||
| 132 | done < "$PROD_DIR/category4-no-match.txt" | ||
| 133 | |||
| 134 | log_info "Loaded ${#PROD_CAT[@]} prod entries" | ||
| 135 | |||
# Archive category lookup: "repo|npub" -> cat1..cat4.
# Same row format and precedence as the prod lookup: files are read in
# cat1..cat4 order and the last file containing a key wins.
declare -A ARCHIVE_CAT
for category_source in \
    "cat1=category1-complete-match.txt" \
    "cat2=category2-empty-blank.txt" \
    "cat3=category3-partial-match.txt" \
    "cat4=category4-no-match.txt"; do
    category_label="${category_source%%=*}"
    category_path="$ARCHIVE_DIR/${category_source#*=}"

    # `read` returns non-zero on a final row without a trailing newline even
    # though it filled the variables; the `|| [[ -n "$repo" ]]` guard (already
    # used by the prod loops) keeps that row instead of silently dropping it.
    while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do
        repo="${repo// /}"   # strip the padding around the '|' separators
        npub="${npub// /}"
        if [[ -n "$repo" && -n "$npub" ]]; then
            ARCHIVE_CAT["$repo|$npub"]="$category_label"
        fi
    done < "$category_path"
done

log_info "Loaded ${#ARCHIVE_CAT[@]} archive entries"
| 167 | |||
# Purgatory lookup: "repo|npub" -> 1 for every row of purgatory-expired.txt.
# Rows are tab-separated: repo, npub, timestamp, reason. The log is optional.
# PURGATORY_COUNT counts accepted rows, so duplicate keys are counted twice.
# NOTE(review): tab is IFS whitespace, so `read` collapses consecutive tabs;
# an empty middle field would shift later fields left - confirm the log writer
# never emits empty fields.
declare -A PURGATORY
PURGATORY_COUNT=0
if [[ -f "$LOGS_DIR/purgatory-expired.txt" ]]; then
    while IFS=$'\t' read -r repo npub timestamp reason || [[ -n "$repo" ]]; do
        case "$repo" in
            \#*) continue ;;   # comment line
        esac
        if [[ -z "$repo" || -z "$npub" ]]; then
            continue           # blank or incomplete row
        fi
        PURGATORY["$repo|$npub"]=1
        (( PURGATORY_COUNT += 1 ))
    done < "$LOGS_DIR/purgatory-expired.txt"
fi
log_info "Loaded $PURGATORY_COUNT purgatory entries"

# Parse failure lookup: "repo|npub" -> 1 for every row of parse-failures.txt.
# Row format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub. The log is optional.
declare -A PARSE_FAIL
PARSE_FAIL_COUNT=0
if [[ -f "$LOGS_DIR/parse-failures.txt" ]]; then
    while IFS=$'\t' read -r event_id kind reason repo npub || [[ -n "$event_id" ]]; do
        case "$event_id" in
            \#*) continue ;;   # comment line
        esac
        if [[ -z "$repo" || -z "$npub" ]]; then
            continue           # blank or incomplete row
        fi
        PARSE_FAIL["$repo|$npub"]=1
        (( PARSE_FAIL_COUNT += 1 ))
    done < "$LOGS_DIR/parse-failures.txt"
fi
log_info "Loaded $PARSE_FAIL_COUNT parse failure entries"
| 196 | |||
# Build deletion lookup: repo|npub -> 1 (if kind 5 deletion event)
# Deletions are in NDJSON format with "a" tags like "30617:pubkey_hex:repo"
# We need to convert hex pubkeys to npub format using nak
declare -A DELETED

# process_deletions FILE
#
# Reads an NDJSON file of deletion events and records every deleted repo
# announcement in the global DELETED map as DELETED["repo|npub"]=1.
#
#   FILE  path to the NDJSON file; a missing file is silently ignored.
#
# Only "a" tags that address kind 30617 are considered. The address has the
# form "30617:<pubkey_hex>:<identifier>"; everything after the second colon is
# the identifier, so identifiers that themselves contain ':' are kept intact.
# Pubkeys that `nak encode npub` cannot convert are skipped.
# Always returns 0; jq problems are reported with log_warn (best-effort).
process_deletions() {
    local file="$1"
    [[ -f "$file" ]] || return 0

    # One "pubkey_hex|identifier" line per matching "a" tag.
    # jq may emit partial output before failing on a malformed line; keep
    # whatever it produced but make the failure visible.
    local addresses
    addresses=$(jq -r '
        .tags[]?
        | select(.[0] == "a" and (.[1] | type) == "string")
        | .[1]
        | select(startswith("30617:"))
        | split(":")
        | select(length >= 3)
        | "\(.[1])|\(.[2:] | join(":"))"
    ' "$file" 2>/dev/null) \
        || log_warn "jq reported errors reading $file - deletion data may be incomplete"

    local pairs
    pairs=$(sort -u <<< "$addresses")

    # Convert each distinct pubkey once (one nak call per key) and cache it.
    local hex_keys
    hex_keys=$(cut -d'|' -f1 <<< "$pairs" | sort -u)

    local -A npub_for=()
    local hex npub
    while read -r hex; do
        [[ -z "$hex" ]] && continue
        npub=$(nak encode npub "$hex" 2>/dev/null || echo "")
        if [[ -n "$npub" ]]; then
            npub_for["$hex"]="$npub"
        fi
    done <<< "$hex_keys"

    # Record each pair using the cached npub. `repo` is the last variable of
    # the read, so it receives the whole remainder of the line.
    local pubkey_hex repo
    while IFS='|' read -r pubkey_hex repo; do
        [[ -z "$repo" || -z "$pubkey_hex" ]] && continue
        npub="${npub_for[$pubkey_hex]:-}"
        [[ -z "$npub" ]] && continue
        DELETED["$repo|$npub"]=1
    done <<< "$pairs"
}

# Process prod and archive deletions
process_deletions "$PROD_DIR/raw/deletions.json"
process_deletions "$ARCHIVE_DIR/raw/deletions.json"
DELETED_COUNT=${#DELETED[@]}
log_info "Loaded $DELETED_COUNT deletion entries"
| 242 | |||
# Git ancestry lookup: "repo|npub" -> relationship (archive-ahead, prod-ahead,
# diverged, etc.), read from the tab-separated git-ancestry.tsv that
# 22-compare-git-data.sh produces by comparing actual git commits.
# The file is optional; without it archive-ahead detection is unavailable.
declare -A GIT_ANCESTRY
GIT_ANCESTRY_COUNT=0
if [[ ! -f "$COMPARISON_DIR/git-ancestry.tsv" ]]; then
    log_warn "No git-ancestry.tsv found - will not check if archive is ahead of prod"
    log_warn "Run 22-compare-git-data.sh to enable archive-ahead detection"
else
    while IFS=$'\t' read -r repo npub relationship details || [[ -n "$repo" ]]; do
        case "$repo" in
            repo|\#*) continue ;;   # header row or comment line
        esac
        [[ -n "$repo" && -n "$npub" ]] || continue
        GIT_ANCESTRY["$repo|$npub"]="$relationship"
        (( GIT_ANCESTRY_COUNT += 1 ))
    done < "$COMPARISON_DIR/git-ancestry.tsv"
    log_info "Loaded $GIT_ANCESTRY_COUNT git ancestry entries"
fi
| 261 | |||
# ============================================================================
# Phase 2: Build unique repo list from all sources
# ============================================================================

log_info "Building unique repo list..."

# Union of every "repo|npub" key seen in prod, archive or the purgatory log.
# Deletion and parse-failure keys are deliberately not sources here; they are
# only consulted while classifying.
declare -A ALL_REPOS
for key in "${!PROD_CAT[@]}" "${!ARCHIVE_CAT[@]}" "${!PURGATORY[@]}"; do
    ALL_REPOS["$key"]=1
done

log_info "Total unique repos: ${#ALL_REPOS[@]}"

# ============================================================================
# Phase 3: Classify each repo according to revised decision tree
# ============================================================================

log_info "Classifying repos..."
| 286 | |||
# Per-reason tallies reported in the summary, all starting at zero.
declare -A COUNTS=(
    # Tier 1: ready for migration
    [ready_complete_both]=0
    [ready_deleted]=0
    [ready_empty_prod]=0
    [ready_archive_only]=0
    [ready_not_in_prod]=0
    [ready_archive_ahead]=0
    # Tier 2: needs re-sync
    [resync_missing_archive]=0
    [resync_incomplete_archive]=0
    # Tier 3: manual review
    [review_partial_prod]=0
    [review_nomatch_prod]=0
    [review_parse_failure]=0
    [review_conflicting]=0
    [review_diverged]=0
)

# One array of formatted result lines per output file.
declare -a READY_LINES RESYNC_LINES REVIEW_LINES
| 307 | |||
# get_context KEY PROD_STATUS ARCHIVE_STATUS
#
# Prints the context column for one repo: a ", "-separated list of the notes
# that apply, in this fixed order, or "none" when no note applies.
#   purgatory-expired  KEY is present in PURGATORY
#   parse-failure      KEY is present in PARSE_FAIL
#   archive-has-data   prod is "empty" while archive is neither "missing"
#                      nor "empty"
get_context() {
    local key="$1"
    local prod_status="$2"
    local archive_status="$3"
    local -a notes=()

    if [[ -n "${PURGATORY[$key]:-}" ]]; then
        notes+=("purgatory-expired")
    fi

    if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
        notes+=("parse-failure")
    fi

    # Unexpected state: prod has nothing but the archive holds data.
    if [[ "$prod_status" == "empty" ]]; then
        case "$archive_status" in
            missing|empty) ;;
            *) notes+=("archive-has-data") ;;
        esac
    fi

    if (( ${#notes[@]} == 0 )); then
        echo "none"
        return
    fi

    local joined
    printf -v joined '%s, ' "${notes[@]}"
    echo "${joined%, }"
}
| 340 | |||
# cat_to_status CATEGORY
#
# Prints the human-readable status for a category code (cat1..cat4).
# Any other value, including "missing", is printed unchanged.
cat_to_status() {
    local category="$1"
    local status

    case "$category" in
        cat1) status="complete" ;;
        cat2) status="empty" ;;
        cat3) status="partial" ;;
        cat4) status="no-match" ;;
        *)    status="$category" ;;
    esac

    echo "$status"
}
| 352 | |||
# Classify every known repo. Each iteration first decides which output tier the
# repo belongs to (ready / resync / review), which counter to bump and which
# reason text to print, then emits exactly one result line. A repo that
# matches no rule leaves `tier` empty and is skipped.
LOOP_COUNT=0
for key in "${!ALL_REPOS[@]}"; do
    LOOP_COUNT=$((LOOP_COUNT + 1))
    if (( LOOP_COUNT % 100 == 0 )); then
        log_info "Processed $LOOP_COUNT repos..."
    fi
    IFS='|' read -r repo npub <<< "$key"

    prod_cat="${PROD_CAT[$key]:-missing}"
    archive_cat="${ARCHIVE_CAT[$key]:-missing}"
    prod_status=$(cat_to_status "$prod_cat")
    archive_status=$(cat_to_status "$archive_cat")

    context=$(get_context "$key" "$prod_status" "$archive_status")
    tier=""
    counter=""
    reason=""

    if [[ -n "${DELETED[$key]:-}" ]]; then
        # 1. A kind 5 deletion event overrides everything else.
        tier="ready" counter="ready_deleted" reason="deleted by user"

    elif [[ "$prod_cat" == "missing" ]]; then
        # 2a. Not in prod: nothing to migrate.
        if [[ "$archive_cat" != "missing" ]]; then
            tier="ready" counter="ready_archive_only" reason="archive-only (not in prod)"
        elif [[ -n "${PURGATORY[$key]:-}" ]]; then
            # Known only from the purgatory log; the context is fixed here.
            context="purgatory-expired"
            tier="ready" counter="ready_not_in_prod" reason="purgatory-only (not in prod)"
        fi

    elif [[ "$prod_cat" == "cat2" ]]; then
        # 2b. Empty in prod -> ALWAYS no action required.
        tier="ready" counter="ready_empty_prod" reason="empty in prod (user never pushed)"

    elif [[ "$prod_cat" == "cat3" ]]; then
        # 2c. Partial in prod -> ALWAYS manual investigation.
        tier="review" counter="review_partial_prod" reason="partial in prod (investigate git data)"

    elif [[ "$prod_cat" == "cat4" ]]; then
        # 2d. No-match in prod -> ALWAYS manual investigation.
        tier="review" counter="review_nomatch_prod" reason="no-match in prod (git corruption)"

    elif [[ "$prod_cat" == "cat1" ]]; then
        # 2e. Complete in prod: the outcome depends on the archive side.
        if [[ "$archive_cat" == "cat1" ]]; then
            tier="ready" counter="ready_complete_both" reason="complete in both"
        elif [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
            tier="review" counter="review_parse_failure" reason="complete in prod with parse failure"
        else
            git_relationship="${GIT_ANCESTRY[$key]:-unknown}"

            # A known git relationship is appended to the context, replacing
            # the "none" placeholder when there is no other context.
            if [[ "$git_relationship" != "unknown" ]]; then
                if [[ "$context" == "none" ]]; then
                    context=""
                fi
                context="${context:+$context, }git=$git_relationship"
            fi

            case "$git_relationship" in
                archive-ahead|in-sync)
                    # Archive has newer/same git data - this is GOOD.
                    # Archive's git data was authorized by a state event (GRASP enforced).
                    tier="ready" counter="ready_archive_ahead" reason="archive ahead (use archive data)"
                    ;;
                diverged)
                    tier="review" counter="review_diverged" reason="git histories diverged (manual review)"
                    ;;
                *)
                    # prod-ahead, archive-only, prod-only, both-empty, or unknown.
                    tier="resync"
                    if [[ "$archive_cat" == "missing" ]]; then
                        counter="resync_missing_archive" reason="trigger re-sync to archive"
                    else
                        counter="resync_incomplete_archive" reason="trigger re-sync (archive incomplete)"
                    fi
                    ;;
            esac
        fi
    fi

    line="$repo | $npub | $prod_status | $archive_status | $context | $reason"
    case "$tier" in
        ready)  READY_LINES+=("$line") ;;
        resync) RESYNC_LINES+=("$line") ;;
        review) REVIEW_LINES+=("$line") ;;
        *)      continue ;;   # no rule matched - nothing to record
    esac
    COUNTS[$counter]=$(( ${COUNTS[$counter]} + 1 ))
done
| 476 | |||
# ============================================================================
# Phase 4: Write output files
# ============================================================================

log_info "Writing output files..."

TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S+00:00")

# Each result file is a '#'-prefixed header followed by one line per repo.
# The line arrays are only expanded when non-empty: with nounset enabled,
# bash older than 4.4 treats "${ARR[@]}" of an empty array as an unbound
# variable, which would abort the run whenever a tier has no entries.

# Write ready-for-migration.txt
{
    echo "# Ready for Migration - No action required"
    echo "# Generated: $TIMESTAMP"
    echo "# Format: repo | npub | prod_status | archive_status | context | reason"
    echo "#"
    if (( ${#READY_LINES[@]} > 0 )); then
        printf '%s\n' "${READY_LINES[@]}"
    fi
} > "$READY_FILE"

# Write needs-resync.txt
{
    echo "# Needs Re-sync - Action required"
    echo "# Generated: $TIMESTAMP"
    echo "# Format: repo | npub | prod_status | archive_status | context | action"
    echo "#"
    echo "# Context meanings:"
    echo "# purgatory-expired = archive tried to sync but failed (30min timeout)"
    echo "# none = archive never tried or announcement missing"
    echo "#"
    if (( ${#RESYNC_LINES[@]} > 0 )); then
        printf '%s\n' "${RESYNC_LINES[@]}"
    fi
} > "$RESYNC_FILE"

# Write manual-review.txt
{
    echo "# Manual Review Required - Investigation needed"
    echo "# Generated: $TIMESTAMP"
    echo "# Format: repo | npub | prod_status | archive_status | context | reason"
    echo "#"
    if (( ${#REVIEW_LINES[@]} > 0 )); then
        printf '%s\n' "${REVIEW_LINES[@]}"
    fi
} > "$REVIEW_FILE"
| 521 | |||
# ============================================================================
# Phase 5: Generate summary
# ============================================================================

log_info "Generating summary..."

# Tier sizes come straight from the result arrays.
TOTAL_READY="${#READY_LINES[@]}"
TOTAL_RESYNC="${#RESYNC_LINES[@]}"
TOTAL_REVIEW="${#REVIEW_LINES[@]}"
TOTAL=$((TOTAL_READY + TOTAL_RESYNC + TOTAL_REVIEW))

# Share of each tier as a percentage with one decimal place.
# All three stay at 0.0 when nothing was classified (avoids dividing by zero).
PCT_READY="0.0"
PCT_RESYNC="0.0"
PCT_REVIEW="0.0"
if [[ $TOTAL -gt 0 ]]; then
    PCT_READY=$(awk -v part="$TOTAL_READY" -v total="$TOTAL" 'BEGIN { printf "%.1f", (part / total) * 100 }')
    PCT_RESYNC=$(awk -v part="$TOTAL_RESYNC" -v total="$TOTAL" 'BEGIN { printf "%.1f", (part / total) * 100 }')
    PCT_REVIEW=$(awk -v part="$TOTAL_REVIEW" -v total="$TOTAL" 'BEGIN { printf "%.1f", (part / total) * 100 }')
fi
| 543 | |||
# _count_log_entries FILE
#
# Prints the number of lines in FILE that do not start with '#', or 0 when the
# file is missing or unreadable.
# `grep -c` prints "0" AND exits 1 when nothing matches, so the common
# `$(grep -c ... || echo 0)` idiom yields "0<newline>0" for a log that holds
# only comments. Capturing the output and defaulting only when it is empty
# gives a single number in every case.
_count_log_entries() {
    local log_file="$1"
    local count
    count=$(grep -c -v '^#' "$log_file" 2>/dev/null) || true
    echo "${count:-0}"
}

# Write the Markdown summary report.
{
    echo "# Migration Classification Summary"
    echo "Generated: $TIMESTAMP"
    echo "Analysis Directory: $ANALYSIS_DIR"
    echo ""
    echo "## Overview"
    echo ""
    echo "| Category | Count | Percentage |"
    echo "|----------|-------|------------|"
    echo "| Ready for Migration | $TOTAL_READY | $PCT_READY% |"
    echo "| Needs Re-sync | $TOTAL_RESYNC | $PCT_RESYNC% |"
    echo "| Manual Review | $TOTAL_REVIEW | $PCT_REVIEW% |"
    echo "| **Total** | **$TOTAL** | **100%** |"
    echo ""
    echo "## Tier 1: Ready for Migration ($TOTAL_READY repos)"
    echo ""
    echo "These repositories are ready for migration or don't need migration:"
    echo ""
    echo "| Reason | Count |"
    echo "|--------|-------|"
    echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |"
    echo "| archive ahead (has newer git data) | ${COUNTS[ready_archive_ahead]} |"
    echo "| deleted by user | ${COUNTS[ready_deleted]} |"
    echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |"
    echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |"
    echo "| purgatory-only (not in prod) | ${COUNTS[ready_not_in_prod]} |"
    echo ""
    echo "## Tier 2: Needs Re-sync ($TOTAL_RESYNC repos)"
    echo ""
    echo "These repositories need re-sync to archive before migration:"
    echo ""
    echo "| Reason | Count | Action |"
    echo "|--------|-------|--------|"
    echo "| complete in prod, missing from archive | ${COUNTS[resync_missing_archive]} | trigger re-sync |"
    echo "| complete in prod, incomplete in archive | ${COUNTS[resync_incomplete_archive]} | trigger re-sync |"
    echo ""
    echo "### Purgatory Context"
    echo ""
    echo "Repos in needs-resync.txt include purgatory context:"
    echo "- **purgatory-expired**: Archive tried to sync but failed (30min timeout)"
    echo "- **none**: Archive never tried or announcement missing"
    echo ""
    echo "## Tier 3: Manual Review ($TOTAL_REVIEW repos)"
    echo ""
    echo "These repositories require human investigation:"
    echo ""
    echo "| Reason | Count |"
    echo "|--------|-------|"
    echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |"
    echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |"
    echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |"
    echo "| git histories diverged | ${COUNTS[review_diverged]} |"
    echo ""
    echo "## Input Data Summary"
    echo ""
    echo "### Prod Categories"
    echo "- Category 1 (complete): $(wc -l < "$PROD_DIR/category1-complete-match.txt")"
    echo "- Category 2 (empty): $(wc -l < "$PROD_DIR/category2-empty-blank.txt")"
    echo "- Category 3 (partial): $(wc -l < "$PROD_DIR/category3-partial-match.txt")"
    echo "- Category 4 (no match): $(wc -l < "$PROD_DIR/category4-no-match.txt")"
    echo ""
    echo "### Archive Categories"
    echo "- Category 1 (complete): $(wc -l < "$ARCHIVE_DIR/category1-complete-match.txt")"
    echo "- Category 2 (empty): $(wc -l < "$ARCHIVE_DIR/category2-empty-blank.txt")"
    echo "- Category 3 (partial): $(wc -l < "$ARCHIVE_DIR/category3-partial-match.txt")"
    echo "- Category 4 (no match): $(wc -l < "$ARCHIVE_DIR/category4-no-match.txt")"
    echo ""
    echo "### Logs"
    echo "- Parse failures: $(_count_log_entries "$LOGS_DIR/parse-failures.txt")"
    echo "- Purgatory expired: $(_count_log_entries "$LOGS_DIR/purgatory-expired.txt")"
    echo ""
    echo "## Output Files"
    echo ""
    echo "- \`results/ready-for-migration.txt\` - $TOTAL_READY repos ready for migration"
    echo "- \`results/needs-resync.txt\` - $TOTAL_RESYNC repos needing re-sync"
    echo "- \`results/manual-review.txt\` - $TOTAL_REVIEW repos needing investigation"
    echo "- \`results/summary.txt\` - This summary file"
    echo ""
    echo "## Recommended Next Steps"
    echo ""
    echo "1. **Review needs-resync.txt** - Trigger re-sync for these repos"
    echo "2. **Review manual-review.txt** - Investigate unusual states"
    echo "3. **Verify ready-for-migration.txt** - Spot-check a few repos"
    echo "4. **Plan migration window** - Schedule cutover when action items resolved"
} > "$SUMMARY_FILE"
| 629 | |||
# ============================================================================
# Phase 6: Print summary to console
# ============================================================================

# The success banner goes through log_success; the tier breakdown and the list
# of generated files are printed to stdout as one block.
echo ""
log_success "Classification complete!"
cat <<EOF

=== Summary ===
Ready for Migration: $TOTAL_READY ($PCT_READY%)
 - Complete in both: ${COUNTS[ready_complete_both]}
 - Archive ahead: ${COUNTS[ready_archive_ahead]}
 - Deleted by user: ${COUNTS[ready_deleted]}
 - Empty in prod: ${COUNTS[ready_empty_prod]}
 - Archive-only: ${COUNTS[ready_archive_only]}
 - Purgatory-only: ${COUNTS[ready_not_in_prod]}

Needs Re-sync: $TOTAL_RESYNC ($PCT_RESYNC%)
 - Missing from archive: ${COUNTS[resync_missing_archive]}
 - Incomplete in archive: ${COUNTS[resync_incomplete_archive]}

Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)
 - Partial in prod: ${COUNTS[review_partial_prod]}
 - No-match in prod: ${COUNTS[review_nomatch_prod]}
 - Parse failures: ${COUNTS[review_parse_failure]}
 - Git diverged: ${COUNTS[review_diverged]}

Total: $TOTAL repos

Output files:
 $READY_FILE
 $RESYNC_FILE
 $REVIEW_FILE
 $SUMMARY_FILE
EOF
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/run-migration-analysis.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/run-migration-analysis.sh new file mode 100755 index 0000000..acc5e44 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/run-migration-analysis.sh | |||
| @@ -0,0 +1,779 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # run-migration-analysis.sh - Orchestrate the complete GRASP relay to ngit-grasp migration analysis | ||
| 4 | # | ||
| 5 | # This script runs all 5 phases of the migration analysis pipeline in sequence, | ||
| 6 | # with proper error handling, progress reporting, and timing information. | ||
| 7 | # | ||
| 8 | # QUICK START: | ||
| 9 | # # Basic usage (local analysis only - Phases 1, 3, 5) | ||
| 10 | # ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev | ||
| 11 | # | ||
| 12 | # # Full analysis including git sync check (requires VPS access) | ||
| 13 | # ./run-migration-analysis.sh \ | ||
| 14 | # --prod-relay wss://relay.ngit.dev \ | ||
| 15 | # --archive-relay wss://archive.relay.ngit.dev \ | ||
| 16 | # --prod-git /var/lib/grasp-relay/git \ | ||
| 17 | # --archive-git /var/lib/ngit-grasp/git | ||
| 18 | # | ||
| 19 | # USAGE: | ||
| 20 | # ./run-migration-analysis.sh [options] | ||
| 21 | # | ||
| 22 | # REQUIRED OPTIONS: | ||
| 23 | # --prod-relay <url> Production relay WebSocket URL (e.g., wss://relay.ngit.dev) | ||
| 24 | # --archive-relay <url> Archive relay WebSocket URL (e.g., wss://archive.relay.ngit.dev) | ||
| 25 | # | ||
| 26 | # OPTIONAL OPTIONS: | ||
| 27 | # --prod-git <path> Git base directory for prod (enables Phase 2) | ||
| 28 | # --archive-git <path> Git base directory for archive (enables Phase 2) | ||
| 29 | # --service <name> Systemd service name for log extraction (enables Phase 4) | ||
| 30 | # --output <dir> Output directory (default: work/migration-analysis-YYYYMMDD-HHMM) | ||
| 31 | # --since <date> Start date for log extraction (default: 30 days ago) | ||
| 32 | # --until <date> End date for log extraction (default: now) | ||
| 33 | # | ||
| 34 | # PHASE CONTROL: | ||
| 35 | # --skip-phase-1 Skip event fetching (use existing data) | ||
| 36 | # --skip-phase-2 Skip git sync check (use existing data) | ||
| 37 | # --skip-phase-3 Skip categorization (use existing data) | ||
| 38 | # --skip-phase-4 Skip log extraction (use existing data) | ||
| 39 | # --skip-phase-5 Skip final classification | ||
| 40 | # --only-phase-N Run only phase N (1-5) | ||
| 41 | # --from-phase-N Start from phase N (skip earlier phases) | ||
| 42 | # | ||
| 43 | # OTHER OPTIONS: | ||
| 44 | # --dry-run Show what would be executed without running | ||
| 45 | # --continue-on-error Continue to next phase even if current phase fails | ||
| 46 | # --help Show this help message | ||
| 47 | # | ||
| 48 | # PHASES: | ||
| 49 | # Phase 1: Fetch events from both relays (~30s each, local) | ||
| 50 | # Phase 2: Check git sync status (~20 min each, requires VPS) | ||
| 51 | # Phase 3: Categorize and compare results (fast, local) | ||
| 52 | # Phase 4: Extract logs from systemd (requires VPS) | ||
| 53 | # Phase 5: Final classification (fast, local) | ||
| 54 | # | ||
| 55 | # EXAMPLES: | ||
| 56 | # # Dry run to see what would happen | ||
| 57 | # ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev --dry-run | ||
| 58 | # | ||
| 59 | # # Run only Phase 1 (fetch events) | ||
| 60 | # ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev --only-phase-1 | ||
| 61 | # | ||
| 62 | # # Resume from Phase 3 using existing Phase 1-2 data | ||
| 63 | # ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev --from-phase-3 --output work/migration-analysis-20260122-1430 | ||
| 64 | # | ||
| 65 | # # Full analysis on VPS with all features | ||
| 66 | # ./run-migration-analysis.sh \ | ||
| 67 | # --prod-relay wss://relay.ngit.dev \ | ||
| 68 | # --archive-relay wss://archive.relay.ngit.dev \ | ||
| 69 | # --prod-git /var/lib/grasp-relay/git \ | ||
| 70 | # --archive-git /var/lib/ngit-grasp/git \ | ||
| 71 | # --service ngit-grasp.service | ||
| 72 | # | ||
| 73 | # SEE ALSO: | ||
| 74 | # docs/how-to/migrate-to-ngit-grasp.md - Full migration guide | ||
| 75 | # | ||
| 76 | |||
# Strict mode: abort on errors, on unset variables and on failed pipeline stages.
set -euo pipefail

# Directory containing this script, used to locate the sibling phase scripts.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# ANSI color escapes for log output. They default to empty strings and are
# only filled in when stdout is a terminal, so redirected output stays clean.
RED='' GREEN='' YELLOW='' BLUE='' CYAN='' BOLD='' NC=''
if [[ -t 1 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    CYAN='\033[0;36m'
    BOLD='\033[1m'
    NC='\033[0m'
fi
| 100 | |||
# Logging functions

# log_header TEXT...
# Prints TEXT to stdout as a banner: a blank line, a double-line rule, the
# text, the rule again and a closing blank line.
log_header() {
    local rule="════════════════════════════════════════════════════════════════"
    local title="$*"

    echo ""
    echo -e "${BOLD}${CYAN}${rule}${NC}"
    echo -e "${BOLD}${CYAN} ${title}${NC}"
    echo -e "${BOLD}${CYAN}${rule}${NC}"
    echo ""
}
| 109 | |||
# log_phase TEXT...
# Prints TEXT to stdout inside a box-drawing frame, preceded by a blank line.
log_phase() {
    local top="┌──────────────────────────────────────────────────────────────┐"
    local bottom="└──────────────────────────────────────────────────────────────┘"
    local title="$*"

    echo ""
    echo -e "${BOLD}${BLUE}${top}${NC}"
    echo -e "${BOLD}${BLUE}│ ${title}${NC}"
    echo -e "${BOLD}${BLUE}${bottom}${NC}"
}
| 116 | |||
# log_info TEXT...
# Writes an "[INFO]"-tagged message to stderr.
log_info() {
    local message="$*"
    echo -e "${BLUE}[INFO]${NC} ${message}" >&2
}
| 120 | |||
# log_success TEXT...
# Writes an "[OK]"-tagged message to stderr.
log_success() {
    local message="$*"
    echo -e "${GREEN}[OK]${NC} ${message}" >&2
}
| 124 | |||
# log_warn TEXT...
# Writes a "[WARN]"-tagged message to stderr.
log_warn() {
    local message="$*"
    echo -e "${YELLOW}[WARN]${NC} ${message}" >&2
}
| 128 | |||
# log_error TEXT...
# Writes an "[ERROR]"-tagged message to stderr.
log_error() {
    local message="$*"
    echo -e "${RED}[ERROR]${NC} ${message}" >&2
}
| 132 | |||
# log_step TEXT...
# Writes an arrow-prefixed sub-step message to stderr.
log_step() {
    local message="$*"
    echo -e "${CYAN} →${NC} ${message}" >&2
}
| 136 | |||
# Default values for the command line options (empty means "not given").
PROD_RELAY="" ARCHIVE_RELAY=""     # --prod-relay / --archive-relay
PROD_GIT="" ARCHIVE_GIT=""         # --prod-git / --archive-git
SERVICE_NAME=""                    # --service
OUTPUT_DIR=""                      # --output
DRY_RUN=false                      # --dry-run
CONTINUE_ON_ERROR=false            # --continue-on-error
LOG_SINCE="" LOG_UNTIL=""          # --since / --until

# Phase control: per-phase skip flags plus the --only-phase-N and
# --from-phase-N selections (empty when not requested).
SKIP_PHASE_1=false SKIP_PHASE_2=false SKIP_PHASE_3=false
SKIP_PHASE_4=false SKIP_PHASE_5=false
ONLY_PHASE=""
FROM_PHASE=""

# Timing information per phase.
declare -A PHASE_TIMES
| 160 | |||
# Print the help text and exit 0. The help is the script's own header comment:
# lines 3-73 of this file with the leading "# " (or bare "#") removed.
usage() {
    sed -n '3,73p' "$0" | sed -e 's/^# //' -e 's/^#//'
    exit 0
}
| 165 | |||
| 166 | # Parse command line arguments | ||
# Parse the command line into the global configuration variables.
#
# Arguments: the script's own "$@".
# Side effects: sets PROD_RELAY, ARCHIVE_RELAY, PROD_GIT, ARCHIVE_GIT,
#   SERVICE_NAME, OUTPUT_DIR, LOG_SINCE, LOG_UNTIL, SKIP_PHASE_1..5,
#   ONLY_PHASE, FROM_PHASE, DRY_RUN and CONTINUE_ON_ERROR.
# Exits: 0 via usage() for --help/-h; 1 for an unknown option or for a
#   value-taking option that was given without a value.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --prod-relay|--archive-relay|--prod-git|--archive-git|--service|--output|--since|--until)
                # Guard the "$2" read: without it a trailing option dies with a
                # bare "unbound variable" under `set -u`, and without `set -u`
                # the failing `shift 2` would leave the loop spinning forever.
                if [[ $# -lt 2 ]]; then
                    log_error "Option $1 requires a value"
                    echo "Use --help for usage information."
                    exit 1
                fi
                case "$1" in
                    --prod-relay)    PROD_RELAY="$2" ;;
                    --archive-relay) ARCHIVE_RELAY="$2" ;;
                    --prod-git)      PROD_GIT="$2" ;;
                    --archive-git)   ARCHIVE_GIT="$2" ;;
                    --service)       SERVICE_NAME="$2" ;;
                    --output)        OUTPUT_DIR="$2" ;;
                    --since)         LOG_SINCE="$2" ;;
                    --until)         LOG_UNTIL="$2" ;;
                esac
                shift 2
                ;;
            --skip-phase-1)
                SKIP_PHASE_1=true
                shift
                ;;
            --skip-phase-2)
                SKIP_PHASE_2=true
                shift
                ;;
            --skip-phase-3)
                SKIP_PHASE_3=true
                shift
                ;;
            --skip-phase-4)
                SKIP_PHASE_4=true
                shift
                ;;
            --skip-phase-5)
                SKIP_PHASE_5=true
                shift
                ;;
            --only-phase-1|--only-phase-2|--only-phase-3|--only-phase-4|--only-phase-5)
                # Keep only the trailing phase number.
                ONLY_PHASE="${1#--only-phase-}"
                shift
                ;;
            --from-phase-1|--from-phase-2|--from-phase-3|--from-phase-4|--from-phase-5)
                FROM_PHASE="${1#--from-phase-}"
                shift
                ;;
            --dry-run)
                DRY_RUN=true
                shift
                ;;
            --continue-on-error)
                CONTINUE_ON_ERROR=true
                shift
                ;;
            --help|-h)
                usage
                ;;
            *)
                log_error "Unknown option: $1"
                echo "Use --help for usage information."
                exit 1
                ;;
        esac
    done
}
| 249 | |||
| 250 | # Validate required arguments | ||
# Check the parsed configuration.
#
# Both relay URLs must be present and start with ws:// or wss://; violations
# are logged as errors and make the function print a usage hint and exit 1.
# A git path that was supplied but is not a directory only produces warnings.
validate_args() {
    local failed=false
    local side ref

    # Pass 1: both relay options are mandatory.
    for side in prod archive; do
        ref="${side^^}_RELAY"
        if [[ -z "${!ref}" ]]; then
            log_error "Missing required option: --${side}-relay"
            failed=true
        fi
    done

    # Pass 2: any relay URL that was given must use a websocket scheme.
    for side in prod archive; do
        ref="${side^^}_RELAY"
        if [[ -n "${!ref}" && ! "${!ref}" =~ ^wss?:// ]]; then
            log_error "Invalid ${side} relay URL: ${!ref} (must start with ws:// or wss://)"
            failed=true
        fi
    done

    # Pass 3: git paths are optional; a missing directory is only a warning
    # because the script may be prepared on a machine other than the VPS.
    for side in prod archive; do
        ref="${side^^}_GIT"
        if [[ -n "${!ref}" && ! -d "${!ref}" ]]; then
            log_warn "${side^} git directory not found: ${!ref}"
            log_warn "Phase 2 will fail unless running on VPS with access to this path."
        fi
    done

    if [[ "$failed" == "true" ]]; then
        echo ""
        echo "Use --help for usage information."
        exit 1
    fi
}
| 292 | |||
| 293 | # Check prerequisites | ||
# Verify that everything the pipeline needs is available.
#
# Required commands and the per-phase helper scripts (which must be executable
# files in SCRIPT_DIR) are mandatory; any missing one ends the script with
# exit 1. journalctl is optional: when absent, SKIP_PHASE_4 is set to true.
check_prerequisites() {
    local -a required_tools=(git nak jq awk sort)
    local -a phase_scripts=(
        01-fetch-events.sh
        10-check-git-sync.sh
        20-categorize.sh
        21-compare-relays.sh
        22-compare-git-data.sh
        30-extract-parse-failures.sh
        31-extract-purgatory-expiry.sh
        40-classify-actions.sh
    )
    local failed=false
    local name

    log_info "Checking prerequisites..."

    # Required tools
    for name in "${required_tools[@]}"; do
        if ! command -v "$name" > /dev/null 2>&1; then
            log_error "$name: NOT FOUND"
            failed=true
            continue
        fi
        log_step "$name: found"
    done

    # Optional tools
    if ! command -v journalctl > /dev/null 2>&1; then
        log_step "journalctl: not found (Phase 4 will be skipped)"
        SKIP_PHASE_4=true
    else
        log_step "journalctl: found (Phase 4 available)"
    fi

    if [[ "$failed" == "true" ]]; then
        log_error "Missing required tools. Install them and try again."
        exit 1
    fi

    # Helper scripts must sit next to this script and be executable.
    for name in "${phase_scripts[@]}"; do
        if [[ ! -x "$SCRIPT_DIR/$name" ]]; then
            log_error "Script not found or not executable: $SCRIPT_DIR/$name"
            failed=true
        fi
    done

    if [[ "$failed" == "true" ]]; then
        exit 1
    fi

    log_success "All prerequisites satisfied"
}
| 336 | |||
| 337 | # Determine which phases to run | ||
# Turn the phase selectors into the final SKIP_PHASE_1..5 flags.
#
# --only-phase-N skips every phase except N; --from-phase-N skips every phase
# below N. Phase 2 is additionally skipped when neither git path was given and
# Phase 4 when no service name was given; a warning is logged only when that
# changes a flag that was not already "true".
determine_phases() {
    local n

    # Handle --only-phase-N
    if [[ -n "$ONLY_PHASE" ]]; then
        for n in 1 2 3 4 5; do
            if [[ "$n" != "$ONLY_PHASE" ]]; then
                printf -v "SKIP_PHASE_$n" '%s' true
            fi
        done
    fi

    # Handle --from-phase-N
    if [[ -n "$FROM_PHASE" ]]; then
        for n in 1 2 3 4 5; do
            if (( n < FROM_PHASE )); then
                printf -v "SKIP_PHASE_$n" '%s' true
            fi
        done
    fi

    # Phase 2 needs at least one git path.
    if [[ -z "$PROD_GIT" && -z "$ARCHIVE_GIT" && "$SKIP_PHASE_2" != "true" ]]; then
        log_warn "No git paths provided. Phase 2 (git sync check) will be skipped."
        log_warn "Use --prod-git and --archive-git to enable Phase 2."
        SKIP_PHASE_2=true
    fi

    # Phase 4 needs a service name to read logs from.
    if [[ -z "$SERVICE_NAME" && "$SKIP_PHASE_4" != "true" ]]; then
        log_warn "No service name provided. Phase 4 (log extraction) will be skipped."
        log_warn "Use --service to enable Phase 4."
        SKIP_PHASE_4=true
    fi
}
| 375 | |||
| 376 | # Setup output directory | ||
# Choose and prepare the output directory.
#
# When OUTPUT_DIR is empty it defaults to a timestamped directory under work/.
# In dry-run mode nothing is created. Otherwise the prod/raw, archive/raw,
# comparison, logs and results sub-directories are created and the effective
# configuration is written to config.txt.
setup_output_dir() {
    : "${OUTPUT_DIR:=work/migration-analysis-$(date +%Y%m%d-%H%M)}"

    log_info "Output directory: $OUTPUT_DIR"

    if [[ "$DRY_RUN" == "true" ]]; then
        log_info "[DRY RUN] Would create directory structure"
        return
    fi

    local sub
    for sub in prod/raw archive/raw comparison logs results; do
        mkdir -p "$OUTPUT_DIR/$sub"
    done

    # Save configuration
    printf '%s\n' \
        "# Migration Analysis Configuration" \
        "# Generated: $(date -Iseconds)" \
        "" \
        "PROD_RELAY=$PROD_RELAY" \
        "ARCHIVE_RELAY=$ARCHIVE_RELAY" \
        "PROD_GIT=$PROD_GIT" \
        "ARCHIVE_GIT=$ARCHIVE_GIT" \
        "SERVICE_NAME=$SERVICE_NAME" \
        "OUTPUT_DIR=$OUTPUT_DIR" \
        > "$OUTPUT_DIR/config.txt"

    log_success "Created output directory structure"
}
| 406 | |||
| 407 | # Run a phase with timing and error handling | ||
# Run the commands of one phase with skip/dry-run handling, timing and error
# handling.
#
# Arguments:
#   $1      phase number (1-5); selects SKIP_PHASE_<n> and the PHASE_TIMES key
#   $2      human-readable phase name for the log output
#   $3...   command strings, each executed with `eval` in order
# Returns:
#   0 when the phase is skipped, in dry-run mode, or when every command
#   succeeded; 1 when at least one command failed.
# Side effects:
#   Adds the elapsed seconds to PHASE_TIMES[<n>]. The time is accumulated
#   rather than overwritten so that a phase run in several steps (Phase 3 and
#   its ancestry sub-step) reports its total duration in the summary.
run_phase() {
    local phase_num="$1"
    local phase_name="$2"
    shift 2
    local cmd=("$@")
    local c

    local skip_var="SKIP_PHASE_$phase_num"
    if [[ "${!skip_var}" == "true" ]]; then
        log_phase "Phase $phase_num: $phase_name [SKIPPED]"
        return 0
    fi

    log_phase "Phase $phase_num: $phase_name"

    if [[ "$DRY_RUN" == "true" ]]; then
        log_info "[DRY RUN] Would execute:"
        for c in "${cmd[@]}"; do
            echo " $c"
        done
        return 0
    fi

    local start_time
    start_time=$(date +%s)

    local exit_code=0

    # NOTE(review): each command is a pre-quoted string built by the callers
    # with single quotes around every path. A path that itself contains a
    # single quote would break that quoting under eval.
    for c in "${cmd[@]}"; do
        log_step "Running: $c"
        if ! eval "$c"; then
            exit_code=1
            if [[ "$CONTINUE_ON_ERROR" == "true" ]]; then
                log_warn "Command failed, continuing due to --continue-on-error"
            else
                log_error "Command failed"
                break
            fi
        fi
    done

    local end_time
    end_time=$(date +%s)
    local duration=$((end_time - start_time))
    PHASE_TIMES[$phase_num]=$(( ${PHASE_TIMES[$phase_num]:-0} + duration ))

    if [[ $exit_code -eq 0 ]]; then
        log_success "Phase $phase_num completed in ${duration}s"
    else
        log_error "Phase $phase_num failed after ${duration}s"
    fi

    return "$exit_code"
}
| 465 | |||
| 466 | # Phase 1: Fetch events | ||
# Phase 1: run the fetch script once for the prod relay and once for the
# archive relay, writing into the prod/ and archive/ output sub-directories.
run_phase_1() {
    local fetcher="$SCRIPT_DIR/01-fetch-events.sh"

    run_phase 1 "Fetch Events (~30s each)" \
        "'$fetcher' '$PROD_RELAY' '$OUTPUT_DIR/prod'" \
        "'$fetcher' '$ARCHIVE_RELAY' '$OUTPUT_DIR/archive'"
}
| 478 | |||
| 479 | # Phase 2: Git sync check | ||
# Phase 2: queue a git sync check for each relay whose git path was supplied
# (prod first, then archive) and hand the queue to run_phase. A relay without
# a git path is skipped with a warning; with no paths at all nothing is run.
run_phase_2() {
    local checker="$SCRIPT_DIR/10-check-git-sync.sh"
    local -a queue=()
    local side git_ref git_dir

    for side in prod archive; do
        git_ref="${side^^}_GIT"
        git_dir="${!git_ref}"
        if [[ -z "$git_dir" ]]; then
            log_warn "Skipping ${side} git sync check (no --${side}-git provided)"
            continue
        fi
        queue+=("'$checker' '$OUTPUT_DIR/${side}/raw/state-events.json' '$git_dir' '$OUTPUT_DIR/${side}' --categorize")
    done

    if (( ${#queue[@]} == 0 )); then
        log_warn "No git paths provided, skipping Phase 2"
        return 0
    fi

    run_phase 2 "Git Sync Check (~20 min each)" "${queue[@]}"
}
| 502 | |||
| 503 | # Phase 3: Categorize and compare | ||
# Phase 3: categorise each relay's git sync results, compare the two relays
# and, when both git paths are known, compare git ancestry.
#
# For each relay, categorisation is queued when git-sync-status.tsv exists but
# category1-complete-match.txt does not. The relay comparison is queued when
# both relays have (or are about to get) category files, so the fallback
# categorisation and the comparison happen in the same pass. Otherwise
# placeholder comparison files are written (not in dry-run mode).
#
# Returns:
#   0 when nothing had to run or every step succeeded; 1 when any run_phase
#   step failed. The status is tracked explicitly because main calls this
#   function on the left of `||`, which disables errexit inside it; without
#   the explicit status a failure would be hidden by the last statement.
run_phase_3() {
    local cmds=()
    local status=0
    local ready_sides=0
    local side side_dir f

    for side in prod archive; do
        side_dir="$OUTPUT_DIR/$side"
        if [[ -f "$side_dir/category1-complete-match.txt" ]]; then
            # Already categorised (normally by Phase 2's --categorize).
            ready_sides=$((ready_sides + 1))
        elif [[ -f "$side_dir/git-sync-status.tsv" ]]; then
            # Sync data without category files: categorise now.
            cmds+=("'$SCRIPT_DIR/20-categorize.sh' '$side_dir/git-sync-status.tsv' '$side_dir'")
            ready_sides=$((ready_sides + 1))
        fi
    done

    if [[ $ready_sides -eq 2 ]]; then
        cmds+=("'$SCRIPT_DIR/21-compare-relays.sh' '$OUTPUT_DIR/prod' '$OUTPUT_DIR/archive' '$OUTPUT_DIR/comparison'")
    else
        log_warn "Missing category files for comparison."
        log_warn "Phase 2 must complete successfully before Phase 3 can compare relays."

        # Create placeholder comparison files if they don't exist
        if [[ "$DRY_RUN" != "true" ]]; then
            mkdir -p "$OUTPUT_DIR/comparison"
            for f in complete-in-both.txt complete-prod-missing-archive.txt complete-prod-incomplete-archive.txt incomplete-in-both.txt in-archive-not-prod.txt; do
                if [[ ! -f "$OUTPUT_DIR/comparison/$f" ]]; then
                    echo "# Placeholder - Phase 2 data not available" > "$OUTPUT_DIR/comparison/$f"
                fi
            done
            echo "# Comparison not available - Phase 2 data missing" > "$OUTPUT_DIR/comparison/summary.txt"
        fi
    fi

    if [[ ${#cmds[@]} -eq 0 ]]; then
        log_warn "No categorization or comparison needed (already done or missing input)"
        return 0
    fi

    run_phase 3 "Categorize & Compare (fast)" "${cmds[@]}" || status=1

    # Phase 3c: compare git data between relays (requires both git paths).
    # This determines if archive is ahead of prod for repos with mismatched state.
    if [[ -n "$PROD_GIT" && -n "$ARCHIVE_GIT" ]]; then
        # Repos to compare: those where prod is complete but archive is not.
        local repos_to_compare="$OUTPUT_DIR/comparison/complete-prod-incomplete-archive.txt"
        if [[ -f "$repos_to_compare" && ! -f "$OUTPUT_DIR/comparison/git-ancestry.tsv" ]]; then
            log_info "Running git ancestry comparison (Phase 3c)..."
            run_phase 3 "Git Ancestry Comparison" "'$SCRIPT_DIR/22-compare-git-data.sh' '$PROD_GIT' '$ARCHIVE_GIT' '$repos_to_compare' '$OUTPUT_DIR/comparison'" || status=1
        fi
    else
        log_warn "Git paths not provided - skipping git ancestry comparison"
        log_warn "Without git comparison, repos where archive is ahead will be incorrectly flagged as needing re-sync"
    fi

    return "$status"
}
| 569 | |||
| 570 | # Phase 4: Extract logs | ||
# Phase 4: extract structured log entries for SERVICE_NAME.
#
# Returns 0 without doing anything when no service name is set, and 1 (after
# printing guidance) when the name contains "ngit-relay". A name that does not
# contain "grasp" only triggers a warning. Otherwise both log-extraction
# scripts are run through run_phase, with --since/--until forwarded when set.
run_phase_4() {
    if [[ -z "$SERVICE_NAME" ]]; then
        log_warn "No service name provided, skipping Phase 4"
        return 0
    fi

    # Structured logging only exists in ngit-grasp, not ngit-relay, so an
    # ngit-relay service name is rejected outright.
    if [[ "$SERVICE_NAME" == *"ngit-relay"* ]]; then
        local -a guidance=(
            "SERVICE_NAME appears to be ngit-relay: $SERVICE_NAME"
            ""
            "Phase 4 requires an ngit-grasp service with structured logging."
            "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists"
            "in ngit-grasp services, NOT in ngit-relay services."
            ""
            "Please update --service to use the ngit-grasp archive service."
            ""
            "To find the correct service name:"
            " systemctl list-units 'ngit-grasp*' --all"
            ""
            "Common ngit-grasp service names:"
            " - ngit-grasp.service"
            " - ngit-grasp-relay-ngit-dev.service (NixOS multi-instance)"
            " - ngit-grasp-archive.service"
        )
        local line
        for line in "${guidance[@]}"; do
            log_error "$line"
        done
        return 1
    fi

    # Warn if service name doesn't look like ngit-grasp
    if ! [[ "$SERVICE_NAME" == *"ngit-grasp"* || "$SERVICE_NAME" == *"grasp"* ]]; then
        log_warn "SERVICE_NAME doesn't contain 'ngit-grasp': $SERVICE_NAME"
        log_warn "Structured logging only exists in ngit-grasp services."
        log_warn "If this is not an ngit-grasp service, Phase 4 will find no logs."
    fi

    # Optional time window, forwarded verbatim to both extraction scripts.
    local log_opts=""
    if [[ -n "$LOG_SINCE" ]]; then
        log_opts+=" --since '$LOG_SINCE'"
    fi
    if [[ -n "$LOG_UNTIL" ]]; then
        log_opts+=" --until '$LOG_UNTIL'"
    fi

    local target="'$SERVICE_NAME' '$OUTPUT_DIR/logs' $log_opts"

    run_phase 4 "Extract Logs (VPS required)" \
        "'$SCRIPT_DIR/30-extract-parse-failures.sh' $target" \
        "'$SCRIPT_DIR/31-extract-purgatory-expiry.sh' $target"
}
| 621 | |||
| 622 | # Phase 5: Final classification | ||
# Phase 5: run the final classification over the collected output.
#
# In dry-run mode, or when Phase 5 is skipped, the call goes straight to
# run_phase (which prints the skip banner or the command it would run). The
# input checks are bypassed there because a dry run never creates the output
# directories, so checking them would make every fresh dry run fail.
#
# Otherwise the prod, archive and comparison directories must exist; placeholder
# log files are created when Phase 4 produced none.
#
# Returns: run_phase's status, or 1 when a required input directory is missing.
run_phase_5() {
    local classify_cmd="'$SCRIPT_DIR/40-classify-actions.sh' '$OUTPUT_DIR'"

    if [[ "$DRY_RUN" == "true" || "${SKIP_PHASE_5:-false}" == "true" ]]; then
        run_phase 5 "Final Classification (fast)" "$classify_cmd"
        return
    fi

    # Check if we have the minimum required inputs.
    local can_run=true
    local sub
    for sub in prod archive comparison; do
        if [[ ! -d "$OUTPUT_DIR/$sub" ]]; then
            log_warn "Missing $sub directory"
            can_run=false
        fi
    done

    # Create logs directory with placeholder files if missing.
    local f
    mkdir -p "$OUTPUT_DIR/logs"
    for f in parse-failures.txt purgatory-expired.txt; do
        if [[ ! -f "$OUTPUT_DIR/logs/$f" ]]; then
            echo "# No data - Phase 4 not run" > "$OUTPUT_DIR/logs/$f"
        fi
    done

    if [[ "$can_run" == "false" ]]; then
        log_error "Cannot run Phase 5 - missing required input directories"
        return 1
    fi

    run_phase 5 "Final Classification (fast)" "$classify_cmd"
}
| 659 | |||
| 660 | # Display summary | ||
# Print the end-of-run report on stdout: output directory, per-phase timing
# with a total, key lines from results/summary.txt when that file exists, the
# list of result files and suggested next steps.
#
# For each of the three result categories the first matching line of the
# summary and the line after it are shown. "Action Required" is matched only
# on lines that do not also contain "No Action Required"; a plain substring
# search would hit the "No Action Required" line first and repeat that block
# instead of showing the real action count.
display_summary() {
    log_header "Migration Analysis Complete"

    echo "Output Directory: $OUTPUT_DIR"
    echo ""

    # Phase timing summary
    echo "Phase Timing:"
    local total_time=0
    local phase skip_var t
    for phase in 1 2 3 4 5; do
        skip_var="SKIP_PHASE_$phase"
        if [[ "${!skip_var}" == "true" ]]; then
            echo " Phase $phase: SKIPPED"
        elif [[ -n "${PHASE_TIMES[$phase]:-}" ]]; then
            t="${PHASE_TIMES[$phase]}"
            echo " Phase $phase: ${t}s"
            total_time=$((total_time + t))
        else
            echo " Phase $phase: N/A"
        fi
    done
    echo " ─────────────"
    echo " Total: ${total_time}s"
    echo ""

    # Results summary
    local summary="$OUTPUT_DIR/results/summary.txt"
    if [[ -f "$summary" ]]; then
        echo "Results Summary:"
        echo ""
        # Print the first line containing `want` (and not `skip`, when given)
        # together with the line that follows it, then stop.
        local first_hit='index($0, want) && !(skip != "" && index($0, skip)) { print; if ((getline follow) > 0) print follow; exit }'
        awk -v want="No Action Required" -v skip="" "$first_hit" "$summary"
        awk -v want="Action Required" -v skip="No Action Required" "$first_hit" "$summary"
        awk -v want="Manual Investigation" -v skip="" "$first_hit" "$summary"
        echo ""
    fi

    # Output files
    echo "Output Files:"
    echo " $OUTPUT_DIR/results/no-action-required.txt"
    echo " $OUTPUT_DIR/results/action-required.txt"
    echo " $OUTPUT_DIR/results/manual-investigation.txt"
    echo " $OUTPUT_DIR/results/summary.txt"
    echo ""

    # Next steps
    echo "Next Steps:"
    echo " 1. Review results/summary.txt for overview"
    echo " 2. Address items in results/action-required.txt"
    echo " 3. Investigate items in results/manual-investigation.txt"
    echo " 4. Plan migration window when action items are resolved"
    echo ""
}
| 719 | |||
| 720 | # Main | ||
# Entry point: parse and validate the command line, print the configuration
# and phase plan, run the five phases in order, print the summary (unless in
# dry-run mode) and exit with 1 if any phase reported failure, else 0.
main() {
    parse_args "$@"

    log_header "GRASP Relay to ngit-grasp Migration Analysis"

    validate_args
    check_prerequisites
    determine_phases
    setup_output_dir

    # Show configuration; optional settings are listed only when set.
    log_info "Configuration:"
    log_step "Prod relay: $PROD_RELAY"
    log_step "Archive relay: $ARCHIVE_RELAY"
    if [[ -n "$PROD_GIT" ]]; then
        log_step "Prod git: $PROD_GIT"
    fi
    if [[ -n "$ARCHIVE_GIT" ]]; then
        log_step "Archive git: $ARCHIVE_GIT"
    fi
    if [[ -n "$SERVICE_NAME" ]]; then
        log_step "Service: $SERVICE_NAME"
    fi
    log_step "Output: $OUTPUT_DIR"
    echo ""

    # Show phase plan
    log_info "Phase Plan:"
    local phase skip_var plan
    for phase in 1 2 3 4 5; do
        skip_var="SKIP_PHASE_$phase"
        plan="RUN"
        if [[ "${!skip_var}" == "true" ]]; then
            plan="SKIP"
        fi
        log_step "Phase $phase: $plan"
    done
    echo ""

    if [[ "$DRY_RUN" == "true" ]]; then
        log_warn "DRY RUN MODE - No changes will be made"
        echo ""
    fi

    # Run phases. Each runner sits on the left of `||` so that a failing phase
    # is recorded without stopping the remaining phases.
    local overall_exit=0
    local phase_runner
    for phase_runner in run_phase_1 run_phase_2 run_phase_3 run_phase_4 run_phase_5; do
        "$phase_runner" || overall_exit=1
    done

    # Display summary
    if [[ "$DRY_RUN" != "true" ]]; then
        display_summary
    fi

    if [[ $overall_exit -ne 0 ]]; then
        log_warn "Some phases failed. Review output for details."
    fi

    exit $overall_exit
}
| 778 | |||
| 779 | main "$@" | ||
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/validate-service.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/validate-service.sh new file mode 100755 index 0000000..6988af3 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/validate-service.sh | |||
| @@ -0,0 +1,151 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # validate-service.sh - Validate service name for structured logging | ||
| 4 | # | ||
| 5 | # This helper script validates that a service name is appropriate for | ||
| 6 | # Phase 4 log extraction. Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) | ||
| 7 | # only exists in ngit-grasp services, NOT in ngit-relay services. | ||
| 8 | # | ||
| 9 | # USAGE: | ||
| 10 | # Source this script and call the validation function: | ||
| 11 | # | ||
| 12 | # source validate-service.sh | ||
| 13 | # validate_service_for_structured_logging "$SERVICE_NAME" || exit 1 | ||
| 14 | # | ||
| 15 | # BACKGROUND: | ||
| 16 | # Phase 4 of the migration analysis extracts structured log entries from | ||
| 17 | # journald. These log entries only exist in ngit-grasp services. If you | ||
| 18 | # accidentally specify an ngit-relay service, Phase 4 will find no logs | ||
| 19 | # and produce empty results. | ||
| 20 | # | ||
| 21 | # This validation prevents that common mistake by: | ||
| 22 | # 1. Checking if the service name contains "ngit-relay" (error) | ||
| 23 | # 2. Warning if the service name doesn't contain "ngit-grasp" | ||
| 24 | # 3. Optionally checking if structured logs actually exist | ||
| 25 | # | ||
| 26 | # SEE ALSO: | ||
| 27 | # docs/how-to/migrate-to-ngit-grasp.md - Full migration guide | ||
| 28 | # 30-extract-parse-failures.sh - Uses this validation | ||
| 29 | # 31-extract-purgatory-expiry.sh - Uses this validation | ||
| 30 | # | ||
| 31 | |||
| 32 | # Colors for output (disabled if not a terminal) | ||
# Every message in this file is written to stderr, so colour is decided by
# whether stderr (fd 2) is a terminal. Testing stdout would emit raw escape
# codes into a redirected stderr, or drop colour when only stdout is piped.
if [[ -t 2 ]]; then
    _VS_RED='\033[0;31m'
    _VS_YELLOW='\033[0;33m'
    _VS_NC='\033[0m'
else
    _VS_RED=''
    _VS_YELLOW=''
    _VS_NC=''
fi
| 42 | |||
| 43 | # Validates that the service name is appropriate for structured logging | ||
| 44 | # | ||
| 45 | # Arguments: | ||
| 46 | # $1 - service_name: The systemd service name to validate | ||
| 47 | # $2 - check_logs: Whether to check if logs actually exist (default: "true") | ||
| 48 | # $3 - interactive: Whether to prompt for confirmation (default: "true") | ||
| 49 | # | ||
| 50 | # Returns: | ||
| 51 | # 0 - Service is valid for structured logging | ||
| 52 | # 1 - Service is invalid or user declined to continue | ||
| 53 | # | ||
| 54 | # Example: | ||
| 55 | # validate_service_for_structured_logging "ngit-grasp.service" || exit 1 | ||
| 56 | # validate_service_for_structured_logging "ngit-grasp.service" "false" # Skip log check | ||
| 57 | # validate_service_for_structured_logging "ngit-grasp.service" "true" "false" # Non-interactive | ||
| 58 | # | ||
# Validate that a systemd service name is suitable for structured-log
# extraction. All diagnostics go to stderr.
#
# Arguments:
#   $1  service name to validate
#   $2  "true" (default) to also look for structured log entries in journald
#   $3  "true" (default) to ask for confirmation after a warning; any other
#       value proceeds without asking
# Returns:
#   0 when the service is acceptable (or the user chose to continue);
#   1 when the name contains "ngit-relay" or the user declined.
#
# The function is kept self-contained (no helper functions) because only this
# one function is exported with `export -f`.
validate_service_for_structured_logging() {
    local service_name="$1"
    local check_logs="${2:-true}"
    local interactive="${3:-true}"
    # Keep the prompt answer local so the caller's REPLY is left untouched.
    local REPLY=""

    # Check if service name looks like ngit-relay (ERROR - wrong service type)
    if [[ "$service_name" == *"ngit-relay"* ]]; then
        echo -e "${_VS_RED}ERROR: Service name appears to be ngit-relay: $service_name${_VS_NC}" >&2
        echo "" >&2
        echo "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists in" >&2
        echo "ngit-grasp services, NOT in ngit-relay services." >&2
        echo "" >&2
        echo "Please use the ngit-grasp archive service instead." >&2
        echo "" >&2
        echo "To find the correct service name:" >&2
        echo " systemctl list-units 'ngit-grasp*' --all" >&2
        echo "" >&2
        echo "Common ngit-grasp service names:" >&2
        echo " - ngit-grasp.service" >&2
        echo " - ngit-grasp-relay-ngit-dev.service (NixOS multi-instance)" >&2
        echo " - ngit-grasp-archive.service" >&2
        return 1
    fi

    # Check if service name looks like ngit-grasp (WARNING if not)
    if [[ "$service_name" != *"ngit-grasp"* && "$service_name" != *"grasp"* ]]; then
        echo -e "${_VS_YELLOW}WARNING: Service name doesn't contain 'ngit-grasp': $service_name${_VS_NC}" >&2
        echo "" >&2
        echo "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists in" >&2
        echo "ngit-grasp services." >&2
        echo "" >&2

        if [[ "$interactive" == "true" ]]; then
            # `|| true`: EOF on stdin counts as "no" instead of aborting a
            # caller that runs under `set -e`.
            read -p "Continue anyway? (y/N) " -n 1 -r || true
            echo
            if [[ ! $REPLY =~ ^[Yy]$ ]]; then
                return 1
            fi
        else
            echo "Non-interactive mode: proceeding despite warning" >&2
        fi
    fi

    # Optionally check if structured logs actually exist
    if [[ "$check_logs" == "true" ]]; then
        # Check if journalctl is available
        if ! command -v journalctl &> /dev/null; then
            echo -e "${_VS_YELLOW}WARNING: journalctl not available, cannot verify logs exist${_VS_NC}" >&2
            return 0
        fi

        # Count lines carrying either structured tag in a single journal pass.
        # IMPORTANT: --no-pager prevents hanging when run non-interactively
        # (e.g., via SSH). `|| true` absorbs grep's exit status 1 on zero
        # matches (and a journalctl failure under the caller's pipefail)
        # without appending a second number to the captured count.
        local structured_count
        structured_count=$(journalctl --no-pager -u "$service_name" --since "7 days ago" 2>/dev/null \
            | grep -cE '\[PARSE_FAIL\]|\[PURGATORY_EXPIRED\]' || true)

        # Defensive normalisation: keep digits only, default to 0.
        structured_count="${structured_count//[^0-9]/}"
        structured_count="${structured_count:-0}"

        if [[ "$structured_count" -eq 0 ]]; then
            echo -e "${_VS_YELLOW}WARNING: No structured logs found in $service_name (last 7 days)${_VS_NC}" >&2
            echo "" >&2
            echo "This may indicate:" >&2
            echo " 1. Wrong service (should be ngit-grasp archive service, not ngit-relay)" >&2
            echo " 2. Structured logging not yet deployed to this ngit-grasp instance" >&2
            echo " 3. No parse failures or purgatory expiry events in the time window" >&2
            echo "" >&2
            echo "To verify you have the right service:" >&2
            echo " systemctl list-units 'ngit-grasp*' --all" >&2
            echo " journalctl -u <service> | grep -E '\\[PARSE_FAIL\\]|\\[PURGATORY_EXPIRED\\]' | head -5" >&2
            echo "" >&2

            if [[ "$interactive" == "true" ]]; then
                read -p "Continue anyway? (y/N) " -n 1 -r || true
                echo
                if [[ ! $REPLY =~ ^[Yy]$ ]]; then
                    return 1
                fi
            else
                echo "Non-interactive mode: proceeding despite warning" >&2
            fi
        fi
    fi

    return 0
}
| 149 | |||
| 150 | # Export the function so it can be used after sourcing | ||
| 151 | export -f validate_service_for_structured_logging | ||