upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/docs/archive/2026-01-relay-ngit-dev-migration/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'docs/archive/2026-01-relay-ngit-dev-migration/scripts')
-rwxr-xr-xdocs/archive/2026-01-relay-ngit-dev-migration/scripts/01-fetch-events.sh206
-rwxr-xr-xdocs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh564
-rwxr-xr-xdocs/archive/2026-01-relay-ngit-dev-migration/scripts/20-categorize.sh212
-rwxr-xr-xdocs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh294
-rwxr-xr-xdocs/archive/2026-01-relay-ngit-dev-migration/scripts/22-compare-git-data.sh390
-rwxr-xr-xdocs/archive/2026-01-relay-ngit-dev-migration/scripts/30-extract-parse-failures.sh774
-rwxr-xr-xdocs/archive/2026-01-relay-ngit-dev-migration/scripts/31-extract-purgatory-expiry.sh408
-rwxr-xr-xdocs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh662
-rwxr-xr-xdocs/archive/2026-01-relay-ngit-dev-migration/scripts/run-migration-analysis.sh779
-rwxr-xr-xdocs/archive/2026-01-relay-ngit-dev-migration/scripts/validate-service.sh151
10 files changed, 4440 insertions, 0 deletions
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/01-fetch-events.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/01-fetch-events.sh
new file mode 100755
index 0000000..e0d6f26
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/01-fetch-events.sh
@@ -0,0 +1,206 @@
1#!/usr/bin/env bash
2#
3# 01-fetch-events.sh - Fetch nostr events from a relay for migration analysis
4#
5# PHASE 1 of the GRASP relay to ngit-grasp migration analysis pipeline.
6# Fetches kind 30618 (state), 30617 (announcement), and 5 (deletion) events.
7#
8# USAGE:
9# ./01-fetch-events.sh <relay-url> <output-dir>
10#
11# EXAMPLES:
12# # Fetch from production relay
13# ./01-fetch-events.sh wss://relay.ngit.dev output/prod
14#
15# # Fetch from archive relay
16# ./01-fetch-events.sh wss://archive.relay.ngit.dev output/archive
17#
18# # Full migration analysis setup
19# mkdir -p work/migration-analysis-$(date +%Y%m%d-%H%M)
20# ./01-fetch-events.sh wss://relay.ngit.dev work/migration-analysis-*/prod
21# ./01-fetch-events.sh wss://archive.relay.ngit.dev work/migration-analysis-*/archive
22#
23# OUTPUT:
24# <output-dir>/raw/state-events.json - kind 30618 events (one per line, JSONL)
25# <output-dir>/raw/announcements.json - kind 30617 events (one per line, JSONL)
26# <output-dir>/raw/deletions.json - kind 5 events (one per line, JSONL)
27#
28# OUTPUT FORMAT:
29# Each file contains one JSON event per line (JSONL format).
30# Events are the raw nostr event objects as returned by the relay.
31#
32# PREREQUISITES:
33# - nak (Nostr Army Knife) - https://github.com/fiatjaf/nak
34# - jq (for counting/validation)
35#
36# RUNTIME: ~30 seconds per relay (depends on network and event count)
37#
38# NOTES:
39# - Uses --paginate to ensure all events are fetched (not just first page)
40# - If event counts are exact multiples of 250, pagination may have failed
41# - Run Phase 1 and Phase 2 back-to-back for accurate snapshot
42#
43# SEE ALSO:
44# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
45#
46
47set -euo pipefail
48
# Colors for log output (disabled when the destination is not a terminal).
# Every colored string is written by the log_* helpers, which print to stderr,
# so the terminal check must look at fd 2. Checking stdout would leak raw
# escape codes into a redirected stderr log (e.g. `script 2> fetch.log`).
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m' # No Color
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
63
# Write an informational message to stderr, prefixed with a blue [INFO] tag.
# Backslash escapes in the message are expanded, as with `echo -e`.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $*" >&2
}
67
# Write a success message to stderr, prefixed with a green [OK] tag.
# Backslash escapes in the message are expanded, as with `echo -e`.
log_success() {
    printf '%b\n' "${GREEN}[OK]${NC} $*" >&2
}
71
# Write a warning to stderr, prefixed with a yellow [WARN] tag.
# Backslash escapes in the message are expanded, as with `echo -e`.
log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2
}
75
# Write an error message to stderr, prefixed with a red [ERROR] tag.
# Backslash escapes in the message are expanded, as with `echo -e`.
log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}
79
# Print the command-line help to stdout and terminate with exit status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 <relay-url> <output-dir>" \
        "" \
        "Arguments:" \
        " relay-url WebSocket URL of the relay (e.g., wss://relay.ngit.dev)" \
        " output-dir Directory to store fetched events (e.g., output/prod)" \
        "" \
        "Examples:" \
        " $0 wss://relay.ngit.dev output/prod" \
        " $0 wss://archive.relay.ngit.dev output/archive"
    exit 1
}
92
93# Check prerequisites
# Verify that the external tools this script needs (nak, jq) are on PATH.
# Reports every missing tool via log_error, then exits 1 if any was missing.
check_prerequisites() {
    local all_present=1

    command -v nak > /dev/null 2>&1 || {
        log_error "nak not found. Install from: https://github.com/fiatjaf/nak"
        all_present=0
    }

    command -v jq > /dev/null 2>&1 || {
        log_error "jq not found. Install with your package manager."
        all_present=0
    }

    # Use a full `if` so the function returns 0 when nothing is missing.
    if [[ $all_present -eq 0 ]]; then
        exit 1
    fi
}
111
112# Fetch events of a specific kind
113# Args: $1=relay, $2=kind, $3=output_file, $4=description
# Fetch all events of one kind from a relay into a JSONL file.
# Arguments:
#   $1 - relay URL
#   $2 - event kind number
#   $3 - output file (overwritten)
#   $4 - human-readable description used in log messages
# Outputs: the number of lines written, on stdout; all logging goes to stderr.
# Returns: 0 on success, 1 if nak fails (its stderr is included in the error).
fetch_kind() {
    local relay="$1"
    local kind="$2"
    local output_file="$3"
    local description="$4"

    log_info "Fetching $description (kind $kind) from $relay..."

    local start_time
    start_time=$(date +%s)

    # Use --paginate to ensure we get all events, not just first page.
    # nak outputs one event per line (JSONL format).
    # Redirection order matters: stderr is pointed at the capture first, then
    # stdout at the file, so nak's diagnostics are kept for the failure report
    # instead of being thrown away.
    local nak_stderr
    if ! nak_stderr=$(nak req -k "$kind" --paginate "$relay" 2>&1 > "$output_file"); then
        log_error "Failed to fetch $description from $relay"
        if [[ -n "$nak_stderr" ]]; then
            log_error "nak: $nak_stderr"
        fi
        return 1
    fi

    local end_time
    end_time=$(date +%s)
    local duration=$((end_time - start_time))

    # Count events (one per line)
    local count
    count=$(wc -l < "$output_file" | tr -d ' ')

    # Warn if count is suspicious (exact multiple of 250 suggests pagination issue)
    if [[ $count -gt 0 ]] && [[ $((count % 250)) -eq 0 ]]; then
        log_warn "$description count ($count) is exact multiple of 250 - pagination may have failed!"
    fi

    log_success "Fetched $count $description in ${duration}s -> $output_file"

    echo "$count"
}
149
150# Main
# Entry point: validate arguments, fetch the three event kinds into
# <output-dir>/raw, then print a summary (stderr) and the file list (stdout).
main() {
    [[ $# -eq 2 ]] || usage

    local relay="$1"
    local output_dir="$2"

    # Only WebSocket URLs are accepted.
    case "$relay" in
        ws://* | wss://*) ;;
        *)
            log_error "Invalid relay URL: $relay (must start with ws:// or wss://)"
            exit 1
            ;;
    esac

    check_prerequisites

    log_info "Starting event fetch from $relay"
    log_info "Output directory: $output_dir"

    # All fetched files live under <output-dir>/raw
    local raw_dir="$output_dir/raw"
    local states_path="$raw_dir/state-events.json"
    local announcements_path="$raw_dir/announcements.json"
    local deletions_path="$raw_dir/deletions.json"
    mkdir -p "$raw_dir"

    local started_at
    started_at=$(date +%s)

    # One fetch per event kind; each call prints its event count on stdout.
    local state_count
    state_count=$(fetch_kind "$relay" 30618 "$states_path" "state events")
    local announcement_count
    announcement_count=$(fetch_kind "$relay" 30617 "$announcements_path" "announcements")
    local deletion_count
    deletion_count=$(fetch_kind "$relay" 5 "$deletions_path" "deletion requests")

    local finished_at
    finished_at=$(date +%s)
    local total_duration=$((finished_at - started_at))

    # Summary
    echo ""
    log_info "=== Fetch Summary ==="
    log_info "Relay: $relay"
    log_info "Output: $output_dir"
    log_info "State events (30618): $state_count"
    log_info "Announcements (30617): $announcement_count"
    log_info "Deletions (5): $deletion_count"
    log_info "Total time: ${total_duration}s"
    echo ""

    # Output file listing for easy copy/paste
    log_info "Output files:"
    printf ' %s\n' "$states_path" "$announcements_path" "$deletions_path"
}
205
206main "$@"
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh
new file mode 100755
index 0000000..b4536cb
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh
@@ -0,0 +1,564 @@
1#!/usr/bin/env bash
2#
3# 10-check-git-sync.sh - Compare state events to actual git data on disk
4#
5# PHASE 2 of the GRASP relay to ngit-grasp migration analysis pipeline.
6# Compares kind 30618 state events against actual git refs on disk.
7#
8# USAGE:
9# ./10-check-git-sync.sh <state-events.json> <git-base-dir> <output-dir> [--categorize]
10#
11# EXAMPLES:
12# # Check source relay against source git data
13# ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod
14#
15# # Check target relay against target git data
16# ./10-check-git-sync.sh output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive
17#
18# # Check and categorize in one step (convenience mode)
19# ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod --categorize
20#
21# INPUT:
22# state-events.json - JSONL file from Phase 1 (01-fetch-events.sh)
23# One kind 30618 event per line
24# git-base-dir - Base directory containing git repos
25# Structure: <git-base>/<npub>/<repo>.git/
26#
27# OUTPUT:
28# <output-dir>/git-sync-status.tsv - Tab-separated values:
29# repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason
30#
31# With --categorize flag, also outputs:
32# <output-dir>/category1-complete-match.txt
33# <output-dir>/category2-empty-blank.txt
34# <output-dir>/category3-partial-match.txt
35# <output-dir>/category4-no-match.txt
36#
37# CATEGORIES:
38# 1. Complete Match - All refs in state event match git data perfectly
39# 2. Empty/Blank - No git data available (directory missing or empty)
40# 3. Partial Match - Some refs match, some don't
41# 4. No Match - Git data exists but commit hashes don't match
42#
43# PREREQUISITES:
44# - nak (for npub encoding) - https://github.com/fiatjaf/nak
45# - jq (for JSON parsing)
46# - Read access to git directories (may need sudo)
47#
48# RUNTIME: ~20 minutes on VPS (git operations are slow)
49#
50# NOTES:
51# - Must run on VPS with access to git directories
52# - Progress indicator updates every 10 events
53# - Handles packed refs (git show-ref) and loose refs
54#
55# SEE ALSO:
56# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
57# 01-fetch-events.sh - Phase 1 script that produces input for this script
58# 20-categorize.sh - Phase 3a script that consumes output from this script
59#
60
61set -euo pipefail
62
# Colors for log output (disabled when the destination is not a terminal).
# Every colored string is written by the log_* helpers, which print to stderr,
# so the terminal check must look at fd 2. Checking stdout would leak raw
# escape codes into a redirected stderr log.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
77
# Write an informational message to stderr, prefixed with a blue [INFO] tag.
# Backslash escapes in the message are expanded, as with `echo -e`.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $*" >&2
}
81
# Write a success message to stderr, prefixed with a green [OK] tag.
# Backslash escapes in the message are expanded, as with `echo -e`.
log_success() {
    printf '%b\n' "${GREEN}[OK]${NC} $*" >&2
}
85
# Write a warning to stderr, prefixed with a yellow [WARN] tag.
# Backslash escapes in the message are expanded, as with `echo -e`.
log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2
}
89
# Write an error message to stderr, prefixed with a red [ERROR] tag.
# Backslash escapes in the message are expanded, as with `echo -e`.
log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}
93
# Write a progress message to stderr without a trailing newline.
# The leading carriage return moves the cursor to column 0, so successive
# calls overwrite the same terminal line.
log_progress() {
    printf '%b' "\r${BLUE}[PROGRESS]${NC} $*" >&2
}
98
# Print the command-line help to stdout and terminate with exit status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 <state-events.json> <git-base-dir> <output-dir> [--categorize]" \
        "" \
        "Arguments:" \
        " state-events.json JSONL file from Phase 1 (kind 30618 events)" \
        " git-base-dir Base directory for git repos (e.g., /var/lib/grasp-relay/git)" \
        " output-dir Directory to store output files" \
        " --categorize Optional: also output category files (like Phase 3)" \
        "" \
        "Examples:" \
        " $0 output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod" \
        " $0 output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive" \
        "" \
        "Output:" \
        " git-sync-status.tsv - TSV with: repo, npub, state_refs, git_refs, matches, reason"
    exit 1
}
116
117# Check prerequisites
# Verify that git, nak and jq are on PATH; exit 1 after reporting all that
# are missing.
# Arguments (optional): the script's original command-line arguments, echoed
#   back in the nix-shell re-run hint. Inside a function "$*" is the
#   function's own argument list, not the script's, so when called without
#   arguments the hint shows the expected argument placeholders rather than
#   an empty, misleading command line.
check_prerequisites() {
    local missing=0
    local rerun_args="${*:-<state-events.json> <git-base-dir> <output-dir>}"

    if ! command -v git &> /dev/null; then
        log_error "git not found. Install with your package manager."
        missing=1
    fi

    if ! command -v nak &> /dev/null; then
        log_error "nak not found. Install from: https://github.com/fiatjaf/nak"
        log_error "Or run: nix-shell -p nak jq --run \"$0 $rerun_args\""
        missing=1
    fi

    if ! command -v jq &> /dev/null; then
        log_error "jq not found. Install with your package manager."
        missing=1
    fi

    if [[ $missing -eq 1 ]]; then
        exit 1
    fi
}
141
142# Convert hex pubkey to npub
143# Args: $1=hex_pubkey
144# Returns: npub string or empty on error
# Encode a hex public key as an npub using `nak encode npub`.
# Args: $1=hex_pubkey
# Outputs: the npub on stdout; an empty line if nak fails (its stderr is
#          discarded). Always returns 0.
hex_to_npub() {
    local pubkey_hex="$1"

    if ! nak encode npub "$pubkey_hex" 2>/dev/null; then
        echo ""
    fi
}
149
150# Count refs in state event (only refs/heads/)
151# Args: $1=event_json
152# Returns: count
# Count the tags of a state event whose first element starts with refs/heads/.
# Args: $1=event_json
# Outputs: the count on stdout; "0" if jq cannot process the event.
count_state_refs() {
    local event_json="$1"

    if ! jq '[.tags[] | select(.[0] | startswith("refs/heads/"))] | length' 2>/dev/null <<< "$event_json"; then
        echo "0"
    fi
}
157
158# Get git refs from disk
159# Args: $1=git_dir
160# Returns: count of refs/heads/ refs
# Count the branch refs (refs/heads/*) stored in a git directory.
# Args: $1=git_dir
# Outputs: the count on stdout ("0" if the directory does not exist).
#
# git is asked first. If it fails or lists nothing (for example because it
# refuses to open the repository), the refs are counted straight from disk:
# the union of loose ref files under refs/heads and refs/heads/ entries in
# packed-refs. Counting only loose files would report a repository whose
# branches have all been packed as empty.
count_git_refs() {
    local git_dir="$1"

    if [[ ! -d "$git_dir" ]]; then
        echo "0"
        return
    fi

    # Capture git's output on its own so its exit status is checked directly
    # rather than relying on `pipefail` being in effect.
    local head_refs
    if head_refs=$(git --git-dir="$git_dir" show-ref --heads 2>/dev/null) \
        && [[ -n "$head_refs" ]]; then
        printf '%s\n' "$head_refs" | wc -l | tr -d ' '
        return
    fi

    # Fallback: read refs from disk. Names are de-duplicated because a ref
    # can exist both as a loose file and as a packed-refs entry.
    {
        if [[ -d "$git_dir/refs/heads" ]]; then
            (cd "$git_dir" 2>/dev/null && find refs/heads -type f 2>/dev/null) || true
        fi
        if [[ -f "$git_dir/packed-refs" ]]; then
            # packed-refs lines are "<hash> <refname>"; comment lines (#) and
            # peeled lines (^hash) never have a refs/heads/ second field.
            awk '$2 ~ /^refs\/heads\// { print $2 }' "$git_dir/packed-refs" 2>/dev/null || true
        fi
    } | LC_ALL=C sort -u | wc -l | tr -d ' '
}
184
185# Get ref hash from git directory
186# Args: $1=git_dir, $2=ref_path (e.g., refs/heads/main)
187# Returns: commit hash or empty
# Look up the object hash a ref points to.
# Args: $1=git_dir, $2=ref_path (full name, e.g. refs/heads/main)
# Outputs: the hash on stdout, or nothing/an empty line if the ref is unknown.
#
# `git show-ref --verify` is used so that only the exact ref name matches.
# Without --verify the argument is a pattern matched against trailing path
# components, so a ref such as refs/x/refs/heads/main could be returned in
# place of refs/heads/main.
# If git cannot answer, the ref is read from disk: the loose ref file first,
# then packed-refs (a loose file overrides a packed entry of the same name).
get_git_ref_hash() {
    local git_dir="$1"
    local ref_path="$2"

    local hash
    hash=$(git --git-dir="$git_dir" show-ref --verify --hash "$ref_path" 2>/dev/null) || hash=""

    if [[ -n "$hash" ]]; then
        echo "$hash"
        return
    fi

    # Fallback 1: loose ref file
    local ref_file="$git_dir/$ref_path"
    if [[ -f "$ref_file" ]]; then
        tr -d '\n' 2>/dev/null < "$ref_file" || echo ""
        return
    fi

    # Fallback 2: packed-refs entry ("<hash> <refname>" per line)
    if [[ -f "$git_dir/packed-refs" ]]; then
        awk -v ref="$ref_path" '$2 == ref { print $1; exit }' "$git_dir/packed-refs" 2>/dev/null || echo ""
        return
    fi

    echo ""
}
209
210# Compare state event refs to git refs
211# Args: $1=event_json, $2=git_dir
212# Returns: count of matching refs
# Count how many refs/heads/ tags of a state event point at the same hash as
# the corresponding ref in a git directory.
# Args: $1=event_json, $2=git_dir
# Outputs: the number of matching refs on stdout.
#
# All (ref, hash) pairs are extracted with a single jq invocation emitting
# tab-separated lines, instead of running two extra jq processes per tag.
# A missing or non-string hash is emitted as an empty field and skipped.
count_matching_refs() {
    local event="$1"
    local git_dir="$2"
    local matching=0
    local ref_path expected_hash actual_hash

    while IFS=$'\t' read -r ref_path expected_hash; do
        [[ -z "$ref_path" ]] && continue

        # Skip if not a heads ref or hash is missing
        [[ ! "$ref_path" =~ ^refs/heads/ ]] && continue
        [[ -z "$expected_hash" || "$expected_hash" == "null" ]] && continue

        # Get actual hash from git
        actual_hash=$(get_git_ref_hash "$git_dir" "$ref_path")

        if [[ "$expected_hash" == "$actual_hash" ]]; then
            matching=$((matching + 1))
        fi
    done < <(jq -r '
        .tags[]
        | select(.[0] | startswith("refs/heads/"))
        | [.[0], (.[1] | if type == "string" then . else "" end)]
        | @tsv
    ' 2>/dev/null <<< "$event")

    echo "$matching"
}
241
242# Categorize a single entry
243# Args: $1=state_refs, $2=git_refs, $3=matches, $4=reason
244# Returns: category number (1-4)
# Map one sync-status row to its category number.
# Args: $1=state_refs, $2=git_refs, $3=matches, $4=reason
# Outputs: 1 (complete match), 2 (empty/blank), 3 (partial match) or
#          4 (no match) on stdout. The checks are ordered; the first that
#          applies wins.
categorize_entry() {
    local n_state="$1"
    local n_git="$2"
    local n_match="$3"
    local why="$4"
    local category

    if [[ -n "$why" || "$n_git" -eq 0 ]]; then
        # Any recorded reason, or no git refs at all
        category=2
    elif [[ "$n_state" -gt 0 && "$n_state" -eq "$n_git" && "$n_match" -eq "$n_state" ]]; then
        # Same number of refs on both sides and every one matches
        category=1
    elif [[ "$n_git" -gt 0 && "$n_match" -eq 0 ]]; then
        # Git data exists but nothing matches
        category=4
    elif [[ "$n_match" -gt 0 ]]; then
        # Some matches, but not a complete match
        category=3
    else
        category=2
    fi

    echo "$category"
}
278
279# Format entry for category file
280# Args: $1=repo, $2=npub, $3=state_refs, $4=git_refs, $5=matches, $6=reason
# Render one sync-status row as a " | "-separated line for a category file.
# Args: $1=repo, $2=npub, $3=state_refs, $4=git_refs, $5=matches, $6=reason
# Outputs: the formatted line on stdout; the reason field is appended only
#          when it is non-empty.
format_category_line() {
    local repo="$1"
    local npub="$2"
    local state_refs="$3"
    local git_refs="$4"
    local matches="$5"
    local reason="$6"

    local line="$repo | $npub | state_refs=$state_refs | git_refs=$git_refs | matches=$matches"
    if [[ -n "$reason" ]]; then
        line+=" | reason=$reason"
    fi

    echo "$line"
}
295
296# Process a single state event
297# Args: $1=event_json, $2=git_base
298# Outputs: TSV line to stdout
# Turn one state event into a row of the sync-status TSV.
# Args: $1=event_json, $2=git_base
# Outputs: one line on stdout:
#          repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason
# Returns: 1 (printing nothing) when the event has no "d" tag value, no
#          pubkey, or the pubkey cannot be converted to an npub.
process_event() {
    local event_json="$1"
    local repos_root="$2"

    # Repository identifier: value of the first "d" tag
    local repo_id
    repo_id=$(jq -r '.tags[] | select(.[0] == "d") | .[1]' 2>/dev/null <<< "$event_json" | head -1 || echo "")
    [[ -n "$repo_id" ]] || return 1

    # Author pubkey (hex)
    local author_hex
    author_hex=$(jq -r '.pubkey' 2>/dev/null <<< "$event_json" || echo "")
    [[ -n "$author_hex" ]] || return 1

    local author_npub
    author_npub=$(hex_to_npub "$author_hex")
    [[ -n "$author_npub" ]] || return 1

    local n_state
    n_state=$(count_state_refs "$event_json")

    # Repositories live at <git_base>/<npub>/<identifier>.git
    local repo_dir="$repos_root/${author_npub}/${repo_id}.git"

    local n_git=0
    local n_match=0
    local why=""

    if [[ ! -d "$repo_dir" ]]; then
        why="no_git_dir"
    elif [[ ! -d "$repo_dir/refs/heads" && ! -f "$repo_dir/packed-refs" ]]; then
        # Neither loose nor packed refs exist
        why="empty_refs"
    else
        n_git=$(count_git_refs "$repo_dir")

        if [[ "$n_git" -eq 0 ]]; then
            why="empty_refs"
        elif [[ "$n_state" -eq 0 ]]; then
            why="no_state_refs"
        else
            # Hashes are only compared when both sides have refs
            n_match=$(count_matching_refs "$event_json" "$repo_dir")
        fi
    fi

    printf '%s\t%s\t%s\t%s\t%s\t%s\n' "$repo_id" "$author_npub" "$n_state" "$n_git" "$n_match" "$why"
}
358
359# Main
# Entry point: compare every state event in the input JSONL file against the
# git data on disk and write <output-dir>/git-sync-status.tsv. With
# --categorize, also write the four category files and a category summary.
# Arguments: <state-events.json> <git-base-dir> <output-dir> [--categorize]
#            (the flag may appear in any position)
# Exits 1 on invalid arguments, missing inputs or missing prerequisites.
main() {
    local do_categorize=0
    local args=()
    local arg

    # Separate the optional flag from the positional arguments
    for arg in "$@"; do
        if [[ "$arg" == "--categorize" ]]; then
            do_categorize=1
        else
            args+=("$arg")
        fi
    done

    if [[ ${#args[@]} -ne 3 ]]; then
        usage
    fi

    local state_events_file="${args[0]}"
    local git_base="${args[1]}"
    local output_dir="${args[2]}"

    # Validate inputs
    if [[ ! -f "$state_events_file" ]]; then
        log_error "State events file not found: $state_events_file"
        exit 1
    fi

    if [[ ! -d "$git_base" ]]; then
        log_error "Git base directory not found: $git_base"
        log_error "This script must run on the VPS with access to git directories."
        exit 1
    fi

    # Check read permissions
    if ! ls "$git_base" >/dev/null 2>&1; then
        log_error "Cannot read git base directory (permission denied): $git_base"
        log_error "Try running with sudo or grant read permissions."
        exit 1
    fi

    check_prerequisites

    log_info "=== Git State Synchronization Check ==="
    log_info "State events: $state_events_file"
    log_info "Git base: $git_base"
    log_info "Output: $output_dir"
    if [[ $do_categorize -eq 1 ]]; then
        log_info "Mode: TSV + categorization"
    else
        log_info "Mode: TSV only (use 20-categorize.sh for categories)"
    fi
    log_info "Started: $(date)"
    echo ""

    # Create output directory
    mkdir -p "$output_dir"

    # Output files
    local tsv_file="$output_dir/git-sync-status.tsv"

    # Initialize TSV with header
    printf 'repo\tnpub\tstate_refs\tgit_refs\tmatches\treason\n' > "$tsv_file"

    # Initialize (truncate) category files if categorizing
    local cat1="" cat2="" cat3="" cat4=""
    if [[ $do_categorize -eq 1 ]]; then
        cat1="$output_dir/category1-complete-match.txt"
        cat2="$output_dir/category2-empty-blank.txt"
        cat3="$output_dir/category3-partial-match.txt"
        cat4="$output_dir/category4-no-match.txt"
        : > "$cat1"
        : > "$cat2"
        : > "$cat3"
        : > "$cat4"
    fi

    # Count total events. awk's NR also counts a final line that has no
    # trailing newline, which `wc -l` would miss.
    local total_events
    total_events=$(awk 'END { print NR }' "$state_events_file")
    log_info "Processing $total_events state events..."
    echo ""

    # Process each event
    local count=0
    local processed=0
    local skipped=0
    local count_cat1=0 count_cat2=0 count_cat3=0 count_cat4=0
    local start_time
    start_time=$(date +%s)

    # Declared here so nothing assigned inside the loop leaks into the
    # global scope.
    local event result category cat_line
    local repo npub state_refs git_refs matches reason
    local elapsed rate eta

    # `|| [[ -n "$event" ]]` keeps a last line that lacks a trailing newline;
    # a plain `read` loop would silently drop that event.
    while IFS= read -r event || [[ -n "$event" ]]; do
        count=$((count + 1))

        # Skip empty lines
        [[ -z "$event" ]] && continue

        # Process event
        if result=$(process_event "$event" "$git_base"); then
            processed=$((processed + 1))

            # Append the row below the header
            echo "$result" >> "$tsv_file"

            # Categorize if requested
            if [[ $do_categorize -eq 1 ]]; then
                # Split the TSV row back into its fields
                IFS=$'\t' read -r repo npub state_refs git_refs matches reason <<< "$result"

                category=$(categorize_entry "$state_refs" "$git_refs" "$matches" "$reason")
                cat_line=$(format_category_line "$repo" "$npub" "$state_refs" "$git_refs" "$matches" "$reason")

                case "$category" in
                    1) echo "$cat_line" >> "$cat1"; count_cat1=$((count_cat1 + 1)) ;;
                    2) echo "$cat_line" >> "$cat2"; count_cat2=$((count_cat2 + 1)) ;;
                    3) echo "$cat_line" >> "$cat3"; count_cat3=$((count_cat3 + 1)) ;;
                    4) echo "$cat_line" >> "$cat4"; count_cat4=$((count_cat4 + 1)) ;;
                esac
            fi
        else
            skipped=$((skipped + 1))
        fi

        # Progress indicator every 10 events
        if [[ $((count % 10)) -eq 0 ]]; then
            elapsed=$(($(date +%s) - start_time))
            rate=0
            if [[ $elapsed -gt 0 ]]; then
                rate=$((count / elapsed))
            fi
            eta="?"
            if [[ $rate -gt 0 ]]; then
                eta=$(( (total_events - count) / rate ))
            fi
            log_progress "Processed $count/$total_events events (~${rate}/s, ETA: ${eta}s)..."
        fi
    done < "$state_events_file"

    # Clear progress line
    echo "" >&2

    local end_time
    end_time=$(date +%s)
    local duration=$((end_time - start_time))

    # Summary
    echo ""
    log_info "=== Analysis Complete ==="
    log_info "Finished: $(date)"
    log_info "Duration: ${duration}s"
    log_info "Processed: $processed events"
    if [[ $skipped -gt 0 ]]; then
        log_warn "Skipped: $skipped events (missing identifier or pubkey)"
    fi
    echo ""

    if [[ $do_categorize -eq 1 ]]; then
        # Calculate percentages. Values are passed with -v instead of being
        # spliced into the awk program text.
        local total=$((count_cat1 + count_cat2 + count_cat3 + count_cat4))
        local pct1=0 pct2=0 pct3=0 pct4=0
        if [[ $total -gt 0 ]]; then
            pct1=$(awk -v n="$count_cat1" -v t="$total" 'BEGIN { printf "%.1f", (n/t)*100 }')
            pct2=$(awk -v n="$count_cat2" -v t="$total" 'BEGIN { printf "%.1f", (n/t)*100 }')
            pct3=$(awk -v n="$count_cat3" -v t="$total" 'BEGIN { printf "%.1f", (n/t)*100 }')
            pct4=$(awk -v n="$count_cat4" -v t="$total" 'BEGIN { printf "%.1f", (n/t)*100 }')
        fi

        log_info "=== Category Summary ==="
        log_success "Category 1 (Complete Match): $count_cat1 ($pct1%)"
        log_warn "Category 2 (Empty/Blank): $count_cat2 ($pct2%)"
        log_warn "Category 3 (Partial Match): $count_cat3 ($pct3%)"
        log_error "Category 4 (No Match): $count_cat4 ($pct4%)"
        echo ""

        # Validation warning
        if [[ $count_cat2 -eq $total ]] && [[ $total -gt 0 ]]; then
            log_error "WARNING: 100% of repos categorized as Empty/Blank"
            log_error "This usually indicates a permission or path issue."
            echo ""
            log_info "Troubleshooting:"
            echo " 1. Verify git data exists: sudo ls -la $git_base | head -10"
            echo " 2. Check sample repo: sudo find $git_base -name '*.git' -type d | head -1"
            echo " 3. Re-run with sudo if not already using it"
            echo ""
        fi
    fi

    log_info "Output files:"
    echo " $tsv_file"
    if [[ $do_categorize -eq 1 ]]; then
        echo " $cat1"
        echo " $cat2"
        echo " $cat3"
        echo " $cat4"
    else
        echo ""
        log_info "Next step: Run 20-categorize.sh to categorize results"
        echo " ./20-categorize.sh $tsv_file $output_dir"
    fi
}
563
564main "$@"
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/20-categorize.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/20-categorize.sh
new file mode 100755
index 0000000..b38dc00
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/20-categorize.sh
@@ -0,0 +1,212 @@
1#!/usr/bin/env bash
2#
3# 20-categorize.sh - Categorize git sync status into 4 categories
4#
5# PHASE 3a of the GRASP relay to ngit-grasp migration analysis pipeline.
6# Takes git-sync-status.tsv from Phase 2 and categorizes into 4 files.
7#
8# USAGE:
9# ./20-categorize.sh <git-sync-status.tsv> <output-dir>
10#
11# EXAMPLES:
12# ./20-categorize.sh output/prod/git-sync-status.tsv output/prod
13# ./20-categorize.sh output/archive/git-sync-status.tsv output/archive
14#
15# INPUT FORMAT (git-sync-status.tsv):
16# Tab-separated values with columns:
17# repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason
18#
19# Where reason is optional and can be: no_git_dir, empty_refs, no_state_refs
20#
21# OUTPUT:
22# <output-dir>/category1-complete-match.txt - All refs match perfectly
23# <output-dir>/category2-empty-blank.txt - No git data available
24# <output-dir>/category3-partial-match.txt - Some refs match
25# <output-dir>/category4-no-match.txt - Git exists but refs don't match
26#
27# OUTPUT FORMAT:
28# repo | npub | state_refs=N | git_refs=N | matches=N [| reason=X]
29#
30# CATEGORIES:
31# 1. Complete Match: state_refs == git_refs == matches (all > 0)
32# 2. Empty/Blank: git_refs == 0 OR reason in (no_git_dir, empty_refs, no_state_refs)
33# 3. Partial Match: matches > 0 AND matches < state_refs
34# 4. No Match: git_refs > 0 AND matches == 0
35#
36# PREREQUISITES:
37# - awk (standard Unix tool)
38#
39# RUNTIME: < 1 second (local processing only)
40#
41# SEE ALSO:
42# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
43# 10-check-git-sync.sh - Phase 2 script that produces input for this script
44#
45
46set -euo pipefail
47
# Colors for log output (disabled when the destination is not a terminal).
# Every colored string is written by the log_* helpers, which print to stderr,
# so the terminal check must look at fd 2. Checking stdout would leak raw
# escape codes into a redirected stderr log.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
62
# Write an informational message to stderr, prefixed with a blue [INFO] tag.
# Backslash escapes in the message are expanded, as with `echo -e`.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $*" >&2
}
66
# Write a success message to stderr, prefixed with a green [OK] tag.
# Backslash escapes in the message are expanded, as with `echo -e`.
log_success() {
    printf '%b\n' "${GREEN}[OK]${NC} $*" >&2
}
70
# Write a warning to stderr, prefixed with a yellow [WARN] tag.
# Backslash escapes in the message are expanded, as with `echo -e`.
log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2
}
74
# Write an error message to stderr, prefixed with a red [ERROR] tag.
# Backslash escapes in the message are expanded, as with `echo -e`.
log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}
78
# Print the command-line help to stdout and terminate with exit status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 <git-sync-status.tsv> <output-dir>" \
        "" \
        "Arguments:" \
        " git-sync-status.tsv TSV file from Phase 2 (10-check-git-sync.sh)" \
        " output-dir Directory to store categorized output" \
        "" \
        "Examples:" \
        " $0 output/prod/git-sync-status.tsv output/prod" \
        " $0 output/archive/git-sync-status.tsv output/archive" \
        "" \
        "Input format (TSV):" \
        " repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason" \
        "" \
        "Output files:" \
        " category1-complete-match.txt - All refs match" \
        " category2-empty-blank.txt - No git data" \
        " category3-partial-match.txt - Some refs match" \
        " category4-no-match.txt - Git exists, refs don't match"
    exit 1
}
100
101# Main
# Entry point: split a git-sync-status TSV into the four category files under
# <output-dir> and print a summary.
# Arguments: <git-sync-status.tsv> <output-dir>
# Exits 1 on wrong argument count, a missing input file, or an awk failure.
main() {
    if [[ $# -ne 2 ]]; then
        usage
    fi

    local input_file="$1"
    local output_dir="$2"

    # Validate input file
    if [[ ! -f "$input_file" ]]; then
        log_error "Input file not found: $input_file"
        exit 1
    fi

    log_info "Categorizing git sync status"
    log_info "Input: $input_file"
    log_info "Output: $output_dir"

    # Create output directory
    mkdir -p "$output_dir"

    # Output files
    local cat1="$output_dir/category1-complete-match.txt"
    local cat2="$output_dir/category2-empty-blank.txt"
    local cat3="$output_dir/category3-partial-match.txt"
    local cat4="$output_dir/category4-no-match.txt"

    # Clear previous results (also creates files for empty categories)
    : > "$cat1"
    : > "$cat2"
    : > "$cat3"
    : > "$cat4"

    # Process input file with awk.
    # Input: repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason
    # awk writes the category files itself; its only stdout is one COUNTS
    # line. stderr is left untouched so awk errors reach the user instead of
    # being filtered away.
    local counts_line
    if ! counts_line=$(awk -F'\t' -v cat1="$cat1" -v cat2="$cat2" -v cat3="$cat3" -v cat4="$cat4" '
        BEGIN {
            count1 = 0; count2 = 0; count3 = 0; count4 = 0
        }
        # Skip the header row only when it really is the header; a data row
        # whose repo name merely starts with "repo" must not be dropped.
        NR == 1 && $1 == "repo" && $2 == "npub" { next }
        NF >= 5 {
            repo = $1
            npub = $2
            state_refs = int($3)
            git_refs = int($4)
            matches = int($5)
            reason = (NF >= 6) ? $6 : ""

            # Format output line
            line = repo " | " npub " | state_refs=" state_refs " | git_refs=" git_refs " | matches=" matches
            if (reason != "") {
                line = line " | reason=" reason
            }

            # Categorize
            if (reason == "no_git_dir" || reason == "empty_refs" || reason == "no_state_refs" || git_refs == 0) {
                # Category 2: Empty/Blank
                print line >> cat2
                count2++
            } else if (state_refs > 0 && state_refs == git_refs && matches == state_refs) {
                # Category 1: Complete Match
                print line >> cat1
                count1++
            } else if (matches > 0 && matches < state_refs) {
                # Category 3: Partial Match
                print line >> cat3
                count3++
            } else if (git_refs > 0 && matches == 0) {
                # Category 4: No Match
                print line >> cat4
                count4++
            } else if (matches > 0) {
                # Edge case: matches > 0 but does not fit other categories.
                # This can happen when git_refs > state_refs but all state
                # refs match. Treat as partial match.
                print line >> cat3
                count3++
            } else {
                # Fallback: treat as category 2 (empty/blank)
                print line >> cat2
                count2++
            }
        }
        END {
            total = count1 + count2 + count3 + count4
            print "COUNTS:" count1 ":" count2 ":" count3 ":" count4 ":" total
        }
    ' "$input_file"); then
        log_error "awk failed while categorizing $input_file"
        exit 1
    fi

    # Parse counts from awk output: COUNTS:c1:c2:c3:c4:total
    local c1 c2 c3 c4 total
    IFS=':' read -r _ c1 c2 c3 c4 total <<< "$counts_line"

    echo ""
    log_info "=== Categorization Summary ==="
    log_info "Total entries: $total"
    log_success "Category 1 (Complete Match): $c1"
    log_warn "Category 2 (Empty/Blank): $c2"
    log_warn "Category 3 (Partial Match): $c3"
    log_error "Category 4 (No Match): $c4"
    echo ""
    log_info "Output files:"
    echo " $cat1"
    echo " $cat2"
    echo " $cat3"
    echo " $cat4"
}
211
212main "$@"
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh
new file mode 100755
index 0000000..b9c0d30
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh
@@ -0,0 +1,294 @@
1#!/usr/bin/env bash
2#
3# 21-compare-relays.sh - Compare prod vs archive category files to find gaps
4#
5# PHASE 3b of the GRASP relay to ngit-grasp migration analysis pipeline.
6# Compares categorized output from prod and archive to identify:
7# - Repos complete in prod but missing/incomplete in archive
8# - Repos in archive but not in prod
9# - Status differences between relays
10#
11# USAGE:
12# ./21-compare-relays.sh <prod-dir> <archive-dir> <output-dir>
13#
14# EXAMPLES:
15# ./21-compare-relays.sh output/prod output/archive output/comparison
16#
17# INPUT:
18# Both prod-dir and archive-dir must contain:
19# - category1-complete-match.txt
20# - category2-empty-blank.txt
21# - category3-partial-match.txt
22# - category4-no-match.txt
23#
24# OUTPUT:
25# <output-dir>/complete-in-both.txt - Repos complete in both relays (no action)
26# <output-dir>/complete-prod-missing-archive.txt - Complete in prod, not in archive cat1
27# <output-dir>/complete-prod-incomplete-archive.txt - Complete in prod, incomplete in archive
28# <output-dir>/incomplete-in-both.txt - Incomplete in both relays
29# <output-dir>/in-archive-not-prod.txt - In archive but not in prod
30# <output-dir>/summary.txt - Human-readable summary
31#
32# OUTPUT FORMAT:
33# Each file contains lines in the format:
34# repo | npub | prod_status | archive_status
35#
36# PREREQUISITES:
37# - awk, sort, comm (standard Unix tools)
38#
39# RUNTIME: < 1 second (local processing only)
40#
41# SEE ALSO:
42# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
43# 20-categorize.sh - Phase 3a script that produces input for this script
44#
45
set -euo pipefail

# ANSI colour codes used by the log_* helpers.
# Every log_* helper writes to stderr, so colours are enabled only when
# stderr (fd 2) is a terminal. Testing stdout instead would leak escape
# codes into files when stderr is redirected (e.g. 2>run.log).
if [[ -t 2 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  BLUE=''
  NC=''
fi
62
# Print an informational message to stderr with a blue [INFO] tag.
# Arguments: $* - message text. It is printed verbatim: backslash sequences
#                 inside the message (e.g. in paths) are NOT interpreted.
log_info() {
  printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$*" >&2
}
66
# Print a success message to stderr with a green [OK] tag.
# Arguments: $* - message text. It is printed verbatim: backslash sequences
#                 inside the message are NOT interpreted.
log_success() {
  printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$*" >&2
}
70
# Print a warning to stderr with a yellow [WARN] tag.
# Arguments: $* - message text. It is printed verbatim: backslash sequences
#                 inside the message are NOT interpreted.
log_warn() {
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*" >&2
}
74
# Print an error message to stderr with a red [ERROR] tag.
# Arguments: $* - message text. It is printed verbatim: backslash sequences
#                 inside the message are NOT interpreted.
log_error() {
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*" >&2
}
78
# Print the command-line help to stdout and terminate with exit status 1.
usage() {
  cat << EOF
Usage: $0 <prod-dir> <archive-dir> <output-dir>

Arguments:
 prod-dir Directory containing prod category files
 archive-dir Directory containing archive category files
 output-dir Directory to store comparison results

Examples:
 $0 output/prod output/archive output/comparison

Required input files in each directory:
 category1-complete-match.txt
 category2-empty-blank.txt
 category3-partial-match.txt
 category4-no-match.txt
EOF
  exit 1
}
97
# Reduce category lines read from stdin to their lookup key.
# Input:  "repo | npub | state_refs=N | ..."   (columns separated by " | ")
# Output: "repo|npub"
extract_key() {
  awk 'BEGIN { FS = " [|] " } { printf "%s|%s\n", $1, $2 }'
}
104
# Build a sorted lookup table from the category files of one relay.
# Args:
#   $1 - directory containing category{1..4}-*.txt
#   $2 - output file
# Output line format (sorted by the repo|npub key):
#   repo|npub|catN|<original category line>
# Missing category files are skipped; blank input lines are ignored.
build_lookup() {
  local dir="$1"
  local output="$2"
  local n file

  for n in 1 2 3 4; do
    # Let the shell expand the glob (quoted directory, so paths containing
    # spaces work). An unmatched glob stays literal and fails the -f test.
    for file in "$dir"/category"${n}"-*.txt; do
      [[ -f "$file" ]] || continue
      # One awk pass per file derives the key and tags every line.
      awk -v tag="cat${n}" '
        BEGIN { FS = " [|] " }
        $0 !~ /^[ \t]*$/ { print $1 "|" $2 "|" tag "|" $0 }
      ' "$file"
    done
  done | sort -t'|' -k1,2 > "$output"
}
124
# Main: compare the categorized output of the prod and archive relays.
# Args:
#   $1 - prod directory (must contain category1-complete-match.txt)
#   $2 - archive directory (must contain category1-complete-match.txt)
#   $3 - output directory (created if missing)
# Writes the five comparison lists plus summary.txt into the output directory
# and logs a summary. Exits 1 on invalid arguments or missing input.
main() {
  if [[ $# -ne 3 ]]; then
    usage
  fi

  local prod_dir="$1"
  local archive_dir="$2"
  local output_dir="$3"
  local dir name

  # Validate input directories
  for dir in "$prod_dir" "$archive_dir"; do
    if [[ ! -d "$dir" ]]; then
      log_error "Directory not found: $dir"
      exit 1
    fi
    if [[ ! -f "$dir/category1-complete-match.txt" ]]; then
      log_error "Missing category1-complete-match.txt in $dir"
      exit 1
    fi
  done

  log_info "Comparing relay categories"
  log_info "Prod: $prod_dir"
  log_info "Archive: $archive_dir"
  log_info "Output: $output_dir"

  # Create output directory
  mkdir -p "$output_dir"

  # Temp directory for the lookup tables. The path is expanded when the trap
  # is installed because the local variable is gone by the time EXIT fires.
  local tmp_dir
  tmp_dir=$(mktemp -d)
  # shellcheck disable=SC2064
  trap "rm -rf '$tmp_dir'" EXIT

  log_info "Building lookup tables..."

  # Lookup tables: key|category|full_line, sorted by key
  build_lookup "$prod_dir" "$tmp_dir/prod_lookup.txt"
  build_lookup "$archive_dir" "$tmp_dir/archive_lookup.txt"

  log_info "Comparing categories..."

  # Initialize output files so each one exists even when it stays empty
  for name in complete-in-both complete-prod-missing-archive \
    complete-prod-incomplete-archive incomplete-in-both in-archive-not-prod; do
    : > "$output_dir/${name}.txt"
  done

  # Single awk pass over both lookup tables. Keys are compared as exact
  # strings, so characters such as "." in a repo name cannot match a
  # different repo (the previous grep-based lookup treated the key as a
  # regular expression). The output directory is handed over through the
  # environment so that awk never escape-processes the path.
  COMPARE_OUT_DIR="$output_dir" awk '
    BEGIN {
      FS = "|"
      out = ENVIRON["COMPARE_OUT_DIR"]
      f_both            = out "/complete-in-both.txt"
      f_missing         = out "/complete-prod-missing-archive.txt"
      f_incomplete      = out "/complete-prod-incomplete-archive.txt"
      f_both_incomplete = out "/incomplete-in-both.txt"
      f_archive_only    = out "/in-archive-not-prod.txt"
    }

    # Archive table: remember the category of the first entry per key.
    side == "archive" {
      key = $1 "|" $2
      if (!(key in a_cat)) {
        a_cat[key] = $3
        a_keys[++a_n] = key
        a_repo[a_n] = $1
        a_npub[a_n] = $2
      }
      next
    }

    # Prod table: classify every entry against the archive.
    {
      key = $1 "|" $2
      in_prod[key] = 1
      who = $1 " | " $2
      status = (key in a_cat) ? a_cat[key] : "missing"

      if ($3 == "cat1") {
        if (status == "missing") {
          print who " | prod=complete | archive=missing" > f_missing
        } else if (status == "cat1") {
          print who " | prod=complete | archive=complete" > f_both
        } else {
          print who " | prod=complete | archive=" status > f_incomplete
        }
      } else if ($3 ~ /^cat[234]$/ && status != "cat1") {
        # Buffered per category so the file lists cat2, then cat3, then cat4.
        # Incomplete in prod but complete in archive is unusual, not an
        # error, and is intentionally not reported.
        pending[$3] = pending[$3] who " | prod=" $3 " | archive=" status "\n"
      }
    }

    END {
      for (i = 2; i <= 4; i++) {
        printf "%s", pending["cat" i] > f_both_incomplete
      }
      # Archive entries with no counterpart in prod, in sorted key order.
      for (i = 1; i <= a_n; i++) {
        if (!(a_keys[i] in in_prod)) {
          print a_repo[i] " | " a_npub[i] " | prod=missing | archive=" a_cat[a_keys[i]] > f_archive_only
        }
      }
    }
  ' side=archive "$tmp_dir/archive_lookup.txt" side=prod "$tmp_dir/prod_lookup.txt"

  # Count results
  local count_both count_missing count_incomplete count_both_incomplete count_archive_only
  count_both=$(wc -l < "$output_dir/complete-in-both.txt" | tr -d ' ')
  count_missing=$(wc -l < "$output_dir/complete-prod-missing-archive.txt" | tr -d ' ')
  count_incomplete=$(wc -l < "$output_dir/complete-prod-incomplete-archive.txt" | tr -d ' ')
  count_both_incomplete=$(wc -l < "$output_dir/incomplete-in-both.txt" | tr -d ' ')
  count_archive_only=$(wc -l < "$output_dir/in-archive-not-prod.txt" | tr -d ' ')

  # Generate summary
  cat > "$output_dir/summary.txt" << EOF
# Relay Comparison Summary
Generated: $(date -Iseconds)

## Input
- Prod: $prod_dir
- Archive: $archive_dir

## Results

### No Action Required
- Complete in both relays: $count_both

### Action/Decision Required
- Complete in prod, MISSING from archive: $count_missing
- Complete in prod, INCOMPLETE in archive: $count_incomplete
- Incomplete in BOTH relays: $count_both_incomplete

### For Reference
- In archive but not in prod: $count_archive_only

## Files
- complete-in-both.txt: Repos successfully migrated (no action)
- complete-prod-missing-archive.txt: Need investigation - why not in archive?
- complete-prod-incomplete-archive.txt: Archive sync may still be in progress
- incomplete-in-both.txt: Git data incomplete on both relays
- in-archive-not-prod.txt: May be deleted from prod or new to archive

## Next Steps
1. Review complete-prod-missing-archive.txt - these repos need attention
2. Check if archive sync is still running for incomplete entries
3. Cross-reference with deletion events (kind 5) from Phase 1
4. Use Phase 4 logs to understand parse failures and purgatory expiry
EOF

  # Display summary
  echo ""
  log_info "=== Comparison Summary ==="
  log_success "Complete in both: $count_both (no action needed)"
  log_error "Complete in prod, MISSING from archive: $count_missing"
  log_warn "Complete in prod, incomplete in archive: $count_incomplete"
  log_warn "Incomplete in both: $count_both_incomplete"
  log_info "In archive only: $count_archive_only"
  echo ""
  log_info "Output files:"
  echo " $output_dir/complete-in-both.txt"
  echo " $output_dir/complete-prod-missing-archive.txt"
  echo " $output_dir/complete-prod-incomplete-archive.txt"
  echo " $output_dir/incomplete-in-both.txt"
  echo " $output_dir/in-archive-not-prod.txt"
  echo " $output_dir/summary.txt"
}
293
294main "$@"
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/22-compare-git-data.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/22-compare-git-data.sh
new file mode 100755
index 0000000..76521d4
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/22-compare-git-data.sh
@@ -0,0 +1,390 @@
1#!/usr/bin/env bash
2#
3# 22-compare-git-data.sh - Compare actual git data between prod and archive relays
4#
5# PHASE 3c of the GRASP relay to ngit-grasp migration analysis pipeline.
6# Compares actual git commits between prod and archive to determine which is ahead.
7#
8# KEY INSIGHT:
9# Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event.
10# If archive has different/newer data than prod, it means:
11# - A state event authorized those commits at some point
12# - Archive is actually MORE up-to-date than prod
13# - Migration should use archive data (it's already correct)
14#
15# USAGE:
16# ./22-compare-git-data.sh <prod-git-base> <archive-git-base> <repo-list> <output-dir>
17#
18# EXAMPLES:
19# ./22-compare-git-data.sh /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \
20# output/comparison/complete-prod-incomplete-archive.txt output/comparison
21#
22# INPUT:
23# prod-git-base Base directory for prod git repos (e.g., /var/lib/grasp-relay/git)
24# archive-git-base Base directory for archive git repos (e.g., /var/lib/ngit-grasp/git)
25# repo-list File with repos to compare (format: "repo | npub | ...")
26#
27# OUTPUT:
28# <output-dir>/git-ancestry.tsv - Tab-separated values:
29# repo<TAB>npub<TAB>relationship<TAB>details
30#
31# Relationship values:
32# archive-ahead - Archive has all prod commits plus more (GOOD - use archive)
33# in-sync - Both have identical commits
34# prod-ahead - Prod has commits archive is missing (needs re-sync)
35# diverged - Both have unique commits (manual review)
36# archive-only - Only archive has git data
37# prod-only - Only prod has git data
38# both-empty - Neither has git data
39#
40# PREREQUISITES:
41# - git (for ref comparison)
42# - Read access to both git directories (may need sudo)
43#
44# RUNTIME: Depends on number of repos to compare
45#
46# SEE ALSO:
47# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
48# 21-compare-relays.sh - Phase 3b script that identifies repos to compare
49#
50
set -euo pipefail

# ANSI colour codes used by the log_* helpers.
# Every log_* helper writes to stderr, so colours are enabled only when
# stderr (fd 2) is a terminal. Testing stdout instead would leak escape
# codes into files when stderr is redirected (e.g. 2>run.log).
if [[ -t 2 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  BLUE=''
  NC=''
fi
67
# Print an informational message to stderr with a blue [INFO] tag.
# Arguments: $* - message text. It is printed verbatim: backslash sequences
#                 inside the message (e.g. in paths) are NOT interpreted.
log_info() {
  printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$*" >&2
}
71
# Print a success message to stderr with a green [OK] tag.
# Arguments: $* - message text. It is printed verbatim: backslash sequences
#                 inside the message are NOT interpreted.
log_success() {
  printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$*" >&2
}
75
# Print a warning to stderr with a yellow [WARN] tag.
# Arguments: $* - message text. It is printed verbatim: backslash sequences
#                 inside the message are NOT interpreted.
log_warn() {
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*" >&2
}
79
# Print an error message to stderr with a red [ERROR] tag.
# Arguments: $* - message text. It is printed verbatim: backslash sequences
#                 inside the message are NOT interpreted.
log_error() {
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*" >&2
}
83
# Overwrite the current stderr line with a blue [PROGRESS] message.
# Emits a leading carriage return and no trailing newline, so successive
# calls redraw the same terminal line.
# Arguments: $* - message text. It is printed verbatim: backslash sequences
#                 inside the message are NOT interpreted.
log_progress() {
  printf '\r%b[PROGRESS]%b %s' "${BLUE}" "${NC}" "$*" >&2
}
87
# Print the command-line help to stdout and terminate with exit status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 <prod-git-base> <archive-git-base> <repo-list> <output-dir>" \
    "" \
    "Arguments:" \
    " prod-git-base Base directory for prod git repos" \
    " archive-git-base Base directory for archive git repos" \
    " repo-list File with repos to compare (format: 'repo | npub | ...')" \
    " output-dir Directory to store output files" \
    "" \
    "Examples:" \
    " $0 /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \\" \
    " output/comparison/complete-prod-incomplete-archive.txt output/comparison" \
    "" \
    "Output:" \
    " git-ancestry.tsv - TSV with: repo, npub, relationship, details"
  exit 1
}
105
# List the branch heads of a bare git directory.
# Args: $1=git_dir
# Output: sorted "commit_hash ref_name" lines, as printed by
#         `git show-ref --heads`; nothing when the directory is missing,
#         is not a repository, or has no branches.
# Returns: always 0
get_git_refs() {
  local git_dir="$1"

  [[ -d "$git_dir" ]] || return 0

  # show-ref exits non-zero when there are no heads; that is not an error here.
  { git --git-dir="$git_dir" show-ref --heads 2> /dev/null | sort; } || true
}
118
# Test whether one commit is an ancestor of another inside a repository.
# Args: $1=git_dir, $2=commit_a, $3=commit_b
# Returns: 0 if A is an ancestor of B; non-zero otherwise (including when
#          git cannot resolve either commit in that repository)
is_ancestor() {
  local repo_dir="$1" older="$2" newer="$3"

  git --git-dir="$repo_dir" merge-base --is-ancestor "$older" "$newer" 2> /dev/null
}
129
# Compare git data between prod and archive for a single repo.
# Args: $1=prod_git_dir, $2=archive_git_dir
# Output: one relationship string on stdout:
#   both-empty | archive-only | prod-only | in-sync |
#   archive-ahead | prod-ahead | diverged
# Returns: always 0
#
# Ancestry is checked per common branch:
#   - "archive ahead" is tested in the archive repo (it holds the newer commit)
#   - "prod ahead" is tested in the prod repo (it holds the newer commit)
# Each test must run in the repo that contains the newer commit; running both
# in the archive repo made every prod-ahead branch look "diverged" because
# the archive does not have the prod commit at all.
compare_repo_git() {
  local prod_git="$1"
  local archive_git="$2"

  local prod_exists=false
  local archive_exists=false

  if [[ -d "$prod_git" ]]; then prod_exists=true; fi
  if [[ -d "$archive_git" ]]; then archive_exists=true; fi

  # Handle cases where one or both directories do not exist
  if [[ "$prod_exists" == "false" && "$archive_exists" == "false" ]]; then
    echo "both-empty"
    return
  fi

  if [[ "$prod_exists" == "false" ]]; then
    echo "archive-only"
    return
  fi

  if [[ "$archive_exists" == "false" ]]; then
    echo "prod-only"
    return
  fi

  # Both exist - get refs ("hash ref" lines)
  local prod_refs archive_refs
  prod_refs=$(get_git_refs "$prod_git")
  archive_refs=$(get_git_refs "$archive_git")

  # Handle empty refs
  if [[ -z "$prod_refs" && -z "$archive_refs" ]]; then
    echo "both-empty"
    return
  fi

  if [[ -z "$prod_refs" ]]; then
    echo "archive-only"
    return
  fi

  if [[ -z "$archive_refs" ]]; then
    echo "prod-only"
    return
  fi

  # Identical ref lists need no ancestry checks
  if [[ "$prod_refs" == "$archive_refs" ]]; then
    echo "in-sync"
    return
  fi

  # Refs differ. If every common branch in archive is ahead of or equal to
  # prod, archive is ahead; the mirror case means prod is ahead; anything
  # else has diverged.
  local archive_ahead=true
  local prod_ahead=true
  local has_common_branch=false
  local prod_hash prod_ref archive_hash archive_ref

  # Check each prod branch against archive
  while read -r prod_hash prod_ref; do
    [[ -z "$prod_hash" ]] && continue

    # Exact ref-name match (a regex match would let "v1.0" hit "v1x0")
    archive_hash=$(awk -v ref="$prod_ref" '$2 == ref { print $1; exit }' <<< "$archive_refs")

    if [[ -z "$archive_hash" ]]; then
      # Branch exists only in prod. It may simply have been deleted, so it
      # does not by itself mean prod is ahead.
      continue
    fi

    has_common_branch=true

    if [[ "$prod_hash" == "$archive_hash" ]]; then
      # Same commit - neither side is ahead for this branch
      continue
    fi

    if is_ancestor "$archive_git" "$prod_hash" "$archive_hash"; then
      # Prod commit is contained in archive history - archive is ahead
      prod_ahead=false
    elif is_ancestor "$prod_git" "$archive_hash" "$prod_hash"; then
      # Archive commit is contained in prod history - prod is ahead
      archive_ahead=false
    else
      # Neither contains the other - diverged
      archive_ahead=false
      prod_ahead=false
    fi
  done <<< "$prod_refs"

  # Branches that exist only in archive mean archive has extra data
  while read -r archive_hash archive_ref; do
    [[ -z "$archive_hash" ]] && continue

    prod_hash=$(awk -v ref="$archive_ref" '$2 == ref { print $1; exit }' <<< "$prod_refs")

    if [[ -z "$prod_hash" ]]; then
      prod_ahead=false
    fi
  done <<< "$archive_refs"

  # Determine final relationship
  if [[ "$has_common_branch" == "false" ]]; then
    # No common branches - completely different
    echo "diverged"
    return
  fi

  if [[ "$archive_ahead" == "true" && "$prod_ahead" == "false" ]]; then
    echo "archive-ahead"
  elif [[ "$prod_ahead" == "true" && "$archive_ahead" == "false" ]]; then
    echo "prod-ahead"
  elif [[ "$archive_ahead" == "true" && "$prod_ahead" == "true" ]]; then
    # All common branches are identical (prod may still have extra branches)
    echo "in-sync"
  else
    echo "diverged"
  fi
}
269
# Main: classify the git relationship between prod and archive for every
# repo in a list and write the result as TSV.
# Args:
#   $1 - prod git base directory
#   $2 - archive git base directory
#   $3 - repo list file ("repo | npub | ..." per line; "#" lines are comments)
#   $4 - output directory (created if missing)
# Writes <output-dir>/git-ancestry.tsv and logs per-relationship counts.
# Exits 1 on invalid arguments or missing input.
main() {
  if [[ $# -ne 4 ]]; then
    usage
  fi

  local prod_git_base="$1"
  local archive_git_base="$2"
  local repo_list="$3"
  local output_dir="$4"

  # Validate inputs
  if [[ ! -d "$prod_git_base" ]]; then
    log_error "Prod git base directory not found: $prod_git_base"
    exit 1
  fi

  if [[ ! -d "$archive_git_base" ]]; then
    log_error "Archive git base directory not found: $archive_git_base"
    exit 1
  fi

  if [[ ! -f "$repo_list" ]]; then
    log_error "Repo list file not found: $repo_list"
    exit 1
  fi

  log_info "=== Git Data Comparison ==="
  log_info "Prod git base: $prod_git_base"
  log_info "Archive git base: $archive_git_base"
  log_info "Repo list: $repo_list"
  log_info "Output: $output_dir"
  log_info "Started: $(date)"
  echo ""

  # Create output directory
  mkdir -p "$output_dir"

  # Output file, initialized with the TSV header
  local tsv_file="$output_dir/git-ancestry.tsv"
  printf 'repo\tnpub\trelationship\tdetails\n' > "$tsv_file"

  # Count candidate lines (neither comments nor blank). grep -c already
  # prints "0" when nothing matches but exits 1, so the fallback must not
  # print a second "0".
  local total_repos
  total_repos=$(grep -c -v -e '^#' -e '^[[:space:]]*$' "$repo_list" 2> /dev/null || true)
  total_repos=${total_repos:-0}
  log_info "Processing $total_repos repos..."
  echo ""

  # Counters
  local count=0
  local count_archive_ahead=0
  local count_in_sync=0
  local count_prod_ahead=0
  local count_diverged=0
  local count_archive_only=0
  local count_prod_only=0
  local count_both_empty=0

  local repo npub rest prod_git archive_git relationship details

  # Process each repo; the "|| [[ -n ... ]]" keeps a last line that has no
  # trailing newline.
  # shellcheck disable=SC2034 # rest only absorbs the remaining columns
  while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do
    # Skip comments and empty lines
    [[ "$repo" =~ ^# ]] && continue
    [[ -z "$repo" ]] && continue

    # Strip the padding around the " | " separators
    repo="${repo// /}"
    npub="${npub// /}"

    [[ -z "$repo" || -z "$npub" ]] && continue

    count=$((count + 1))

    # Build git paths
    prod_git="$prod_git_base/${npub}/${repo}.git"
    archive_git="$archive_git_base/${npub}/${repo}.git"

    # Compare
    details=""
    relationship=$(compare_repo_git "$prod_git" "$archive_git")

    # Count by relationship
    case "$relationship" in
      archive-ahead) count_archive_ahead=$((count_archive_ahead + 1)) ;;
      in-sync) count_in_sync=$((count_in_sync + 1)) ;;
      prod-ahead) count_prod_ahead=$((count_prod_ahead + 1)) ;;
      diverged) count_diverged=$((count_diverged + 1)) ;;
      archive-only) count_archive_only=$((count_archive_only + 1)) ;;
      prod-only) count_prod_only=$((count_prod_only + 1)) ;;
      both-empty) count_both_empty=$((count_both_empty + 1)) ;;
    esac

    # Output TSV line
    printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$relationship" "$details" >> "$tsv_file"

    # Progress indicator every 10 repos
    if ((count % 10 == 0)); then
      log_progress "Processed $count/$total_repos repos..."
    fi
  done < "$repo_list"

  # Clear progress line
  echo "" >&2

  # Summary
  echo ""
  log_info "=== Comparison Summary ==="
  log_success "Archive ahead (use archive data): $count_archive_ahead"
  log_success "In sync: $count_in_sync"
  log_warn "Prod ahead (needs re-sync): $count_prod_ahead"
  log_error "Diverged (manual review): $count_diverged"
  log_info "Archive only: $count_archive_only"
  log_info "Prod only: $count_prod_only"
  log_info "Both empty: $count_both_empty"
  echo ""
  log_info "Total: $count repos"
  log_info "Output: $tsv_file"
}
389
390main "$@"
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/30-extract-parse-failures.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/30-extract-parse-failures.sh
new file mode 100755
index 0000000..d762aae
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/30-extract-parse-failures.sh
@@ -0,0 +1,774 @@
1#!/usr/bin/env bash
2#
3# 30-extract-parse-failures.sh - Extract parse failure events from systemd logs
4#
5# PHASE 4a of the GRASP relay to ngit-grasp migration analysis pipeline.
6# Extracts structured [PARSE_FAIL] log entries AND "Invalid announcement"
7# rejections from journalctl.
8#
9# USAGE:
10# ./30-extract-parse-failures.sh <service-name> <output-dir> [options]
11#
12# EXAMPLES:
13# # Extract from ngit-grasp service (last 30 days, default)
14# ./30-extract-parse-failures.sh ngit-grasp.service output/logs
15#
16# # Extract with custom time range
17# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-01"
18#
19# # Extract from specific time window
20# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22"
21#
22# OPTIONS:
23# --since <date> Start date for log extraction (default: 30 days ago)
24# --until <date> End date for log extraction (default: now)
25# --dry-run Show what would be extracted without writing files
26#
27# ENRICHMENT:
28# The script automatically enriches parse failures with repo/npub information
29# by extracting from "Added rejected announcement" log entries which include
30# pubkey and identifier fields. Hex pubkeys are converted to npub format using
31# `nak encode npub <hex-pubkey>` if the nak tool is available.
32#
33# OUTPUT:
34# <output-dir>/parse-failures.txt
35#
36# OUTPUT FORMAT (TSV):
37# event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
38#
39# EXPECTED LOG FORMATS:
40# The script looks for three types of log entries:
41#
42# 1. Structured [PARSE_FAIL] entries:
43# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1...
44#
45# 2. "Invalid announcement" rejections (write policy):
46# Event rejected by write policy event_id=abc123... relay=wss://... kind=30617 reason=Invalid announcement: multiple clone tags found...
47#
48# 3. "Added rejected announcement" entries (for enrichment):
49# Added rejected announcement to two-tier index event_id=abc123... kind=30617 identifier=myrepo pubkey=hex...
50# These entries provide pubkey and identifier for enriching write policy rejections.
51#
52# NOTE: Builder logs ("Rejected repository announcement note1xxx:") are NOT extracted
53# because they use bech32 (note1) IDs while write policy logs use hex IDs. Extracting
54# both would cause double-counting since deduplication only works within each format.
55# Write policy logs contain the same events, so we don't lose any data.
56#
57# Required fields: kind, event_id, reason
58# Enrichment fields: repo (identifier), npub (converted from hex pubkey)
59#
60# DEPENDENCY:
61# This script requires logging improvements in ngit-grasp to emit structured
62# [PARSE_FAIL] log entries. Until those are implemented, this script will
63# find no matching entries (which is handled gracefully).
64#
65# "Invalid announcement" rejections are logged by the write policy and
66# should be present in any ngit-grasp deployment.
67#
68# See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)
69#
70# Expected Rust logging code for [PARSE_FAIL]:
71# tracing::warn!(
72# target: "migration",
73# "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}",
74# event.kind, event.id, reason, identifier, npub
75# );
76#
77# PREREQUISITES:
78# - journalctl (systemd)
79# - grep, awk, sed (standard Unix tools)
80# - Access to systemd journal (may require sudo or journal group membership)
81#
82# RUNTIME: Depends on log volume, typically < 30 seconds
83#
84# SEE ALSO:
85# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
86# 31-extract-purgatory-expiry.sh - Companion script for purgatory expiry logs
87#
88
set -euo pipefail

# Directory containing this script, used to locate sibling helper scripts
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Source the service validation helper when it is present next to this script
if [[ -f "$SCRIPT_DIR/validate-service.sh" ]]; then
  # shellcheck source=/dev/null
  source "$SCRIPT_DIR/validate-service.sh"
fi

# ANSI colour codes used by the log_* helpers.
# Every log_* helper writes to stderr, so colours are enabled only when
# stderr (fd 2) is a terminal. Testing stdout instead would leak escape
# codes into files when stderr is redirected (e.g. 2>run.log).
if [[ -t 2 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  BLUE=''
  NC=''
fi
113
# Print an informational message to stderr with a blue [INFO] tag.
# Arguments: $* - message text. It is printed verbatim: backslash sequences
#                 inside the message (e.g. in paths) are NOT interpreted.
log_info() {
  printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$*" >&2
}
117
# Print a success message to stderr with a green [OK] tag.
# Arguments: $* - message text. It is printed verbatim: backslash sequences
#                 inside the message are NOT interpreted.
log_success() {
  printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$*" >&2
}
121
# Print a warning to stderr with a yellow [WARN] tag.
# Arguments: $* - message text. It is printed verbatim: backslash sequences
#                 inside the message are NOT interpreted.
log_warn() {
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*" >&2
}
125
# Print an error message to stderr with a red [ERROR] tag.
# Arguments: $* - message text. It is printed verbatim: backslash sequences
#                 inside the message are NOT interpreted.
log_error() {
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*" >&2
}
129
# Print the command-line help to stdout and terminate with exit status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 <service-name> <output-dir> [options]" \
    "" \
    "Arguments:" \
    " service-name Systemd service name (e.g., ngit-grasp.service)" \
    " output-dir Directory to store extracted log data" \
    "" \
    "Options:" \
    " --since <date> Start date (default: 30 days ago)" \
    " --until <date> End date (default: now)" \
    " --dry-run Show what would be extracted without writing" \
    "" \
    "Examples:" \
    " $0 ngit-grasp.service output/logs" \
    " $0 ngit-grasp.service output/logs --since '2026-01-01'" \
    " $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'" \
    "" \
    "Expected log formats:" \
    " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." \
    " Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ..." \
    "" \
    "Enrichment:" \
    " Parse failures are automatically enriched with repo/npub from" \
    " 'Added rejected announcement' log entries. Hex pubkeys are converted" \
    " to npub format using 'nak encode npub' if available."
  exit 1
}
157
158# =============================================================================
159# AWK-BASED BATCH PARSING FUNCTIONS
160# =============================================================================
161# These functions use awk for efficient batch processing instead of per-line
162# grep calls. This provides ~400x speedup for large log files.
163#
164# NOTE: parse_builder_rejection_line() was removed to fix double-counting bug.
165# Builder logs use bech32 (note1) IDs while write policy logs use hex IDs.
166# Since deduplication only works within each format, extracting both caused
167# the same event to be counted twice. Write policy logs contain the same
168# events, so we don't lose any data by only extracting from that source.
169
# Parse [PARSE_FAIL] log lines in batch using awk
# Input:  $1 - file containing log lines with [PARSE_FAIL]
# Output: TSV lines: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
# Lines lacking kind, event_id or a non-empty quoted reason are dropped;
# repo and npub are optional and may be empty.
#
# Uses only POSIX awk (match + RSTART/RLENGTH). The three-argument
# match(str, re, arr) form is a gawk extension and fails on mawk/BSD awk.
parse_parse_fail_batch() {
  local input_file="$1"
  awk '
    # Text matched by regex re in the current line, without its first
    # skip characters and last trim characters; "" when there is no match.
    function grab(re, skip, trim) {
      if (!match($0, re)) return ""
      return substr($0, RSTART + skip, RLENGTH - skip - trim)
    }

    {
      kind     = grab("kind=[0-9]+", 5, 0)
      event_id = grab("event_id=[a-f0-9]+", 9, 0)
      reason   = grab("reason=\"[^\"]*\"", 8, 1)   # value between the quotes
      repo     = grab("repo=[^ ]+", 5, 0)          # optional
      npub     = grab("npub=[^ ]+", 5, 0)          # optional

      # Output only when the required fields are present
      if (kind != "" && event_id != "" && reason != "") {
        print event_id "\t" kind "\t" reason "\t" repo "\t" npub
      }
    }
  ' "$input_file"
}
204
# Parse "Invalid announcement" rejection log lines in batch using awk
# Input:  $1 - file containing "Event rejected by write policy" log lines
# Output: TSV lines: event_id<TAB>kind<TAB>reason<TAB><empty><TAB><empty>
# The reason is everything after the first "reason=" up to the end of line.
# Lines lacking event_id, kind or reason are dropped.
#
# Uses only POSIX awk (match + RSTART/RLENGTH). The three-argument
# match(str, re, arr) form is a gawk extension and fails on mawk/BSD awk.
parse_write_policy_rejection_batch() {
  local input_file="$1"
  awk '
    # Text matched by regex re in the current line, without its first
    # skip characters; "" when there is no match.
    function grab(re, skip) {
      if (!match($0, re)) return ""
      return substr($0, RSTART + skip, RLENGTH - skip)
    }

    {
      event_id = grab("event_id=[a-f0-9]+", 9)
      kind     = grab("kind=[0-9]+", 5)
      reason   = grab("reason=.*$", 7)

      # Output only when the required fields are present (repo and npub
      # columns stay empty for later enrichment)
      if (kind != "" && event_id != "" && reason != "") {
        print event_id "\t" kind "\t" reason "\t\t"
      }
    }
  ' "$input_file"
}
231
# Parse "Added rejected announcement" log lines in batch using awk
# Input:  $1 - file containing "Added rejected announcement to two-tier index"
#              log lines
# Output: TSV lines: event_id<TAB>identifier<TAB>pubkey_hex
# Lines lacking any of the three fields are dropped.
#
# Uses only POSIX awk (match + RSTART/RLENGTH). The three-argument
# match(str, re, arr) form is a gawk extension and fails on mawk/BSD awk.
parse_rejected_announcement_batch() {
  local input_file="$1"
  awk '
    # Text matched by regex re in the current line, without its first
    # skip characters; "" when there is no match.
    function grab(re, skip) {
      if (!match($0, re)) return ""
      return substr($0, RSTART + skip, RLENGTH - skip)
    }

    {
      event_id   = grab("event_id=[a-f0-9]+", 9)
      identifier = grab("identifier=[^ ]+", 11)   # repo name
      pubkey     = grab("pubkey=[a-f0-9]+", 7)    # hex pubkey

      # Output only when all fields are present
      if (event_id != "" && identifier != "" && pubkey != "") {
        print event_id "\t" identifier "\t" pubkey
      }
    }
  ' "$input_file"
}
258
# Enrich parse failures with repo/npub by looking up event_id in
# "Added rejected announcement" log entries.
#
# "Invalid announcement" rejections only log event_id and kind, not the repo
# name or npub, so without this step those two columns stay empty.
#
# Arguments:
#   $1 - parse failures file to enrich (rewritten in place);
#        format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub, "#" lines are headers
#   $2 - lookup file: event_id<TAB>identifier<TAB>pubkey_hex
#
# Behavior:
#   - Returns 0 without touching $1 when the lookup file is missing or empty.
#   - Rows whose repo/npub are already set are left as they are.
#   - Hex pubkeys are converted with one batched `nak encode npub` call when
#     `nak` is available. If the call fails or does not return exactly one
#     line per pubkey, hex pubkeys are used instead.
#     NOTE(review): assumes nak reads one hex key per stdin line and prints
#     one npub per line in the same order - confirm against the nak version in use.
#   - Counting uses awk rather than `grep -v | wc -l`, so a header-only file
#     does not abort the caller under `set -e -o pipefail`.
enrich_with_repo_npub() {
  local parse_failures_file="$1"
  local lookup_file="$2"

  # Validate lookup file exists and has content
  if [[ ! -f "$lookup_file" ]] || [[ ! -s "$lookup_file" ]]; then
    log_warn "No enrichment data available - repo/npub columns will remain empty"
    return 0
  fi

  log_info "Enriching parse failures with repo/npub from log entries..."

  # Check if we have nak for pubkey->npub conversion
  local can_convert_npub=false
  if command -v nak &> /dev/null; then
    can_convert_npub=true
    log_info " Using 'nak' for pubkey->npub conversion"
  else
    log_warn " 'nak' not found - will use hex pubkeys instead of npub"
  fi

  local lookup_count
  lookup_count=$(wc -l < "$lookup_file")
  lookup_count="${lookup_count//[^0-9]/}"
  log_info " Lookup table has $lookup_count entries"

  local unique_pubkeys_file npub_list_file npub_map_file enriched_file
  unique_pubkeys_file=$(mktemp)
  npub_list_file=$(mktemp)
  npub_map_file=$(mktemp)
  enriched_file=$(mktemp)

  # STEP 1: unique, non-empty pubkeys from lookup column 3
  awk -F'\t' '$3 != "" { print $3 }' "$lookup_file" | sort -u > "$unique_pubkeys_file"
  local unique_pubkey_count
  unique_pubkey_count=$(wc -l < "$unique_pubkeys_file")
  unique_pubkey_count="${unique_pubkey_count//[^0-9]/}"
  log_info " Converting $unique_pubkey_count unique pubkeys to npub format..."

  # STEP 2: batch conversion. nak is run directly (not inside a process
  # substitution) so its exit status is visible, and the result is used only
  # if it lines up one-to-one with the input. Otherwise paste would pair keys
  # with the wrong or with empty npubs.
  local converted=false
  local npub_count
  if [[ "$can_convert_npub" == true && "$unique_pubkey_count" -gt 0 ]]; then
    if nak encode npub < "$unique_pubkeys_file" > "$npub_list_file" 2>/dev/null; then
      npub_count=$(wc -l < "$npub_list_file")
      npub_count="${npub_count//[^0-9]/}"
      if [[ "$npub_count" -eq "$unique_pubkey_count" ]]; then
        paste "$unique_pubkeys_file" "$npub_list_file" > "$npub_map_file"
        converted=true
      fi
    fi
    if [[ "$converted" != true ]]; then
      log_warn " Batch npub conversion failed, using hex pubkeys"
    fi
  fi
  if [[ "$converted" != true ]]; then
    # Identity mapping: hex pubkey stands in for the npub
    awk '{print $1 "\t" $1}' "$unique_pubkeys_file" > "$npub_map_file"
  fi

  rm -f "$unique_pubkeys_file" "$npub_list_file"

  # STEP 3: single-pass awk join.
  # Copy header lines first (a file without headers is fine)
  grep '^#' "$parse_failures_file" > "$enriched_file" 2>/dev/null || true

  # The role=... operands are awk command-line assignments. Each takes effect
  # when awk reaches it, so every file is tagged correctly even if an earlier
  # file is empty (an FNR==1 counter would not advance on an empty file).
  awk -F'\t' -v OFS='\t' '
    role == "map" {
      npub_map[$1] = $2
      next
    }
    role == "lookup" {
      lookup_repo[$1] = $2
      lookup_pubkey[$1] = $3
      next
    }
    /^#/ { next } # Skip headers (already copied)
    {
      repo = $4
      npub = $5

      # If repo/npub empty, try to enrich from lookup
      if (repo == "" && ($1 in lookup_repo)) {
        repo = lookup_repo[$1]
      }
      if (npub == "" && ($1 in lookup_pubkey)) {
        pubkey = lookup_pubkey[$1]
        if ((pubkey in npub_map) && npub_map[pubkey] != "") {
          npub = npub_map[pubkey]
        } else {
          npub = pubkey # Fallback to hex
        }
      }

      print $1, $2, $3, repo, npub
    }
  ' role=map "$npub_map_file" role=lookup "$lookup_file" role=data "$parse_failures_file" >> "$enriched_file"

  rm -f "$npub_map_file"

  # Count entries; "n + 0" prints 0 when nothing matched
  local enriched_count total_count
  total_count=$(awk '!/^#/ { n++ } END { print n + 0 }' "$parse_failures_file")
  # Entries that have non-empty repo AND npub after enrichment
  enriched_count=$(awk -F'\t' '!/^#/ && $4 != "" && $5 != "" { n++ } END { print n + 0 }' "$enriched_file")

  # Replace original with enriched version
  mv "$enriched_file" "$parse_failures_file"

  log_info " Enriched $enriched_count of $total_count parse failures with repo/npub"
  log_success "Enrichment complete"
}
403
# Parse one "Added rejected announcement" log line for the enrichment lookup table
# Input: $1 - log line containing "Added rejected announcement to two-tier index"
# Output: TSV line: event_id<TAB>identifier<TAB>pubkey_hex
#         (nothing is printed when any of the three fields is missing)
#
# Uses bash's built-in regex matching instead of three `echo | grep -oP`
# pipelines: no PCRE-capable grep is needed, no processes are forked, and
# only the first occurrence of each key is taken. `grep -o` prints every
# occurrence, which would put newlines inside a TSV field.
parse_rejected_announcement_line() {
  local line="$1"

  local event_id="" identifier="" pubkey_hex=""
  # Patterns live in variables so the space inside [^ ] needs no quoting
  local event_id_re='event_id=([a-f0-9]+)'
  local identifier_re='identifier=([^ ]+)'
  local pubkey_re='pubkey=([a-f0-9]+)'

  # Extract event_id=VALUE (hex string)
  if [[ "$line" =~ $event_id_re ]]; then
    event_id="${BASH_REMATCH[1]}"
  fi

  # Extract identifier=VALUE (repo name)
  if [[ "$line" =~ $identifier_re ]]; then
    identifier="${BASH_REMATCH[1]}"
  fi

  # Extract pubkey=VALUE (hex string)
  if [[ "$line" =~ $pubkey_re ]]; then
    pubkey_hex="${BASH_REMATCH[1]}"
  fi

  # Only output if we have all required fields
  if [[ -n "$event_id" && -n "$identifier" && -n "$pubkey_hex" ]]; then
    printf '%s\t%s\t%s\n' "$event_id" "$identifier" "$pubkey_hex"
  fi
}
426
# Count the data lines (those not starting with "#") in file $1.
# Prints 0 for a header-only file. `grep -v '^#' | wc -l` is avoided because
# its grep stage exits 1 when nothing matches, which fails the pipeline under
# `set -o pipefail` and aborts the script under `set -e`.
_count_data_lines() {
  awk '!/^#/ { n++ } END { print n + 0 }' "$1"
}

# Print the comment header shared by every parse-failures.txt to stdout.
# Arguments: $1 - service name, $2 - since date (may be empty), $3 - until date (may be empty)
_print_parse_failures_header() {
  local service="$1" since_date="$2" until_date="$3"
  echo "# Parse failures and invalid announcements extracted from $service"
  echo "# Time range: ${since_date:-beginning} to ${until_date:-now}"
  echo "# Extracted: $(date -Iseconds)"
  echo "#"
  echo "# Includes:"
  echo "# - [PARSE_FAIL] structured log entries"
  echo "# - \"Invalid announcement\" rejections"
  echo "#"
  echo "# Format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub"
  echo "# Note: repo and npub may be empty for some entries"
}

# Main
#
# Usage: main <service-name> <output-dir> [--since <date>] [--until <date>] [--dry-run]
#
# Reads the systemd journal of <service-name> and writes
# <output-dir>/parse-failures.txt (event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub)
# from [PARSE_FAIL] entries and "Invalid announcement" write-policy
# rejections, deduplicated by event_id and enriched from
# "Added rejected announcement" entries. With --dry-run only the number of
# matching log lines is reported. Always terminates the script via exit.
main() {
  if [[ $# -lt 2 ]]; then
    usage
  fi

  local service="$1"
  local output_dir="$2"
  shift 2

  # Default time range: last 30 days (GNU date first, then BSD date)
  local since_date
  since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "")
  local until_date=""
  local dry_run=false

  # Parse options
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --since | --until)
        # Without this check a trailing --since/--until reads an unset $2
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires a value"
          usage
          exit 1
        fi
        if [[ "$1" == "--since" ]]; then
          since_date="$2"
        else
          until_date="$2"
        fi
        shift 2
        ;;
      --dry-run)
        dry_run=true
        shift
        ;;
      *)
        log_error "Unknown option: $1"
        usage
        # Defensive: never loop on the same argument if usage returns
        exit 1
        ;;
    esac
  done

  # Validate service name format
  if [[ ! "$service" =~ \.service$ ]]; then
    service="${service}.service"
  fi

  # Validate service is appropriate for structured logging
  # This prevents the common mistake of using ngit-relay instead of ngit-grasp
  if type validate_service_for_structured_logging &>/dev/null; then
    # Use non-interactive mode if not a terminal, skip log check (we'll do our own)
    local interactive="true"
    [[ ! -t 0 ]] && interactive="false"

    if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then
      log_error "Service validation failed. Use an ngit-grasp service for structured logging."
      exit 1
    fi
  else
    # Fallback validation if helper not available
    if [[ "$service" == *"ngit-relay"* ]]; then
      log_error "Service name appears to be ngit-relay: $service"
      log_error "Structured logging ([PARSE_FAIL]) only exists in ngit-grasp services."
      log_error "Please use the ngit-grasp archive service instead."
      log_error ""
      log_error "To find the correct service:"
      log_error " systemctl list-units 'ngit-grasp*' --all"
      exit 1
    fi
  fi

  log_info "Extracting parse failures from systemd logs"
  log_info "Service: $service"
  log_info "Output: $output_dir"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"

  # Check if journalctl is available
  if ! command -v journalctl &> /dev/null; then
    log_error "journalctl not found. This script requires systemd."
    exit 1
  fi

  # Validate service exists (check if journalctl can find any logs for it)
  # Note: We don't require the service to be running, just that it has logs
  if ! journalctl --no-pager -u "$service" -n 1 &>/dev/null; then
    log_warn "Could not query logs for service: $service"
    log_warn "This may indicate the service doesn't exist or you lack permissions."
    log_warn ""
    log_warn "To list available ngit-grasp services:"
    log_warn " systemctl list-units 'ngit-grasp*' --all"
    log_warn " journalctl --list-boots # Check if you have journal access"
    log_warn ""
    # Continue anyway - the service might exist but have no logs yet
  fi

  # Build the journalctl command as an array so that dates containing
  # spaces or quotes (e.g. --since "2026-01-01 10:00") are passed through
  # verbatim, with no eval and no re-parsing by the shell
  local -a journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)
  if [[ -n "$since_date" ]]; then
    journal_cmd+=(--since "$since_date")
  fi
  if [[ -n "$until_date" ]]; then
    journal_cmd+=(--until "$until_date")
  fi

  log_info "Running: ${journal_cmd[*]} | grep '[PARSE_FAIL]' or 'Invalid announcement'"

  if [[ "$dry_run" == true ]]; then
    log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt"

    log_info "Checking for matching log entries..."
    local parse_fail_count invalid_announcement_count
    # grep -c already prints 0 when nothing matches (and exits 1), so the
    # failure is only swallowed; echoing another "0" would produce "00"
    parse_fail_count=$("${journal_cmd[@]}" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || true)
    parse_fail_count="${parse_fail_count//[^0-9]/}"
    parse_fail_count="${parse_fail_count:-0}"

    invalid_announcement_count=$("${journal_cmd[@]}" 2>/dev/null \
      | grep 'Event rejected by write policy' \
      | grep -c 'Invalid announcement' || true)
    invalid_announcement_count="${invalid_announcement_count//[^0-9]/}"
    invalid_announcement_count="${invalid_announcement_count:-0}"

    log_info "Found $parse_fail_count [PARSE_FAIL] entries"
    log_info "Found $invalid_announcement_count 'Invalid announcement' rejections"

    if [[ "$parse_fail_count" -eq 0 && "$invalid_announcement_count" -eq 0 ]]; then
      log_warn "No matching entries found in logs."
      log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
      log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)"
    fi

    exit 0
  fi

  # Create output directory
  mkdir -p "$output_dir"

  local output_file="$output_dir/parse-failures.txt"

  log_info "Extracting log entries..."

  # Temp files for intermediate results
  local temp_stderr temp_parse_fail temp_write_policy_rejection temp_rejected_announcement
  temp_stderr=$(mktemp)
  temp_parse_fail=$(mktemp)
  temp_write_policy_rejection=$(mktemp)
  temp_rejected_announcement=$(mktemp)

  # One streaming pass over the journal, split into three files:
  #   - [PARSE_FAIL] entries
  #   - "Event rejected by write policy" lines mentioning "Invalid announcement"
  #     (write policy logs carry hex IDs; builder logs with note1 IDs are
  #     deliberately not collected to avoid double-counting)
  #   - "Added rejected announcement" lines, used later for enrichment
  log_info " Searching for [PARSE_FAIL], write policy rejection and rejected announcement entries..."
  "${journal_cmd[@]}" 2>"$temp_stderr" | awk \
    -v parse_fail_out="$temp_parse_fail" \
    -v write_policy_out="$temp_write_policy_rejection" \
    -v rejected_out="$temp_rejected_announcement" '
      /\[PARSE_FAIL\]/ { print > parse_fail_out }
      /Event rejected by write policy/ && /Invalid announcement/ { print > write_policy_out }
      /Added rejected announcement to two-tier index/ { print > rejected_out }
    ' || true

  # Report journalctl diagnostics but do not fail: empty logs are valid
  local journal_stderr
  journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true)
  rm -f "$temp_stderr"
  if [[ -n "$journal_stderr" ]]; then
    log_warn "journalctl reported: $journal_stderr"
  fi

  # Check if we found anything
  local parse_fail_line_count write_policy_line_count rejected_announcement_line_count
  parse_fail_line_count=$(wc -l < "$temp_parse_fail")
  parse_fail_line_count="${parse_fail_line_count//[^0-9]/}"
  write_policy_line_count=$(wc -l < "$temp_write_policy_rejection")
  write_policy_line_count="${write_policy_line_count//[^0-9]/}"
  rejected_announcement_line_count=$(wc -l < "$temp_rejected_announcement")
  rejected_announcement_line_count="${rejected_announcement_line_count//[^0-9]/}"

  log_info " Found $parse_fail_line_count [PARSE_FAIL] log lines"
  log_info " Found $write_policy_line_count write policy rejection log lines"
  log_info " Found $rejected_announcement_line_count rejected announcement log lines (for enrichment)"

  if [[ "$parse_fail_line_count" -eq 0 && "$write_policy_line_count" -eq 0 ]]; then
    log_warn "No matching entries found in logs."
    log_warn ""
    log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
    log_warn "The script looks for:"
    log_warn ""
    log_warn " 1. [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..."
    log_warn " 2. Event rejected by write policy event_id=... kind=30617 reason=Invalid announcement: ..."
    log_warn ""
    log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)"
    log_warn ""

    # Create empty output file with header comment
    {
      _print_parse_failures_header "$service" "$since_date" "$until_date"
      echo "#"
      echo "# NOTE: No matching entries found."
      echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
    } > "$output_file"

    rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_rejected_announcement"
    log_info "Created empty output file: $output_file"
    exit 0
  fi

  # Write header
  _print_parse_failures_header "$service" "$since_date" "$until_date" > "$output_file"

  # Parse [PARSE_FAIL] entries using batch awk processing
  log_info " Parsing [PARSE_FAIL] entries..."
  local parse_fail_count=0
  if [[ "$parse_fail_line_count" -gt 0 ]]; then
    parse_parse_fail_batch "$temp_parse_fail" >> "$output_file"
    parse_fail_count=$(_count_data_lines "$output_file")
  fi

  # Parse write policy rejection entries using batch awk processing
  log_info " Parsing write policy rejection entries..."
  local write_policy_count=0
  if [[ "$write_policy_line_count" -gt 0 ]]; then
    local before_count after_count
    before_count=$(_count_data_lines "$output_file")
    parse_write_policy_rejection_batch "$temp_write_policy_rejection" >> "$output_file"
    after_count=$(_count_data_lines "$output_file")
    write_policy_count=$((after_count - before_count))
  fi

  local invalid_announcement_count=$write_policy_count

  # Build enrichment lookup table from "Added rejected announcement" entries
  local enrichment_lookup_file
  enrichment_lookup_file=$(mktemp)

  log_info " Building enrichment lookup table..."
  if [[ "$rejected_announcement_line_count" -gt 0 ]]; then
    parse_rejected_announcement_batch "$temp_rejected_announcement" > "$enrichment_lookup_file"
  fi

  rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_rejected_announcement"

  # Deduplicate by event_id (first column)
  log_info " Deduplicating entries..."
  local deduped_file
  deduped_file=$(mktemp)
  # Preserve header lines (starting with #) and deduplicate data lines.
  # awk (not grep -v) feeds sort so that a file with no data lines does not
  # fail the pipeline under pipefail.
  grep '^#' "$output_file" > "$deduped_file"
  awk '!/^#/' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file"
  mv "$deduped_file" "$output_file"

  # Deduplicate enrichment lookup table by event_id
  if [[ -s "$enrichment_lookup_file" ]]; then
    sort -t$'\t' -k1,1 -u "$enrichment_lookup_file" > "$enrichment_lookup_file.deduped"
    mv "$enrichment_lookup_file.deduped" "$enrichment_lookup_file"
  fi

  # Enrich with repo/npub from "Added rejected announcement" log entries
  enrich_with_repo_npub "$output_file" "$enrichment_lookup_file"

  rm -f "$enrichment_lookup_file"

  # Count final entries (excluding header lines)
  local count
  count=$(_count_data_lines "$output_file")

  # Summary (the per-source counts below are taken before deduplication)
  echo ""
  log_info "=== Extraction Summary ==="
  log_info "Service: $service"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
  log_success "Extracted $count total entries"
  log_info " - [PARSE_FAIL] entries: $parse_fail_count"
  log_info " - Invalid announcement rejections: $invalid_announcement_count"
  echo ""
  log_info "Output file: $output_file"

  if [[ $count -gt 0 ]]; then
    echo ""
    log_info "Sample entries (first 5):"
    # Use a subshell to avoid SIGPIPE issues with set -e
    # Format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
    (grep -v '^#' "$output_file" | head -5 | while IFS=$'\t' read -r event_id kind reason repo npub; do
      echo " kind=$kind event_id=${event_id:0:16}... reason=\"${reason:0:60}...\""
    done) || true

    echo ""
    log_info "Breakdown by event kind:"
    # kind is column 2
    (grep -v '^#' "$output_file" | awk -F'\t' '{print $2}' | sort | uniq -c | sort -rn | while read -r cnt kind; do
      echo " kind $kind: $cnt failures"
    done) || true
  fi

  # Breakdown by reason pattern (for invalid announcements)
  if [[ $invalid_announcement_count -gt 0 ]]; then
    echo ""
    log_info "Breakdown by reason pattern:"
    # Extract the main reason type (before the colon details)
    (grep -v '^#' "$output_file" | awk -F'\t' '{print $3}' | sed 's/:.*//' | sort | uniq -c | sort -rn | head -10 | while read -r cnt reason; do
      echo " $reason: $cnt"
    done) || true
  fi

  # Explicit success exit
  exit 0
}
773
774main "$@"
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/31-extract-purgatory-expiry.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/31-extract-purgatory-expiry.sh
new file mode 100755
index 0000000..a0c8ad0
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/31-extract-purgatory-expiry.sh
@@ -0,0 +1,408 @@
1#!/usr/bin/env bash
2#
3# 31-extract-purgatory-expiry.sh - Extract purgatory expiry events from systemd logs
4#
5# PHASE 4b of the GRASP relay to ngit-grasp migration analysis pipeline.
6# Extracts structured [PURGATORY_EXPIRED] log entries from journalctl.
7#
8# USAGE:
9# ./31-extract-purgatory-expiry.sh <service-name> <output-dir> [options]
10#
11# EXAMPLES:
12# # Extract from ngit-grasp service (last 30 days, default)
13# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs
14#
15# # Extract with custom time range
16# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-01"
17#
18# # Extract from specific time window
19# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22"
20#
21# OPTIONS:
22# --since <date> Start date for log extraction (default: 30 days ago)
23# --until <date> End date for log extraction (default: now)
24# --dry-run Show what would be extracted without writing files
25#
26# OUTPUT:
27# <output-dir>/purgatory-expired.txt
28#
29# OUTPUT FORMAT (TSV):
30# repo<TAB>npub<TAB>timestamp<TAB>reason
31#
32# EXPECTED LOG FORMAT:
33# The script looks for structured log entries in this format:
34#
35# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="clone URL unreachable after 7 days"
36#
37# Required fields: repo, npub
38# Optional fields: reason (explains why purgatory expired)
39#
40# BACKGROUND:
41# "Purgatory" is the state where ngit-grasp has received an announcement event
42# but cannot yet sync the git data (e.g., clone URL unreachable, git server down).
43# After a configurable timeout (default 7 days), the repository is marked as
44# expired and removed from purgatory.
45#
46# Purgatory expiry during migration analysis indicates repositories that:
47# - Had valid announcements on the production relay
48# - Could not be synced to the archive relay
49# - May need manual intervention or investigation
50#
51# DEPENDENCY:
52# This script requires logging improvements in ngit-grasp to emit structured
53# [PURGATORY_EXPIRED] log entries. Until those are implemented, this script
54# will find no matching entries (which is handled gracefully).
55#
56# See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)
57#
58# Expected Rust logging code:
59# tracing::warn!(
60# target: "migration",
61# "[PURGATORY_EXPIRED] repo={} npub={} reason=\"{}\"",
62# identifier, npub, reason
63# );
64#
65# PREREQUISITES:
66# - journalctl (systemd)
67# - grep, awk (standard Unix tools)
68# - Access to systemd journal (may require sudo or journal group membership)
69#
70# RUNTIME: Depends on log volume, typically < 30 seconds
71#
72# SEE ALSO:
73# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
74# 30-extract-parse-failures.sh - Companion script for parse failure logs
75#
76
set -euo pipefail

# Directory containing this script; helper scripts are looked up next to it
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Load the service validation helper when it is present
if [[ -f "$SCRIPT_DIR/validate-service.sh" ]]; then
  source "$SCRIPT_DIR/validate-service.sh"
fi

# Color codes for log prefixes: empty by default, filled in only when
# stdout is a terminal
RED=''
GREEN=''
YELLOW=''
BLUE=''
NC=''
if [[ -t 1 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
fi
101
# Print an informational message to stderr, prefixed with [INFO].
# %b expands the escape sequences stored in the color variables. The message
# goes through %s, so backslashes in it (paths, journalctl output) are
# printed verbatim instead of being interpreted as `echo -e` would.
log_info() {
  printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$*" >&2
}
105
# Print a success message to stderr, prefixed with [OK].
# Colors are expanded with %b; the message is printed verbatim with %s so
# backslashes in it are not interpreted (as `echo -e` would do).
log_success() {
  printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$*" >&2
}
109
# Print a warning to stderr, prefixed with [WARN].
# Colors are expanded with %b; the message is printed verbatim with %s so
# backslashes in it (e.g. in relayed journalctl errors) are not interpreted.
log_warn() {
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*" >&2
}
113
# Print an error message to stderr, prefixed with [ERROR].
# Colors are expanded with %b; the message is printed verbatim with %s so
# backslashes in user-supplied text (e.g. an unknown option) are not interpreted.
log_error() {
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*" >&2
}
117
# Print the command-line help to stdout and terminate with exit status 1.
usage() {
  local self="$0"
  # One argument per output line; empty arguments produce the blank lines
  printf '%s\n' \
    "Usage: $self <service-name> <output-dir> [options]" \
    "" \
    "Arguments:" \
    " service-name Systemd service name (e.g., ngit-grasp.service)" \
    " output-dir Directory to store extracted log data" \
    "" \
    "Options:" \
    " --since <date> Start date (default: 30 days ago)" \
    " --until <date> End date (default: now)" \
    " --dry-run Show what would be extracted without writing" \
    "" \
    "Examples:" \
    " $self ngit-grasp.service output/logs" \
    " $self ngit-grasp.service output/logs --since '2026-01-01'" \
    " $self ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'" \
    "" \
    "Expected log format:" \
    ' [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="..."'
  exit 1
}
139
# Parse a single log line and extract fields
# Input: $1 - log line containing [PURGATORY_EXPIRED]
# Output: TSV line: repo<TAB>npub<TAB>timestamp<TAB>reason
#         (nothing is printed unless both repo and npub are present;
#         timestamp and reason may be empty)
#
# Uses bash's built-in regex matching rather than four `echo | grep -oP`
# pipelines per line: no PCRE-capable grep is needed, nothing is forked, and
# only the first occurrence of each key is used. `grep -o` returns every
# occurrence, so a reason such as "moved to repo=b" would put a newline
# inside the repo field.
parse_log_line() {
  local line="$1"

  local timestamp="" repo="" npub="" reason=""
  # Patterns live in variables so spaces and quotes need no escaping
  local timestamp_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}'
  local repo_re='repo=([^ ]+)'
  local npub_re='npub=([^ ]+)'
  local reason_re='reason="([^"]*)'

  # ISO timestamp at the beginning of the line (any timezone suffix is dropped)
  if [[ "$line" =~ $timestamp_re ]]; then
    timestamp="${BASH_REMATCH[0]}"
  fi

  # repo=VALUE (unquoted identifier)
  if [[ "$line" =~ $repo_re ]]; then
    repo="${BASH_REMATCH[1]}"
  fi

  # npub=VALUE (npub1... format)
  if [[ "$line" =~ $npub_re ]]; then
    npub="${BASH_REMATCH[1]}"
  fi

  # reason="VALUE" (quoted string, optional)
  if [[ "$line" =~ $reason_re ]]; then
    reason="${BASH_REMATCH[1]}"
  fi

  # Only output if we have the required fields
  if [[ -n "$repo" && -n "$npub" ]]; then
    printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$timestamp" "$reason"
  fi
}
167
# Print the 4-line comment header of purgatory-expired.txt to stdout.
# Arguments: $1 - service name, $2 - since date (may be empty), $3 - until date (may be empty)
_print_purgatory_header() {
  local service="$1" since_date="$2" until_date="$3"
  echo "# Purgatory expiry events extracted from $service"
  echo "# Time range: ${since_date:-beginning} to ${until_date:-now}"
  echo "# Extracted: $(date -Iseconds)"
  echo "# Format: repo<TAB>npub<TAB>timestamp<TAB>reason"
}

# Main
#
# Usage: main <service-name> <output-dir> [--since <date>] [--until <date>] [--dry-run]
#
# Reads the systemd journal of <service-name>, keeps the lines containing
# [PURGATORY_EXPIRED], parses each with parse_log_line and writes
# <output-dir>/purgatory-expired.txt (repo<TAB>npub<TAB>timestamp<TAB>reason).
# With --dry-run only the number of matching log lines is reported.
# Always terminates the script via exit.
main() {
  if [[ $# -lt 2 ]]; then
    usage
  fi

  local service="$1"
  local output_dir="$2"
  shift 2

  # Default time range: last 30 days (GNU date first, then BSD date)
  local since_date
  since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "")
  local until_date=""
  local dry_run=false

  # Parse options
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --since | --until)
        # Without this check a trailing --since/--until trips `set -u` on $2
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires a value"
          usage
        fi
        if [[ "$1" == "--since" ]]; then
          since_date="$2"
        else
          until_date="$2"
        fi
        shift 2
        ;;
      --dry-run)
        dry_run=true
        shift
        ;;
      *)
        log_error "Unknown option: $1"
        usage
        ;;
    esac
  done

  # Validate service name format
  if [[ ! "$service" =~ \.service$ ]]; then
    service="${service}.service"
  fi

  # Validate service is appropriate for structured logging
  # This prevents the common mistake of using ngit-relay instead of ngit-grasp
  if type validate_service_for_structured_logging &>/dev/null; then
    # Use non-interactive mode if not a terminal, skip log check (we'll do our own)
    local interactive="true"
    [[ ! -t 0 ]] && interactive="false"

    if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then
      log_error "Service validation failed. Use an ngit-grasp service for structured logging."
      exit 1
    fi
  else
    # Fallback validation if helper not available
    if [[ "$service" == *"ngit-relay"* ]]; then
      log_error "Service name appears to be ngit-relay: $service"
      log_error "Structured logging ([PURGATORY_EXPIRED]) only exists in ngit-grasp services."
      log_error "Please use the ngit-grasp archive service instead."
      log_error ""
      log_error "To find the correct service:"
      log_error " systemctl list-units 'ngit-grasp*' --all"
      exit 1
    fi
  fi

  log_info "Extracting purgatory expiry events from systemd logs"
  log_info "Service: $service"
  log_info "Output: $output_dir"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"

  # Check if journalctl is available
  if ! command -v journalctl &> /dev/null; then
    log_error "journalctl not found. This script requires systemd."
    exit 1
  fi

  # Validate service exists (check if journalctl can find any logs for it)
  # Note: We don't require the service to be running, just that it has logs
  if ! journalctl --no-pager -u "$service" -n 1 &>/dev/null; then
    log_warn "Could not query logs for service: $service"
    log_warn "This may indicate the service doesn't exist or you lack permissions."
    log_warn ""
    log_warn "To list available ngit-grasp services:"
    log_warn " systemctl list-units 'ngit-grasp*' --all"
    log_warn " journalctl --list-boots # Check if you have journal access"
    log_warn ""
    # Continue anyway - the service might exist but have no logs yet
  fi

  # Build the journalctl command as an array so that dates containing
  # spaces or quotes are passed through verbatim, with no eval
  local -a journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)
  if [[ -n "$since_date" ]]; then
    journal_cmd+=(--since "$since_date")
  fi
  if [[ -n "$until_date" ]]; then
    journal_cmd+=(--until "$until_date")
  fi

  log_info "Running: ${journal_cmd[*]} | grep '\\[PURGATORY_EXPIRED\\]'"

  if [[ "$dry_run" == true ]]; then
    log_info "[DRY RUN] Would extract to: $output_dir/purgatory-expired.txt"

    log_info "Checking for matching log entries..."
    local sample_count
    # grep -c already prints 0 when nothing matches (and exits 1), so the
    # failure is only swallowed; echoing another "0" would produce "00"
    sample_count=$("${journal_cmd[@]}" 2>/dev/null | grep -c '\[PURGATORY_EXPIRED\]' || true)
    sample_count="${sample_count//[^0-9]/}"
    sample_count="${sample_count:-0}"
    log_info "Found $sample_count matching log entries"

    if [[ "$sample_count" -eq 0 ]]; then
      log_warn "No [PURGATORY_EXPIRED] entries found in logs."
      log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
      log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)"
    fi

    exit 0
  fi

  # Create output directory
  mkdir -p "$output_dir"

  local output_file="$output_dir/purgatory-expired.txt"

  log_info "Extracting log entries..."

  # Stream matching lines to a temp file instead of holding them in a shell
  # variable; stderr is captured separately so journalctl errors can be reported
  local temp_stderr temp_matches journal_stderr
  temp_stderr=$(mktemp)
  temp_matches=$(mktemp)

  "${journal_cmd[@]}" 2>"$temp_stderr" | grep '\[PURGATORY_EXPIRED\]' > "$temp_matches" || true
  journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true)
  rm -f "$temp_stderr"

  # Report any journalctl errors (but don't fail - empty logs are valid)
  if [[ -n "$journal_stderr" ]]; then
    log_warn "journalctl reported: $journal_stderr"
  fi

  if [[ ! -s "$temp_matches" ]]; then
    rm -f "$temp_matches"
    log_warn "No [PURGATORY_EXPIRED] entries found in logs."
    log_warn ""
    log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
    log_warn "The structured log format required by this script:"
    log_warn ""
    log_warn " [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason=\"...\""
    log_warn ""
    log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)"
    log_warn ""

    # Create empty output file with header comment
    {
      _print_purgatory_header "$service" "$since_date" "$until_date"
      echo "#"
      echo "# NOTE: No [PURGATORY_EXPIRED] entries found."
      echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
    } > "$output_file"

    log_info "Created empty output file: $output_file"
    exit 0
  fi

  # Write header; data rows start on the line after it
  local header_lines=4
  local first_data_line=$((header_lines + 1))
  _print_purgatory_header "$service" "$since_date" "$until_date" > "$output_file"

  # Parse each line; parse_log_line prints nothing for unparsable lines
  local line
  while IFS= read -r line; do
    parse_log_line "$line"
  done < "$temp_matches" >> "$output_file"
  rm -f "$temp_matches"

  local count
  count=$(tail -n "+$first_data_line" "$output_file" | wc -l)
  count="${count//[^0-9]/}"
  count="${count:-0}"

  # Summary
  echo ""
  log_info "=== Extraction Summary ==="
  log_info "Service: $service"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
  log_success "Extracted $count purgatory expiry entries"
  echo ""
  log_info "Output file: $output_file"

  if [[ $count -gt 0 ]]; then
    echo ""
    log_info "Sample entries (first 5):"
    # Use a subshell to avoid SIGPIPE issues with set -e
    (tail -n "+$first_data_line" "$output_file" | head -5 | while IFS=$'\t' read -r repo npub timestamp reason; do
      echo " repo=$repo npub=${npub:0:20}... timestamp=$timestamp"
    done) || true

    # Show unique repos affected (counted once, whitespace from wc stripped)
    echo ""
    local unique_repos
    unique_repos=$(tail -n "+$first_data_line" "$output_file" | awk -F'\t' '{print $1}' | sort -u | wc -l)
    unique_repos="${unique_repos//[^0-9]/}"
    unique_repos="${unique_repos:-0}"
    log_info "Unique repositories affected: $unique_repos"

    echo ""
    log_info "Repositories with purgatory expiry:"
    # Use a subshell to avoid SIGPIPE issues with set -e
    (tail -n "+$first_data_line" "$output_file" | awk -F'\t' '{print $1}' | sort | uniq -c | sort -rn | head -10 | while read -r cnt repo; do
      echo " $repo: $cnt expiry events"
    done) || true

    if [[ "$unique_repos" -gt 10 ]]; then
      echo " ... and $((unique_repos - 10)) more repositories"
    fi
  fi

  # Explicit success exit
  exit 0
}
407
408main "$@"
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh
new file mode 100755
index 0000000..8b61636
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh
@@ -0,0 +1,662 @@
1#!/usr/bin/env bash
2#
3# 40-classify-actions.sh - Classify repos by migration action required
4#
5# Implements the redesigned classification system (Option B) with user feedback:
6#
7# Tier 1: No Action Required (ready-for-migration.txt)
8# - Complete in both (prod=cat1, archive=cat1)
9# - Deleted by user (kind 5 event)
10# - Empty in prod (prod=cat2, any archive status)
11# - Archive-only (archive=any, prod=missing)
12# - Not in prod (purgatory-only, prod=missing)
13# - Archive ahead (archive has newer git data than prod - GRASP enforced)
14#
15# Tier 2: Action Required (needs-resync.txt)
16# - Complete in prod, missing from archive (with purgatory context)
17# - Complete in prod, incomplete in archive AND prod is ahead (with purgatory context)
18#
19# Tier 3: Manual Investigation (manual-review.txt)
20# - Partial in prod (prod=cat3)
21# - No-match in prod (prod=cat4)
22# - Parse failures
23# - Conflicting states
24# - Diverged git history (both have unique commits)
25#
26# KEY INSIGHT:
27# Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event.
28# If archive has different/newer data than prod, it means:
29# - A state event authorized those commits at some point
30# - Archive is actually MORE up-to-date than prod
31# - Migration should use archive data (it's already correct)
32#
33# Usage: ./40-classify-actions.sh <analysis-dir>
34#
35# Output format: repo | npub | prod_status | archive_status | context | action
36#
37
38set -euo pipefail
39
# ANSI escape sequences used to colorize log prefixes
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # resets all attributes

# Leveled log helpers. Only log_error writes to stderr; the others write to
# stdout. %b expands backslash escapes the same way `echo -e` does.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $*"
}

log_success() {
    printf '%b\n' "${GREEN}[OK]${NC} $*"
}

log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $*"
}

log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}
51
# The analysis directory is the only (mandatory) positional argument
if (( $# < 1 )); then
    echo "Usage: $0 <analysis-dir>"
    echo "Example: $0 work/migration-analysis-20260123-200701"
    exit 1
fi

ANALYSIS_DIR="$1"

# The analysis directory must already exist
[[ -d "$ANALYSIS_DIR" ]] || {
    log_error "Analysis directory not found: $ANALYSIS_DIR"
    exit 1
}

# Sub-directories of the analysis directory read or written by this script
PROD_DIR="$ANALYSIS_DIR/prod"
ARCHIVE_DIR="$ANALYSIS_DIR/archive"
COMPARISON_DIR="$ANALYSIS_DIR/comparison"
LOGS_DIR="$ANALYSIS_DIR/logs"
RESULTS_DIR="$ANALYSIS_DIR/results"

# Input directories must exist; results/ is an output and is created below
for required_dir in "$PROD_DIR" "$ARCHIVE_DIR" "$COMPARISON_DIR" "$LOGS_DIR"; do
    if [[ -d "$required_dir" ]]; then
        continue
    fi
    log_error "Required directory not found: $required_dir"
    exit 1
done

mkdir -p "$RESULTS_DIR"

# Report files, one per tier plus the summary
READY_FILE="$RESULTS_DIR/ready-for-migration.txt"
RESYNC_FILE="$RESULTS_DIR/needs-resync.txt"
REVIEW_FILE="$RESULTS_DIR/manual-review.txt"
SUMMARY_FILE="$RESULTS_DIR/summary.txt"

# Scratch directory, removed on any exit path
TMP_DIR=$(mktemp -d)
trap 'rm -rf "$TMP_DIR"' EXIT

log_info "Starting classification with revised system (Option B)"
log_info "Analysis directory: $ANALYSIS_DIR"
97
98# ============================================================================
99# Phase 1: Build lookup tables from source data
100# ============================================================================
101
102log_info "Building lookup tables..."
103
# Category files, in ascending category order: entry N-1 is the file for
# "catN". Files are loaded in this order, so a repo that appears in several
# files keeps the highest category (later files overwrite earlier ones).
CATEGORY_FILES=(
    "category1-complete-match.txt"
    "category2-empty-blank.txt"
    "category3-partial-match.txt"
    "category4-no-match.txt"
)

# Each category file holds "repo | npub | ..." rows. Only the first two
# fields are used; spaces around them are removed and rows missing either
# field are ignored. The `|| [[ -n "$repo" ]]` guard keeps the final row when
# the file does not end with a newline (read returns non-zero at EOF but has
# still filled the variables).

# Build prod category lookup: repo|npub -> category
declare -A PROD_CAT
for cat_idx in "${!CATEGORY_FILES[@]}"; do
    while IFS='|' read -r repo npub _ || [[ -n "$repo" ]]; do
        repo="${repo// /}" # Remove all spaces
        npub="${npub// /}" # Remove all spaces
        [[ -z "$repo" || -z "$npub" ]] && continue
        PROD_CAT["$repo|$npub"]="cat$((cat_idx + 1))"
    done < "$PROD_DIR/${CATEGORY_FILES[cat_idx]}"
done

log_info "Loaded ${#PROD_CAT[@]} prod entries"

# Build archive category lookup: repo|npub -> category
# Parsed exactly like the prod files, including the unterminated-last-line
# guard, so both sides of the comparison see every row.
declare -A ARCHIVE_CAT
for cat_idx in "${!CATEGORY_FILES[@]}"; do
    while IFS='|' read -r repo npub _ || [[ -n "$repo" ]]; do
        repo="${repo// /}"
        npub="${npub// /}"
        [[ -z "$repo" || -z "$npub" ]] && continue
        ARCHIVE_CAT["$repo|$npub"]="cat$((cat_idx + 1))"
    done < "$ARCHIVE_DIR/${CATEGORY_FILES[cat_idx]}"
done

log_info "Loaded ${#ARCHIVE_CAT[@]} archive entries"
167
# Build purgatory lookup: repo|npub -> 1 (if purgatory expired)
# Rows are tab-separated: repo, npub, then fields this script does not use.
declare -A PURGATORY
PURGATORY_COUNT=0
purgatory_log="$LOGS_DIR/purgatory-expired.txt"
if [[ -f "$purgatory_log" ]]; then
    while IFS=$'\t' read -r repo npub _ || [[ -n "$repo" ]]; do
        # Ignore comment rows and rows lacking either key field
        [[ "$repo" == \#* ]] && continue
        if [[ -z "$repo" || -z "$npub" ]]; then
            continue
        fi
        PURGATORY["$repo|$npub"]=1
        (( PURGATORY_COUNT += 1 ))
    done < "$purgatory_log"
fi
log_info "Loaded $PURGATORY_COUNT purgatory entries"

# Build parse failure lookup: repo|npub -> 1 (if parse failure logged)
# Parse failures file format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
declare -A PARSE_FAIL
PARSE_FAIL_COUNT=0
parse_fail_log="$LOGS_DIR/parse-failures.txt"
if [[ -f "$parse_fail_log" ]]; then
    while IFS=$'\t' read -r event_id _ _ repo npub || [[ -n "$event_id" ]]; do
        # Ignore comment rows and rows lacking either key field
        [[ "$event_id" == \#* ]] && continue
        if [[ -z "$repo" || -z "$npub" ]]; then
            continue
        fi
        PARSE_FAIL["$repo|$npub"]=1
        (( PARSE_FAIL_COUNT += 1 ))
    done < "$parse_fail_log"
fi
log_info "Loaded $PARSE_FAIL_COUNT parse failure entries"
196
197# Build deletion lookup: repo|npub -> 1 (if kind 5 deletion event)
198# Deletions are in NDJSON format with "a" tags like "30617:pubkey_hex:repo"
199# We need to convert hex pubkeys to npub format using nak
200declare -A DELETED
201
# process_deletions FILE
#
# Record repos referenced by deletion events in the global DELETED map
# (key "repo|npub", value 1).
#
# FILE is NDJSON, one event per line. Every "a" tag value is read as
# "30617:<pubkey_hex>:<repo>"; the "30617:" prefix is optional. The repo part
# is everything after the pubkey, so identifiers that contain ":" are kept
# whole instead of being cut at their first colon.
#
# Globals:   DELETED (written)
# Arguments: $1 - path to the deletions file; a missing file is not an error
# Returns:   0 on success, 1 if the "a" tags could not be extracted
process_deletions() {
    local file="$1"
    [[ -f "$file" ]] || return 0

    # Unique "<pubkey_hex>:<repo>" coordinates. jq's own stderr is left
    # visible and a failure is reported, rather than letting the script die
    # silently under `set -e` / `pipefail`.
    local coords
    if ! coords=$(jq -r '.tags[] | select(.[0] == "a") | .[1]' "$file" \
        | sed 's/^30617://' | sort -u); then
        echo "process_deletions: failed to extract \"a\" tags from $file" >&2
        return 1
    fi

    # hex -> npub cache so nak runs once per distinct pubkey. A failed
    # conversion is cached as "" and every coordinate using it is skipped.
    local -A npub_cache=()
    local coord pubkey_hex repo npub
    while IFS= read -r coord; do
        [[ "$coord" == *:* ]] || continue
        pubkey_hex="${coord%%:*}" # up to the first colon
        repo="${coord#*:}"        # everything after it, colons included
        [[ -z "$repo" || -z "$pubkey_hex" ]] && continue

        if [[ -z "${npub_cache[$pubkey_hex]+cached}" ]]; then
            npub_cache["$pubkey_hex"]=$(nak encode npub "$pubkey_hex" 2>/dev/null || echo "")
        fi
        npub="${npub_cache[$pubkey_hex]}"
        [[ -z "$npub" ]] && continue

        DELETED["$repo|$npub"]=1
    done <<< "$coords"

    return 0
}
235
# Collect deletion events fetched from both relays (prod first, then archive)
for deletions_root in "$PROD_DIR" "$ARCHIVE_DIR"; do
    process_deletions "$deletions_root/raw/deletions.json"
done

# Report how many distinct repo|npub pairs were marked as deleted
DELETED_COUNT=0
if [[ ${#DELETED[@]} -gt 0 ]]; then
    DELETED_COUNT=${#DELETED[@]}
fi
log_info "Loaded $DELETED_COUNT deletion entries"
242
# Build git ancestry lookup: repo|npub -> relationship (archive-ahead, prod-ahead, diverged, etc.)
# The TSV is produced by 22-compare-git-data.sh, which compares actual git commits.
# It is optional: without it every repo is treated as relationship "unknown".
declare -A GIT_ANCESTRY
GIT_ANCESTRY_COUNT=0
ancestry_tsv="$COMPARISON_DIR/git-ancestry.tsv"
if [[ ! -f "$ancestry_tsv" ]]; then
    log_warn "No git-ancestry.tsv found - will not check if archive is ahead of prod"
    log_warn "Run 22-compare-git-data.sh to enable archive-ahead detection"
else
    while IFS=$'\t' read -r repo npub relationship _ || [[ -n "$repo" ]]; do
        case "$repo" in
            # Header row ("repo" in the first column) and comment rows
            repo | \#*) continue ;;
        esac
        if [[ -z "$repo" || -z "$npub" ]]; then
            continue
        fi
        GIT_ANCESTRY["$repo|$npub"]="$relationship"
        (( GIT_ANCESTRY_COUNT += 1 ))
    done < "$ancestry_tsv"
    log_info "Loaded $GIT_ANCESTRY_COUNT git ancestry entries"
fi
261
262# ============================================================================
263# Phase 2: Build unique repo list from all sources
264# ============================================================================
265
266log_info "Building unique repo list..."
267
# Union of every repo|npub key seen in prod, in the archive, or in the
# purgatory log; the associative array de-duplicates the keys.
declare -A ALL_REPOS
for key in "${!PROD_CAT[@]}" "${!ARCHIVE_CAT[@]}" "${!PURGATORY[@]}"; do
    ALL_REPOS["$key"]=1
done

log_info "Total unique repos: ${#ALL_REPOS[@]}"
280
281# ============================================================================
282# Phase 3: Classify each repo according to revised decision tree
283# ============================================================================
284
285log_info "Classifying repos..."
286
# Per-reason counters for the summary, all starting at zero.
# Prefix = tier: ready_* (no action), resync_* (action), review_* (manual).
declare -A COUNTS
for counter_name in \
    ready_complete_both ready_deleted ready_empty_prod \
    ready_archive_only ready_not_in_prod ready_archive_ahead \
    resync_missing_archive resync_incomplete_archive \
    review_partial_prod review_nomatch_prod review_parse_failure \
    review_conflicting review_diverged; do
    COUNTS[$counter_name]=0
done

# One array of formatted report lines per tier
declare -a READY_LINES RESYNC_LINES REVIEW_LINES
307
# get_context KEY PROD_STATUS ARCHIVE_STATUS
#
# Print the context column for one repo: a ", "-separated list of the notes
# that apply, or "none" when none does.
#   purgatory-expired  KEY is present in PURGATORY
#   parse-failure      KEY is present in PARSE_FAIL
#   archive-has-data   prod is "empty" while the archive is neither
#                      "missing" nor "empty"
get_context() {
    local key="$1"
    local prod_status="$2"
    local archive_status="$3"
    local -a notes=()

    if [[ -n "${PURGATORY[$key]:-}" ]]; then
        notes+=("purgatory-expired")
    fi

    if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
        notes+=("parse-failure")
    fi

    if [[ "$prod_status" == "empty" ]]; then
        case "$archive_status" in
            missing | empty) ;;
            *) notes+=("archive-has-data") ;;
        esac
    fi

    if (( ${#notes[@]} == 0 )); then
        echo "none"
        return
    fi

    # Join the collected notes with ", "
    local joined="" note
    for note in "${notes[@]}"; do
        joined+="${joined:+, }$note"
    done
    echo "$joined"
}
340
# cat_to_status CATEGORY
#
# Print the human-readable status for a category code (cat1..cat4). Any other
# value, including "missing", is printed unchanged.
cat_to_status() {
    local status
    case "$1" in
        cat1) status="complete" ;;
        cat2) status="empty" ;;
        cat3) status="partial" ;;
        cat4) status="no-match" ;;
        *) status="$1" ;;
    esac
    echo "$status"
}
352
# append_git_context CONTEXT RELATIONSHIP
# Print CONTEXT extended with "git=<RELATIONSHIP>"; an empty or "none"
# CONTEXT is replaced instead of extended.
append_git_context() {
    if [[ -n "$1" && "$1" != "none" ]]; then
        echo "$1, git=$2"
    else
        echo "git=$2"
    fi
}

# record_result TIER COUNTER REASON
# Append the report line for the repo currently being classified to the
# array of TIER (ready | resync | review) and increment COUNTS[COUNTER].
# Reads the loop variables repo, npub, prod_status, archive_status, context.
record_result() {
    local tier="$1" counter="$2" reason="$3"
    local line="$repo | $npub | $prod_status | $archive_status | $context | $reason"
    case "$tier" in
        ready) READY_LINES+=("$line") ;;
        resync) RESYNC_LINES+=("$line") ;;
        review) REVIEW_LINES+=("$line") ;;
    esac
    COUNTS[$counter]=$(( ${COUNTS[$counter]} + 1 ))
}

LOOP_COUNT=0
for key in "${!ALL_REPOS[@]}"; do
    LOOP_COUNT=$((LOOP_COUNT + 1))
    if (( LOOP_COUNT % 100 == 0 )); then
        log_info "Processed $LOOP_COUNT repos..."
    fi
    IFS='|' read -r repo npub <<< "$key"

    prod_cat="${PROD_CAT[$key]:-missing}"
    archive_cat="${ARCHIVE_CAT[$key]:-missing}"
    prod_status=$(cat_to_status "$prod_cat")
    archive_status=$(cat_to_status "$archive_cat")

    # get_context only reads the lookup tables, so compute it once up front;
    # the purgatory-only branch below overrides it.
    context=$(get_context "$key" "$prod_status" "$archive_status")

    # 1. A kind 5 deletion event wins over every other signal
    if [[ -n "${DELETED[$key]:-}" ]]; then
        record_result ready ready_deleted "deleted by user"
        continue
    fi

    # 2. Otherwise the prod status drives the decision
    case "$prod_cat" in
        missing)
            if [[ "$archive_cat" != "missing" ]]; then
                # In archive but not in prod -> no action
                record_result ready ready_archive_only "archive-only (not in prod)"
            elif [[ -n "${PURGATORY[$key]:-}" ]]; then
                # Known only from the purgatory log -> no action
                context="purgatory-expired"
                record_result ready ready_not_in_prod "purgatory-only (not in prod)"
            fi
            # Anything else has no data anywhere and is not reported
            ;;

        cat2)
            # Empty in prod -> always no action
            record_result ready ready_empty_prod "empty in prod (user never pushed)"
            ;;

        cat3)
            # Partial in prod -> always manual investigation
            record_result review review_partial_prod "partial in prod (investigate git data)"
            ;;

        cat4)
            # No-match in prod -> always manual investigation
            record_result review review_nomatch_prod "no-match in prod (git corruption)"
            ;;

        cat1)
            # Complete in prod: the outcome depends on the archive side
            if [[ "$archive_cat" == "cat1" ]]; then
                record_result ready ready_complete_both "complete in both"
                continue
            fi

            # Archive missing/incomplete plus a logged parse failure -> review
            if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
                record_result review review_parse_failure "complete in prod with parse failure"
                continue
            fi

            git_relationship="${GIT_ANCESTRY[$key]:-unknown}"
            case "$git_relationship" in
                archive-ahead | in-sync)
                    # Archive has newer/same git data, which a state event
                    # authorized (GRASP enforced) -> use the archive as-is
                    context=$(append_git_context "$context" "$git_relationship")
                    record_result ready ready_archive_ahead "archive ahead (use archive data)"
                    ;;
                diverged)
                    # Both sides have unique commits -> manual review
                    context=$(append_git_context "$context" "diverged")
                    record_result review review_diverged "git histories diverged (manual review)"
                    ;;
                *)
                    # prod-ahead, archive-only, prod-only, both-empty, or
                    # unknown -> re-sync; "unknown" adds no git= note
                    if [[ "$git_relationship" != "unknown" ]]; then
                        context=$(append_git_context "$context" "$git_relationship")
                    fi
                    if [[ "$archive_cat" == "missing" ]]; then
                        record_result resync resync_missing_archive "trigger re-sync to archive"
                    else
                        record_result resync resync_incomplete_archive "trigger re-sync (archive incomplete)"
                    fi
                    ;;
            esac
            ;;
    esac
done
476
477# ============================================================================
478# Phase 4: Write output files
479# ============================================================================
480
481log_info "Writing output files..."
482
# UTC generation time stamped into every report header
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S+00:00")

# Each tier file is a "#"-commented header followed by one line per repo.
#
# The ${ARR[@]+"${ARR[@]}"} form expands to nothing when a tier has no
# entries. A bare "${ARR[@]}" is rejected as an unbound variable under
# `set -u` by bash releases older than 4.4, which would abort the script
# whenever one of the tiers is empty.

# Write ready-for-migration.txt
{
    echo "# Ready for Migration - No action required"
    echo "# Generated: $TIMESTAMP"
    echo "# Format: repo | npub | prod_status | archive_status | context | reason"
    echo "#"
    for line in ${READY_LINES[@]+"${READY_LINES[@]}"}; do
        printf '%s\n' "$line"
    done
} > "$READY_FILE"

# Write needs-resync.txt
{
    echo "# Needs Re-sync - Action required"
    echo "# Generated: $TIMESTAMP"
    echo "# Format: repo | npub | prod_status | archive_status | context | action"
    echo "#"
    echo "# Context meanings:"
    echo "# purgatory-expired = archive tried to sync but failed (30min timeout)"
    echo "# none = archive never tried or announcement missing"
    echo "#"
    for line in ${RESYNC_LINES[@]+"${RESYNC_LINES[@]}"}; do
        printf '%s\n' "$line"
    done
} > "$RESYNC_FILE"

# Write manual-review.txt
{
    echo "# Manual Review Required - Investigation needed"
    echo "# Generated: $TIMESTAMP"
    echo "# Format: repo | npub | prod_status | archive_status | context | reason"
    echo "#"
    for line in ${REVIEW_LINES[@]+"${REVIEW_LINES[@]}"}; do
        printf '%s\n' "$line"
    done
} > "$REVIEW_FILE"
521
522# ============================================================================
523# Phase 5: Generate summary
524# ============================================================================
525
526log_info "Generating summary..."
527
TOTAL_READY="${#READY_LINES[@]}"
TOTAL_RESYNC="${#RESYNC_LINES[@]}"
TOTAL_REVIEW="${#REVIEW_LINES[@]}"
TOTAL=$((TOTAL_READY + TOTAL_RESYNC + TOTAL_REVIEW))

# percent_of PART TOTAL
# Print PART as a percentage of TOTAL with one decimal, or "0.0" when TOTAL
# is zero. Values are handed to awk with -v instead of being spliced into the
# program text.
percent_of() {
    local part="$1" total="$2"
    if [[ "$total" -gt 0 ]]; then
        awk -v part="$part" -v total="$total" 'BEGIN { printf "%.1f", (part / total) * 100 }'
    else
        printf '0.0'
    fi
}

# count_data_lines FILE
# Print the number of lines in FILE that do not start with "#"; prints 0 when
# FILE is missing or unreadable.
count_data_lines() {
    local n
    # grep -c always prints the count but exits 1 when that count is 0, so a
    # `|| echo 0` fallback would print a second "0". Ignore the exit status
    # and only default the value when grep printed nothing at all.
    n=$(grep -c -v '^#' "$1" 2>/dev/null) || true
    echo "${n:-0}"
}

# Calculate percentages
PCT_READY=$(percent_of "$TOTAL_READY" "$TOTAL")
PCT_RESYNC=$(percent_of "$TOTAL_RESYNC" "$TOTAL")
PCT_REVIEW=$(percent_of "$TOTAL_REVIEW" "$TOTAL")

# Markdown-formatted summary of the whole classification run
{
    echo "# Migration Classification Summary"
    echo "Generated: $TIMESTAMP"
    echo "Analysis Directory: $ANALYSIS_DIR"
    echo ""
    echo "## Overview"
    echo ""
    echo "| Category | Count | Percentage |"
    echo "|----------|-------|------------|"
    echo "| Ready for Migration | $TOTAL_READY | $PCT_READY% |"
    echo "| Needs Re-sync | $TOTAL_RESYNC | $PCT_RESYNC% |"
    echo "| Manual Review | $TOTAL_REVIEW | $PCT_REVIEW% |"
    echo "| **Total** | **$TOTAL** | **100%** |"
    echo ""
    echo "## Tier 1: Ready for Migration ($TOTAL_READY repos)"
    echo ""
    echo "These repositories are ready for migration or don't need migration:"
    echo ""
    echo "| Reason | Count |"
    echo "|--------|-------|"
    echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |"
    echo "| archive ahead (has newer git data) | ${COUNTS[ready_archive_ahead]} |"
    echo "| deleted by user | ${COUNTS[ready_deleted]} |"
    echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |"
    echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |"
    echo "| purgatory-only (not in prod) | ${COUNTS[ready_not_in_prod]} |"
    echo ""
    echo "## Tier 2: Needs Re-sync ($TOTAL_RESYNC repos)"
    echo ""
    echo "These repositories need re-sync to archive before migration:"
    echo ""
    echo "| Reason | Count | Action |"
    echo "|--------|-------|--------|"
    echo "| complete in prod, missing from archive | ${COUNTS[resync_missing_archive]} | trigger re-sync |"
    echo "| complete in prod, incomplete in archive | ${COUNTS[resync_incomplete_archive]} | trigger re-sync |"
    echo ""
    echo "### Purgatory Context"
    echo ""
    echo "Repos in needs-resync.txt include purgatory context:"
    echo "- **purgatory-expired**: Archive tried to sync but failed (30min timeout)"
    echo "- **none**: Archive never tried or announcement missing"
    echo ""
    echo "## Tier 3: Manual Review ($TOTAL_REVIEW repos)"
    echo ""
    echo "These repositories require human investigation:"
    echo ""
    echo "| Reason | Count |"
    echo "|--------|-------|"
    echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |"
    echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |"
    echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |"
    echo "| git histories diverged | ${COUNTS[review_diverged]} |"
    echo ""
    echo "## Input Data Summary"
    echo ""
    echo "### Prod Categories"
    echo "- Category 1 (complete): $(wc -l < "$PROD_DIR/category1-complete-match.txt")"
    echo "- Category 2 (empty): $(wc -l < "$PROD_DIR/category2-empty-blank.txt")"
    echo "- Category 3 (partial): $(wc -l < "$PROD_DIR/category3-partial-match.txt")"
    echo "- Category 4 (no match): $(wc -l < "$PROD_DIR/category4-no-match.txt")"
    echo ""
    echo "### Archive Categories"
    echo "- Category 1 (complete): $(wc -l < "$ARCHIVE_DIR/category1-complete-match.txt")"
    echo "- Category 2 (empty): $(wc -l < "$ARCHIVE_DIR/category2-empty-blank.txt")"
    echo "- Category 3 (partial): $(wc -l < "$ARCHIVE_DIR/category3-partial-match.txt")"
    echo "- Category 4 (no match): $(wc -l < "$ARCHIVE_DIR/category4-no-match.txt")"
    echo ""
    echo "### Logs"
    echo "- Parse failures: $(count_data_lines "$LOGS_DIR/parse-failures.txt")"
    echo "- Purgatory expired: $(count_data_lines "$LOGS_DIR/purgatory-expired.txt")"
    echo ""
    echo "## Output Files"
    echo ""
    echo "- \`results/ready-for-migration.txt\` - $TOTAL_READY repos ready for migration"
    echo "- \`results/needs-resync.txt\` - $TOTAL_RESYNC repos needing re-sync"
    echo "- \`results/manual-review.txt\` - $TOTAL_REVIEW repos needing investigation"
    echo "- \`results/summary.txt\` - This summary file"
    echo ""
    echo "## Recommended Next Steps"
    echo ""
    echo "1. **Review needs-resync.txt** - Trigger re-sync for these repos"
    echo "2. **Review manual-review.txt** - Investigate unusual states"
    echo "3. **Verify ready-for-migration.txt** - Spot-check a few repos"
    echo "4. **Plan migration window** - Schedule cutover when action items resolved"
} > "$SUMMARY_FILE"
629
630# ============================================================================
631# Phase 6: Print summary to console
632# ============================================================================
633
echo ""
log_success "Classification complete!"

# The rest of the console report is plain text with simple expansions, so it
# is emitted as one here-document.
cat <<EOF

=== Summary ===
Ready for Migration: $TOTAL_READY ($PCT_READY%)
 - Complete in both: ${COUNTS[ready_complete_both]}
 - Archive ahead: ${COUNTS[ready_archive_ahead]}
 - Deleted by user: ${COUNTS[ready_deleted]}
 - Empty in prod: ${COUNTS[ready_empty_prod]}
 - Archive-only: ${COUNTS[ready_archive_only]}
 - Purgatory-only: ${COUNTS[ready_not_in_prod]}

Needs Re-sync: $TOTAL_RESYNC ($PCT_RESYNC%)
 - Missing from archive: ${COUNTS[resync_missing_archive]}
 - Incomplete in archive: ${COUNTS[resync_incomplete_archive]}

Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)
 - Partial in prod: ${COUNTS[review_partial_prod]}
 - No-match in prod: ${COUNTS[review_nomatch_prod]}
 - Parse failures: ${COUNTS[review_parse_failure]}
 - Git diverged: ${COUNTS[review_diverged]}

Total: $TOTAL repos

Output files:
 $READY_FILE
 $RESYNC_FILE
 $REVIEW_FILE
 $SUMMARY_FILE
EOF
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/run-migration-analysis.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/run-migration-analysis.sh
new file mode 100755
index 0000000..acc5e44
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/run-migration-analysis.sh
@@ -0,0 +1,779 @@
1#!/usr/bin/env bash
2#
3# run-migration-analysis.sh - Orchestrate the complete GRASP relay to ngit-grasp migration analysis
4#
5# This script runs all 5 phases of the migration analysis pipeline in sequence,
6# with proper error handling, progress reporting, and timing information.
7#
8# QUICK START:
9# # Basic usage (local analysis only - Phases 1, 3, 5)
10# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev
11#
12# # Full analysis including git sync check (requires VPS access)
13# ./run-migration-analysis.sh \
14# --prod-relay wss://relay.ngit.dev \
15# --archive-relay wss://archive.relay.ngit.dev \
16# --prod-git /var/lib/grasp-relay/git \
17# --archive-git /var/lib/ngit-grasp/git
18#
19# USAGE:
20# ./run-migration-analysis.sh [options]
21#
22# REQUIRED OPTIONS:
23# --prod-relay <url> Production relay WebSocket URL (e.g., wss://relay.ngit.dev)
24# --archive-relay <url> Archive relay WebSocket URL (e.g., wss://archive.relay.ngit.dev)
25#
26# OPTIONAL OPTIONS:
27# --prod-git <path> Git base directory for prod (enables Phase 2)
28# --archive-git <path> Git base directory for archive (enables Phase 2)
29# --service <name> Systemd service name for log extraction (enables Phase 4)
30# --output <dir> Output directory (default: work/migration-analysis-YYYYMMDD-HHMM)
31# --since <date> Start date for log extraction (default: 30 days ago)
32# --until <date> End date for log extraction (default: now)
33#
34# PHASE CONTROL:
35# --skip-phase-1 Skip event fetching (use existing data)
36# --skip-phase-2 Skip git sync check (use existing data)
37# --skip-phase-3 Skip categorization (use existing data)
38# --skip-phase-4 Skip log extraction (use existing data)
39# --skip-phase-5 Skip final classification
40# --only-phase-N Run only phase N (1-5)
41# --from-phase-N Start from phase N (skip earlier phases)
42#
43# OTHER OPTIONS:
44# --dry-run Show what would be executed without running
45# --continue-on-error Continue to next phase even if current phase fails
46# --help Show this help message
47#
48# PHASES:
49# Phase 1: Fetch events from both relays (~30s each, local)
50# Phase 2: Check git sync status (~20 min each, requires VPS)
51# Phase 3: Categorize and compare results (fast, local)
52# Phase 4: Extract logs from systemd (requires VPS)
53# Phase 5: Final classification (fast, local)
54#
55# EXAMPLES:
56# # Dry run to see what would happen
57# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev --dry-run
58#
59# # Run only Phase 1 (fetch events)
60# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev --only-phase-1
61#
62# # Resume from Phase 3 using existing Phase 1-2 data
63# ./run-migration-analysis.sh --prod-relay wss://relay.ngit.dev --archive-relay wss://archive.relay.ngit.dev --from-phase-3 --output work/migration-analysis-20260122-1430
64#
65# # Full analysis on VPS with all features
66# ./run-migration-analysis.sh \
67# --prod-relay wss://relay.ngit.dev \
68# --archive-relay wss://archive.relay.ngit.dev \
69# --prod-git /var/lib/grasp-relay/git \
70# --archive-git /var/lib/ngit-grasp/git \
71# --service ngit-grasp.service
72#
73# SEE ALSO:
74# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
75#
76
77set -euo pipefail
78
79# Get script directory for finding other scripts
80SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
81
# Color escape sequences for log output. They start out empty and are only
# filled in when stdout is a terminal, so redirected output stays plain.
RED='' GREEN='' YELLOW='' BLUE='' CYAN='' BOLD='' NC=''
if [[ -t 1 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    CYAN='\033[0;36m'
    BOLD='\033[1m'
    NC='\033[0m'
fi
100
# Logging functions.
# log_header and log_phase draw banners on stdout; the leveled helpers
# (info/success/warn/error/step) write a single prefixed line to stderr.
# %b expands backslash escapes the same way `echo -e` does.

# Banner between two double-line rules, surrounded by blank lines
log_header() {
    local rule="${BOLD}${CYAN}════════════════════════════════════════════════════════════════${NC}"
    printf '\n%b\n%b\n%b\n\n' "$rule" "${BOLD}${CYAN} $*${NC}" "$rule"
}

# Boxed phase title, preceded by a blank line
log_phase() {
    printf '\n%b\n%b\n%b\n' \
        "${BOLD}${BLUE}┌──────────────────────────────────────────────────────────────┐${NC}" \
        "${BOLD}${BLUE}│ $*${NC}" \
        "${BOLD}${BLUE}└──────────────────────────────────────────────────────────────┘${NC}"
}

log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $*" >&2
}

log_success() {
    printf '%b\n' "${GREEN}[OK]${NC} $*" >&2
}

log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2
}

log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}

# Sub-step line marked with an arrow
log_step() {
    printf '%b\n' "${CYAN} →${NC} $*" >&2
}
136
# Defaults for options that take a value (empty = not given on the command line)
PROD_RELAY="" ARCHIVE_RELAY=""
PROD_GIT="" ARCHIVE_GIT=""
SERVICE_NAME="" OUTPUT_DIR=""
LOG_SINCE="" LOG_UNTIL=""

# Boolean switches
DRY_RUN=false
CONTINUE_ON_ERROR=false

# Phase control: per-phase skip flags plus the --only-phase-N / --from-phase-N selectors
SKIP_PHASE_1=false SKIP_PHASE_2=false SKIP_PHASE_3=false
SKIP_PHASE_4=false SKIP_PHASE_5=false
ONLY_PHASE=""
FROM_PHASE=""

# Timing data per phase
declare -A PHASE_TIMES
160
# usage [FILE]
#
# Print the help text and exit 0. The help text is the leading comment block
# of FILE (default: this script): every line from line 3 up to, but not
# including, the first line that does not start with "#", with the comment
# marker ("# " or "#") removed.
#
# Stopping at the end of the comment block instead of at a hard-coded line
# number keeps the last header lines (the SEE ALSO reference) in the output
# and keeps working when the header grows or shrinks.
usage() {
    local script="${1:-$0}"
    awk 'NR < 3 { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' "$script"
    exit 0
}
165
# parse_args ARGS...
#
# Parse the command line into the option globals (PROD_RELAY, ARCHIVE_RELAY,
# PROD_GIT, ARCHIVE_GIT, SERVICE_NAME, OUTPUT_DIR, LOG_SINCE, LOG_UNTIL,
# SKIP_PHASE_1..5, ONLY_PHASE, FROM_PHASE, DRY_RUN, CONTINUE_ON_ERROR).
#
# Exits 1 on an unknown option or when an option that takes a value is the
# last argument; --help / -h prints the usage text and exits 0.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --prod-relay | --archive-relay | --prod-git | --archive-git | \
                --service | --output | --since | --until)
                # Without this check a trailing value option would read an
                # unset $2 and abort with "unbound variable" under `set -u`.
                if [[ $# -lt 2 ]]; then
                    log_error "Option $1 requires a value"
                    echo "Use --help for usage information."
                    exit 1
                fi
                case "$1" in
                    --prod-relay) PROD_RELAY="$2" ;;
                    --archive-relay) ARCHIVE_RELAY="$2" ;;
                    --prod-git) PROD_GIT="$2" ;;
                    --archive-git) ARCHIVE_GIT="$2" ;;
                    --service) SERVICE_NAME="$2" ;;
                    --output) OUTPUT_DIR="$2" ;;
                    --since) LOG_SINCE="$2" ;;
                    --until) LOG_UNTIL="$2" ;;
                esac
                shift 2
                ;;
            --skip-phase-1)
                SKIP_PHASE_1=true
                shift
                ;;
            --skip-phase-2)
                SKIP_PHASE_2=true
                shift
                ;;
            --skip-phase-3)
                SKIP_PHASE_3=true
                shift
                ;;
            --skip-phase-4)
                SKIP_PHASE_4=true
                shift
                ;;
            --skip-phase-5)
                SKIP_PHASE_5=true
                shift
                ;;
            --only-phase-1 | --only-phase-2 | --only-phase-3 | --only-phase-4 | --only-phase-5)
                # Keep only the phase number
                ONLY_PHASE="${1#--only-phase-}"
                shift
                ;;
            --from-phase-1 | --from-phase-2 | --from-phase-3 | --from-phase-4 | --from-phase-5)
                # Keep only the phase number
                FROM_PHASE="${1#--from-phase-}"
                shift
                ;;
            --dry-run)
                DRY_RUN=true
                shift
                ;;
            --continue-on-error)
                CONTINUE_ON_ERROR=true
                shift
                ;;
            --help | -h)
                usage
                ;;
            *)
                log_error "Unknown option: $1"
                echo "Use --help for usage information."
                exit 1
                ;;
        esac
    done
}
249
# Check the parsed configuration before any work starts.
#
# Reports every problem via log_error, then exits 1 if at least one was
# found. Git directories that do not exist only produce warnings.
validate_args() {
    local invalid=0

    # Both relay endpoints are mandatory.
    [[ -n "$PROD_RELAY" ]] || {
        log_error "Missing required option: --prod-relay"
        invalid=1
    }
    [[ -n "$ARCHIVE_RELAY" ]] || {
        log_error "Missing required option: --archive-relay"
        invalid=1
    }

    # A relay that was supplied must be a websocket URL.
    case "$PROD_RELAY" in
        '' | ws://* | wss://*) ;;
        *)
            log_error "Invalid prod relay URL: $PROD_RELAY (must start with ws:// or wss://)"
            invalid=1
            ;;
    esac
    case "$ARCHIVE_RELAY" in
        '' | ws://* | wss://*) ;;
        *)
            log_error "Invalid archive relay URL: $ARCHIVE_RELAY (must start with ws:// or wss://)"
            invalid=1
            ;;
    esac

    # Git paths are optional; a path that is given but absent is only a warning.
    [[ -z "$PROD_GIT" || -d "$PROD_GIT" ]] || {
        log_warn "Prod git directory not found: $PROD_GIT"
        log_warn "Phase 2 will fail unless running on VPS with access to this path."
    }
    [[ -z "$ARCHIVE_GIT" || -d "$ARCHIVE_GIT" ]] || {
        log_warn "Archive git directory not found: $ARCHIVE_GIT"
        log_warn "Phase 2 will fail unless running on VPS with access to this path."
    }

    if (( invalid == 1 )); then
        printf '\n%s\n' "Use --help for usage information."
        exit 1
    fi
}
292
# Verify that everything the pipeline depends on is present.
#
# - Required command-line tools: any missing one aborts the script (exit 1).
# - journalctl is optional: when absent, SKIP_PHASE_4 is forced to true.
# - Every helper script must exist and be executable in SCRIPT_DIR,
#   otherwise the script exits 1.
check_prerequisites() {
    local missing=0
    local -a required_tools=(git nak jq awk sort)
    local -a pipeline_scripts=(
        01-fetch-events.sh
        10-check-git-sync.sh
        20-categorize.sh
        21-compare-relays.sh
        22-compare-git-data.sh
        30-extract-parse-failures.sh
        31-extract-purgatory-expiry.sh
        40-classify-actions.sh
    )

    log_info "Checking prerequisites..."

    # Mandatory tools: report each one, remember whether any is absent.
    for tool in "${required_tools[@]}"; do
        if ! command -v "$tool" > /dev/null 2>&1; then
            log_error "$tool: NOT FOUND"
            missing=1
            continue
        fi
        log_step "$tool: found"
    done

    # Optional tool: without journalctl there is nothing Phase 4 can read.
    if ! command -v journalctl > /dev/null 2>&1; then
        log_step "journalctl: not found (Phase 4 will be skipped)"
        SKIP_PHASE_4=true
    else
        log_step "journalctl: found (Phase 4 available)"
    fi

    if (( missing == 1 )); then
        log_error "Missing required tools. Install them and try again."
        exit 1
    fi

    # Helper scripts that the individual phases invoke.
    for script in "${pipeline_scripts[@]}"; do
        [[ -x "$SCRIPT_DIR/$script" ]] && continue
        log_error "Script not found or not executable: $SCRIPT_DIR/$script"
        missing=1
    done

    (( missing != 1 )) || exit 1

    log_success "All prerequisites satisfied"
}
336
# Resolve the final SKIP_PHASE_N flags from the phase-selection options
# and from which inputs were actually supplied.
#
# Globals read:    ONLY_PHASE, FROM_PHASE, PROD_GIT, ARCHIVE_GIT, SERVICE_NAME
# Globals written: SKIP_PHASE_1 .. SKIP_PHASE_5
determine_phases() {
    # --only-phase-N: every phase other than N is skipped.
    if [[ -n "$ONLY_PHASE" ]]; then
        for i in {1..5}; do
            [[ "$i" == "$ONLY_PHASE" ]] || printf -v "SKIP_PHASE_$i" '%s' true
        done
    fi

    # --from-phase-N: every phase before N is skipped.
    if [[ -n "$FROM_PHASE" ]]; then
        for i in {1..5}; do
            if (( i < FROM_PHASE )); then
                printf -v "SKIP_PHASE_$i" '%s' true
            fi
        done
    fi

    # Phase 2 needs at least one git path; warn only if it was still enabled.
    if [[ -z "$PROD_GIT" && -z "$ARCHIVE_GIT" && "$SKIP_PHASE_2" != "true" ]]; then
        log_warn "No git paths provided. Phase 2 (git sync check) will be skipped."
        log_warn "Use --prod-git and --archive-git to enable Phase 2."
        SKIP_PHASE_2=true
    fi

    # Phase 4 needs a service name; warn only if it was still enabled.
    if [[ -z "$SERVICE_NAME" && "$SKIP_PHASE_4" != "true" ]]; then
        log_warn "No service name provided. Phase 4 (log extraction) will be skipped."
        log_warn "Use --service to enable Phase 4."
        SKIP_PHASE_4=true
    fi
}
375
# Choose and create the output directory tree, and record the effective
# configuration in "$OUTPUT_DIR/config.txt".
#
# When no --output was given, OUTPUT_DIR defaults to a timestamped
# directory under work/. In dry-run mode nothing is created.
setup_output_dir() {
    # Assigns the default only when OUTPUT_DIR is still empty.
    : "${OUTPUT_DIR:=work/migration-analysis-$(date +%Y%m%d-%H%M)}"

    log_info "Output directory: $OUTPUT_DIR"

    if [[ "$DRY_RUN" == "true" ]]; then
        log_info "[DRY RUN] Would create directory structure"
        return
    fi

    mkdir -p \
        "$OUTPUT_DIR/prod/raw" \
        "$OUTPUT_DIR/archive/raw" \
        "$OUTPUT_DIR/comparison" \
        "$OUTPUT_DIR/logs" \
        "$OUTPUT_DIR/results"

    # Save configuration
    {
        printf '%s\n' "# Migration Analysis Configuration"
        printf '%s\n' "# Generated: $(date -Iseconds)"
        printf '\n'
        printf '%s=%s\n' PROD_RELAY "$PROD_RELAY"
        printf '%s=%s\n' ARCHIVE_RELAY "$ARCHIVE_RELAY"
        printf '%s=%s\n' PROD_GIT "$PROD_GIT"
        printf '%s=%s\n' ARCHIVE_GIT "$ARCHIVE_GIT"
        printf '%s=%s\n' SERVICE_NAME "$SERVICE_NAME"
        printf '%s=%s\n' OUTPUT_DIR "$OUTPUT_DIR"
    } > "$OUTPUT_DIR/config.txt"

    log_success "Created output directory structure"
}
406
# Run the commands of one phase with skip/dry-run handling, timing and
# error handling.
#
# Arguments:
#   $1   - phase number (selects SKIP_PHASE_<n> and the PHASE_TIMES key)
#   $2   - human-readable phase name, used only for logging
#   $3.. - command strings, executed in order via `eval`
# Globals read:    SKIP_PHASE_<n>, DRY_RUN, CONTINUE_ON_ERROR
# Globals written: PHASE_TIMES[<n>] (seconds, accumulated across calls)
# Returns:
#   0 if the phase was skipped, dry-run, or every command succeeded;
#   1 if any command failed (also with --continue-on-error, which only
#   controls whether the remaining commands of the phase still run).
#
# NOTE(review): each command is a string passed to `eval`, so callers are
# responsible for quoting every embedded path/value themselves.
run_phase() {
    local phase_num="$1"
    local phase_name="$2"
    shift 2
    local cmd=("$@")
    local c

    local skip_var="SKIP_PHASE_$phase_num"
    if [[ "${!skip_var}" == "true" ]]; then
        log_phase "Phase $phase_num: $phase_name [SKIPPED]"
        return 0
    fi

    log_phase "Phase $phase_num: $phase_name"

    if [[ "$DRY_RUN" == "true" ]]; then
        log_info "[DRY RUN] Would execute:"
        for c in "${cmd[@]}"; do
            echo " $c"
        done
        return 0
    fi

    local start_time
    start_time=$(date +%s)

    local exit_code=0

    # Execute the command(s); stop at the first failure unless the user
    # asked to continue on error.
    for c in "${cmd[@]}"; do
        log_step "Running: $c"
        if ! eval "$c"; then
            exit_code=1
            if [[ "$CONTINUE_ON_ERROR" == "true" ]]; then
                log_warn "Command failed, continuing due to --continue-on-error"
            else
                log_error "Command failed"
                break
            fi
        fi
    done

    local end_time
    end_time=$(date +%s)
    local duration=$((end_time - start_time))

    # A phase may be run in several parts (Phase 3 is invoked twice), so
    # add to any time already recorded instead of overwriting it.
    PHASE_TIMES[$phase_num]=$(( ${PHASE_TIMES[$phase_num]:-0} + duration ))

    if [[ $exit_code -eq 0 ]]; then
        log_success "Phase $phase_num completed in ${duration}s"
    else
        log_error "Phase $phase_num failed after ${duration}s"
    fi

    return "$exit_code"
}
465
# Phase 1: Fetch events
#
# Queues one 01-fetch-events.sh invocation per relay (prod first, then
# archive) and hands both to run_phase. Returns run_phase's status.
run_phase_1() {
    local fetcher="$SCRIPT_DIR/01-fetch-events.sh"
    local prod_fetch archive_fetch

    # Each command is a single pre-quoted string because run_phase evals it.
    printf -v prod_fetch "'%s' '%s' '%s'" "$fetcher" "$PROD_RELAY" "$OUTPUT_DIR/prod"
    printf -v archive_fetch "'%s' '%s' '%s'" "$fetcher" "$ARCHIVE_RELAY" "$OUTPUT_DIR/archive"

    run_phase 1 "Fetch Events (~30s each)" "$prod_fetch" "$archive_fetch"
}
478
# Phase 2: Git sync check
#
# Queues one 10-check-git-sync.sh run (with --categorize) for each side
# whose git path was supplied. Returns 0 without running anything when
# neither path is set; otherwise returns run_phase's status.
run_phase_2() {
    local checker="$SCRIPT_DIR/10-check-git-sync.sh"
    local -a queue=()
    local entry

    if [[ -z "$PROD_GIT" ]]; then
        log_warn "Skipping prod git sync check (no --prod-git provided)"
    else
        printf -v entry "'%s' '%s' '%s' '%s' --categorize" \
            "$checker" "$OUTPUT_DIR/prod/raw/state-events.json" "$PROD_GIT" "$OUTPUT_DIR/prod"
        queue+=("$entry")
    fi

    if [[ -z "$ARCHIVE_GIT" ]]; then
        log_warn "Skipping archive git sync check (no --archive-git provided)"
    else
        printf -v entry "'%s' '%s' '%s' '%s' --categorize" \
            "$checker" "$OUTPUT_DIR/archive/raw/state-events.json" "$ARCHIVE_GIT" "$OUTPUT_DIR/archive"
        queue+=("$entry")
    fi

    if (( ${#queue[@]} == 0 )); then
        log_warn "No git paths provided, skipping Phase 2"
        return 0
    fi

    run_phase 2 "Git Sync Check (~20 min each)" "${queue[@]}"
}
502
# Phase 3: Categorize and compare
#
# Steps, each delegated to run_phase:
#   a) categorize a side (20-categorize.sh) when its git-sync-status.tsv
#      exists but its category files do not;
#   b) compare the two sides (21-compare-relays.sh) when both have category
#      files, or will have them once step (a) has run;
#   c) compare git data (22-compare-git-data.sh) when both git paths are
#      set and no git-ancestry.tsv exists yet.
# When step (b) cannot run, placeholder comparison files are written
# (except in dry-run mode).
#
# Returns 0 on success or when there is nothing to do, 1 if any run_phase
# call failed.
run_phase_3() {
    local cmds=()
    local f
    local status=0

    local prod_categories="$OUTPUT_DIR/prod/category1-complete-match.txt"
    local archive_categories="$OUTPUT_DIR/archive/category1-complete-match.txt"

    # A side is "ready" for comparison if its category files already exist
    # or if categorization is queued below. The commands run in order, so
    # the comparison sees the freshly written category files.
    local prod_ready=false
    local archive_ready=false

    if [[ -f "$prod_categories" ]]; then
        prod_ready=true
    elif [[ -f "$OUTPUT_DIR/prod/git-sync-status.tsv" ]]; then
        cmds+=("'$SCRIPT_DIR/20-categorize.sh' '$OUTPUT_DIR/prod/git-sync-status.tsv' '$OUTPUT_DIR/prod'")
        prod_ready=true
    fi

    if [[ -f "$archive_categories" ]]; then
        archive_ready=true
    elif [[ -f "$OUTPUT_DIR/archive/git-sync-status.tsv" ]]; then
        cmds+=("'$SCRIPT_DIR/20-categorize.sh' '$OUTPUT_DIR/archive/git-sync-status.tsv' '$OUTPUT_DIR/archive'")
        archive_ready=true
    fi

    if [[ "$prod_ready" == "true" && "$archive_ready" == "true" ]]; then
        cmds+=("'$SCRIPT_DIR/21-compare-relays.sh' '$OUTPUT_DIR/prod' '$OUTPUT_DIR/archive' '$OUTPUT_DIR/comparison'")
    else
        log_warn "Missing category files for comparison."
        log_warn "Phase 2 must complete successfully before Phase 3 can compare relays."

        # Create placeholder comparison files if they don't exist
        if [[ "$DRY_RUN" != "true" ]]; then
            mkdir -p "$OUTPUT_DIR/comparison"
            for f in complete-in-both.txt complete-prod-missing-archive.txt complete-prod-incomplete-archive.txt incomplete-in-both.txt in-archive-not-prod.txt; do
                if [[ ! -f "$OUTPUT_DIR/comparison/$f" ]]; then
                    echo "# Placeholder - Phase 2 data not available" > "$OUTPUT_DIR/comparison/$f"
                fi
            done
            echo "# Comparison not available - Phase 2 data missing" > "$OUTPUT_DIR/comparison/summary.txt"
        fi
    fi

    if [[ ${#cmds[@]} -eq 0 ]]; then
        log_warn "No categorization or comparison needed (already done or missing input)"
        return 0
    fi

    # Remember a failure instead of dropping it, so it reaches the caller
    # even though the ancestry step below still gets its chance to run.
    run_phase 3 "Categorize & Compare (fast)" "${cmds[@]}" || status=1

    # Phase 3c: Compare git data between relays (requires git paths)
    # This determines if archive is ahead of prod for repos with mismatched state
    if [[ -n "$PROD_GIT" && -n "$ARCHIVE_GIT" ]]; then
        # Build list of repos to compare: those where prod=complete but archive is not
        local repos_to_compare="$OUTPUT_DIR/comparison/complete-prod-incomplete-archive.txt"
        if [[ -f "$repos_to_compare" ]] && [[ ! -f "$OUTPUT_DIR/comparison/git-ancestry.tsv" ]]; then
            log_info "Running git ancestry comparison (Phase 3c)..."
            run_phase 3 "Git Ancestry Comparison" "'$SCRIPT_DIR/22-compare-git-data.sh' '$PROD_GIT' '$ARCHIVE_GIT' '$repos_to_compare' '$OUTPUT_DIR/comparison'" || status=1
        fi
    else
        log_warn "Git paths not provided - skipping git ancestry comparison"
        log_warn "Without git comparison, repos where archive is ahead will be incorrectly flagged as needing re-sync"
    fi

    return "$status"
}
569
# Phase 4: Extract logs
#
# Runs the two log extraction scripts against SERVICE_NAME, forwarding
# --since/--until when set.
# Returns 0 without doing anything when no service name is configured,
# 1 when the name contains "ngit-relay", otherwise run_phase's status.
run_phase_4() {
    if [[ -z "$SERVICE_NAME" ]]; then
        log_warn "No service name provided, skipping Phase 4"
        return 0
    fi

    # Refuse ngit-relay services outright: the structured log markers this
    # phase looks for are only written by ngit-grasp.
    if [[ "$SERVICE_NAME" == *"ngit-relay"* ]]; then
        local hint
        log_error "SERVICE_NAME appears to be ngit-relay: $SERVICE_NAME"
        for hint in \
            "" \
            "Phase 4 requires an ngit-grasp service with structured logging." \
            "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists" \
            "in ngit-grasp services, NOT in ngit-relay services." \
            "" \
            "Please update --service to use the ngit-grasp archive service." \
            "" \
            "To find the correct service name:" \
            " systemctl list-units 'ngit-grasp*' --all" \
            "" \
            "Common ngit-grasp service names:" \
            " - ngit-grasp.service" \
            " - ngit-grasp-relay-ngit-dev.service (NixOS multi-instance)" \
            " - ngit-grasp-archive.service"; do
            log_error "$hint"
        done
        return 1
    fi

    # Any other name without "grasp" in it is allowed, but flagged.
    case "$SERVICE_NAME" in
        *ngit-grasp* | *grasp*) ;;
        *)
            log_warn "SERVICE_NAME doesn't contain 'ngit-grasp': $SERVICE_NAME"
            log_warn "Structured logging only exists in ngit-grasp services."
            log_warn "If this is not an ngit-grasp service, Phase 4 will find no logs."
            ;;
    esac

    # Optional time window, appended verbatim to both command strings.
    local log_opts=""
    [[ -z "$LOG_SINCE" ]] || log_opts+=" --since '$LOG_SINCE'"
    [[ -z "$LOG_UNTIL" ]] || log_opts+=" --until '$LOG_UNTIL'"

    local cmds=()
    local extractor
    for extractor in 30-extract-parse-failures.sh 31-extract-purgatory-expiry.sh; do
        cmds+=("'$SCRIPT_DIR/$extractor' '$SERVICE_NAME' '$OUTPUT_DIR/logs' $log_opts")
    done

    run_phase 4 "Extract Logs (VPS required)" "${cmds[@]}"
}
621
# Phase 5: Final classification
#
# Requires the prod, archive and comparison directories under OUTPUT_DIR.
# Missing Phase 4 log files are replaced by placeholders (except in
# dry-run mode) so the classifier always finds its inputs.
# Returns 1 when a required directory is missing, otherwise run_phase's
# status for 40-classify-actions.sh.
run_phase_5() {
    local can_run=true
    local required

    # Every required input directory is checked so all gaps get reported.
    for required in prod archive comparison; do
        [[ -d "$OUTPUT_DIR/$required" ]] && continue
        log_warn "Missing $required directory"
        can_run=false
    done

    # Create logs directory with empty files if missing
    if [[ "$DRY_RUN" != "true" ]]; then
        mkdir -p "$OUTPUT_DIR/logs"
        for f in parse-failures.txt purgatory-expired.txt; do
            [[ -f "$OUTPUT_DIR/logs/$f" ]] ||
                echo "# No data - Phase 4 not run" > "$OUTPUT_DIR/logs/$f"
        done
    fi

    if [[ "$can_run" != "true" ]]; then
        log_error "Cannot run Phase 5 - missing required input directories"
        return 1
    fi

    run_phase 5 "Final Classification (fast)" "'$SCRIPT_DIR/40-classify-actions.sh' '$OUTPUT_DIR'"
}
659
# Print the first line of a file that contains a label, followed by the
# line right after it.
#
# Arguments:
#   $1 - file to read
#   $2 - literal text a line must contain
#   $3 - literal text a line must NOT contain ("" disables the exclusion)
_display_summary_section() {
    awk -v want="$2" -v skip="$3" '
        found { print; exit }
        index($0, want) && (skip == "" || !index($0, skip)) { print; found = 1 }
    ' "$1"
}

# Display summary
#
# Prints the output directory, per-phase timings with their total, the key
# sections of results/summary.txt (when that file exists), the list of
# result files and the suggested next steps. Everything goes to stdout
# except the header, which is delegated to log_header.
display_summary() {
    local phase skip_var t
    local total_time=0
    local summary_file="$OUTPUT_DIR/results/summary.txt"

    log_header "Migration Analysis Complete"

    echo "Output Directory: $OUTPUT_DIR"
    echo ""

    # Phase timing summary
    echo "Phase Timing:"
    for phase in 1 2 3 4 5; do
        skip_var="SKIP_PHASE_$phase"
        if [[ "${!skip_var}" == "true" ]]; then
            echo " Phase $phase: SKIPPED"
        elif [[ -n "${PHASE_TIMES[$phase]:-}" ]]; then
            t="${PHASE_TIMES[$phase]}"
            echo " Phase $phase: ${t}s"
            total_time=$((total_time + t))
        else
            # Phase was enabled but never recorded a duration.
            echo " Phase $phase: N/A"
        fi
    done
    echo " ─────────────"
    echo " Total: ${total_time}s"
    echo ""

    # Results summary
    if [[ -f "$summary_file" ]]; then
        echo "Results Summary:"
        echo ""
        # Extract key metrics from summary. "Action Required" is a substring
        # of "No Action Required", so that section must be excluded
        # explicitly or the first section would simply be shown twice.
        _display_summary_section "$summary_file" "No Action Required" ""
        _display_summary_section "$summary_file" "Action Required" "No Action Required"
        _display_summary_section "$summary_file" "Manual Investigation" ""
        echo ""
    fi

    # Output files
    echo "Output Files:"
    echo " $OUTPUT_DIR/results/no-action-required.txt"
    echo " $OUTPUT_DIR/results/action-required.txt"
    echo " $OUTPUT_DIR/results/manual-investigation.txt"
    echo " $OUTPUT_DIR/results/summary.txt"
    echo ""

    # Next steps
    echo "Next Steps:"
    echo " 1. Review results/summary.txt for overview"
    echo " 2. Address items in results/action-required.txt"
    echo " 3. Investigate items in results/manual-investigation.txt"
    echo " 4. Plan migration window when action items are resolved"
    echo ""
}
719
# Main
#
# Parses the arguments, validates the setup, prints the effective
# configuration and phase plan, then runs phases 1-5 in order. A failing
# phase does not stop the later ones; it only makes the script exit 1.
main() {
    parse_args "$@"

    log_header "GRASP Relay to ngit-grasp Migration Analysis"

    validate_args
    check_prerequisites
    determine_phases
    setup_output_dir

    # Show configuration (optional settings only when they are set)
    log_info "Configuration:"
    log_step "Prod relay: $PROD_RELAY"
    log_step "Archive relay: $ARCHIVE_RELAY"
    if [[ -n "$PROD_GIT" ]]; then
        log_step "Prod git: $PROD_GIT"
    fi
    if [[ -n "$ARCHIVE_GIT" ]]; then
        log_step "Archive git: $ARCHIVE_GIT"
    fi
    if [[ -n "$SERVICE_NAME" ]]; then
        log_step "Service: $SERVICE_NAME"
    fi
    log_step "Output: $OUTPUT_DIR"
    echo ""

    # Show phase plan
    log_info "Phase Plan:"
    for phase in 1 2 3 4 5; do
        local skip_var="SKIP_PHASE_$phase"
        local verdict
        case "${!skip_var}" in
            true) verdict="SKIP" ;;
            *)    verdict="RUN" ;;
        esac
        log_step "Phase $phase: $verdict"
    done
    echo ""

    if [[ "$DRY_RUN" == "true" ]]; then
        log_warn "DRY RUN MODE - No changes will be made"
        echo ""
    fi

    # Run phases; remember whether any of them failed.
    local overall_exit=0
    local phase_runner
    for phase_runner in run_phase_1 run_phase_2 run_phase_3 run_phase_4 run_phase_5; do
        "$phase_runner" || overall_exit=1
    done

    # Display summary (there are no results to summarize in a dry run)
    if [[ "$DRY_RUN" != "true" ]]; then
        display_summary
    fi

    if (( overall_exit != 0 )); then
        log_warn "Some phases failed. Review output for details."
    fi

    exit "$overall_exit"
}
778
779main "$@"
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/validate-service.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/validate-service.sh
new file mode 100755
index 0000000..6988af3
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/validate-service.sh
@@ -0,0 +1,151 @@
1#!/usr/bin/env bash
2#
3# validate-service.sh - Validate service name for structured logging
4#
5# This helper script validates that a service name is appropriate for
6# Phase 4 log extraction. Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED])
7# only exists in ngit-grasp services, NOT in ngit-relay services.
8#
9# USAGE:
10# Source this script and call the validation function:
11#
12# source validate-service.sh
13# validate_service_for_structured_logging "$SERVICE_NAME" || exit 1
14#
15# BACKGROUND:
16# Phase 4 of the migration analysis extracts structured log entries from
17# journald. These log entries only exist in ngit-grasp services. If you
18# accidentally specify an ngit-relay service, Phase 4 will find no logs
19# and produce empty results.
20#
21# This validation prevents that common mistake by:
22# 1. Checking if the service name contains "ngit-relay" (error)
23# 2. Warning if the service name doesn't contain "ngit-grasp"
24# 3. Optionally checking if structured logs actually exist
25#
26# SEE ALSO:
27# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
28# 30-extract-parse-failures.sh - Uses this validation
29# 31-extract-purgatory-expiry.sh - Uses this validation
30#
31
# Colors for output (disabled when stderr is not a terminal).
# Every colored message in this file is written to stderr, so stderr - not
# stdout - is the stream that decides whether escape codes are safe to emit
# (e.g. `script 2>errors.log` must not put escape sequences into the log).
if [[ -t 2 ]]; then
    _VS_RED='\033[0;31m'
    _VS_YELLOW='\033[0;33m'
    _VS_NC='\033[0m'
else
    _VS_RED=''
    _VS_YELLOW=''
    _VS_NC=''
fi
42
43# Validates that the service name is appropriate for structured logging
44#
45# Arguments:
46# $1 - service_name: The systemd service name to validate
47# $2 - check_logs: Whether to check if logs actually exist (default: "true")
48# $3 - interactive: Whether to prompt for confirmation (default: "true")
49#
50# Returns:
51# 0 - Service is valid for structured logging
52# 1 - Service is invalid or user declined to continue
53#
54# Example:
55# validate_service_for_structured_logging "ngit-grasp.service" || exit 1
56# validate_service_for_structured_logging "ngit-grasp.service" "false" # Skip log check
57# validate_service_for_structured_logging "ngit-grasp.service" "true" "false" # Non-interactive
58#
validate_service_for_structured_logging() {
    local service_name="$1"
    local check_logs="${2:-true}"
    local interactive="${3:-true}"

    # Check if service name looks like ngit-relay (ERROR - wrong service type)
    if [[ "$service_name" == *"ngit-relay"* ]]; then
        echo -e "${_VS_RED}ERROR: Service name appears to be ngit-relay: $service_name${_VS_NC}" >&2
        echo "" >&2
        echo "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists in" >&2
        echo "ngit-grasp services, NOT in ngit-relay services." >&2
        echo "" >&2
        echo "Please use the ngit-grasp archive service instead." >&2
        echo "" >&2
        echo "To find the correct service name:" >&2
        echo " systemctl list-units 'ngit-grasp*' --all" >&2
        echo "" >&2
        echo "Common ngit-grasp service names:" >&2
        echo " - ngit-grasp.service" >&2
        echo " - ngit-grasp-relay-ngit-dev.service (NixOS multi-instance)" >&2
        echo " - ngit-grasp-archive.service" >&2
        return 1
    fi

    # Check if service name looks like ngit-grasp (WARNING if not)
    if [[ "$service_name" != *"ngit-grasp"* && "$service_name" != *"grasp"* ]]; then
        echo -e "${_VS_YELLOW}WARNING: Service name doesn't contain 'ngit-grasp': $service_name${_VS_NC}" >&2
        echo "" >&2
        echo "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists in" >&2
        echo "ngit-grasp services." >&2
        echo "" >&2

        if [[ "$interactive" == "true" ]]; then
            # Single keypress; anything other than y/Y (including EOF) declines.
            read -p "Continue anyway? (y/N) " -n 1 -r
            echo
            if [[ ! $REPLY =~ ^[Yy]$ ]]; then
                return 1
            fi
        else
            echo "Non-interactive mode: proceeding despite warning" >&2
        fi
    fi

    # Optionally check if structured logs actually exist
    if [[ "$check_logs" == "true" ]]; then
        # Without journalctl the check cannot be made; treat the service as valid.
        if ! command -v journalctl &> /dev/null; then
            echo -e "${_VS_YELLOW}WARNING: journalctl not available, cannot verify logs exist${_VS_NC}" >&2
            return 0
        fi

        # Count journal lines carrying either structured marker in one pass.
        # IMPORTANT: Use --no-pager to prevent hanging when run non-interactively (e.g., via SSH)
        # `grep -c` always prints the count but exits 1 when it is zero, so
        # the status is neutralized with `|| true` rather than by echoing a
        # second "0" into the captured value.
        local structured_hits
        structured_hits=$(journalctl --no-pager -u "$service_name" --since "7 days ago" 2>/dev/null \
            | grep -cE '\[(PARSE_FAIL|PURGATORY_EXPIRED)\]' || true)
        # Empty output (e.g. grep could not run) counts as "nothing found".
        structured_hits="${structured_hits:-0}"

        if [[ "$structured_hits" -eq 0 ]]; then
            echo -e "${_VS_YELLOW}WARNING: No structured logs found in $service_name (last 7 days)${_VS_NC}" >&2
            echo "" >&2
            echo "This may indicate:" >&2
            echo " 1. Wrong service (should be ngit-grasp archive service, not ngit-relay)" >&2
            echo " 2. Structured logging not yet deployed to this ngit-grasp instance" >&2
            echo " 3. No parse failures or purgatory expiry events in the time window" >&2
            echo "" >&2
            echo "To verify you have the right service:" >&2
            echo " systemctl list-units 'ngit-grasp*' --all" >&2
            echo " journalctl -u <service> | grep -E '\\[PARSE_FAIL\\]|\\[PURGATORY_EXPIRED\\]' | head -5" >&2
            echo "" >&2

            if [[ "$interactive" == "true" ]]; then
                read -p "Continue anyway? (y/N) " -n 1 -r
                echo
                if [[ ! $REPLY =~ ^[Yy]$ ]]; then
                    return 1
                fi
            else
                echo "Non-interactive mode: proceeding despite warning" >&2
            fi
        fi
    fi

    return 0
}
149
150# Export the function so it can be used after sourcing
151export -f validate_service_for_structured_logging