upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh
diff options
context:
space:
mode:
authorDanConwayDev <DanConwayDev@protonmail.com>2026-02-03 14:41:46 +0000
committerDanConwayDev <DanConwayDev@protonmail.com>2026-02-03 14:46:09 +0000
commit92a9a3bfe0bc522e8ae411991a366a3a6310d525 (patch)
tree9fc5045a9df0ef56cc8ad37afaef09fad37d95ed /docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh
parentf148b3a0e4b032c0acf835cda6d2935e19b9f67e (diff)
docs: archive relay.ngit.dev migration materials for reference
Move migration guide and scripts to docs/archive/2026-01-relay-ngit-dev-migration/ with clear warnings that these are reference-only materials from a specific migration context, not general-purpose tools. These materials document the relay.ngit.dev migration from ngit-relay to ngit-grasp in January 2026. The scripts were developed iteratively during the migration and are specific to that context. They are preserved for: - Historical reference - Context for production fixes in this branch - Inspiration for future migrations (not direct reuse) The migration uncovered critical bugs now fixed in this branch: - Git protocol error handling - Naughty list false positives - Purgatory event tracking - Sync startup issues - Configuration management
Diffstat (limited to 'docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh')
-rwxr-xr-xdocs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh564
1 files changed, 564 insertions, 0 deletions
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh
new file mode 100755
index 0000000..b4536cb
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh
@@ -0,0 +1,564 @@
#!/usr/bin/env bash
#
# 10-check-git-sync.sh - Compare state events to actual git data on disk
#
# PHASE 2 of the GRASP relay to ngit-grasp migration analysis pipeline.
# Compares kind 30618 state events against actual git refs on disk.
#
# USAGE:
#   ./10-check-git-sync.sh <state-events.json> <git-base-dir> <output-dir> [--categorize]
#
# EXAMPLES:
#   # Check source relay against source git data
#   ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod
#
#   # Check target relay against target git data
#   ./10-check-git-sync.sh output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive
#
#   # Check and categorize in one step (convenience mode)
#   ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod --categorize
#
# INPUT:
#   state-events.json - JSONL file from Phase 1 (01-fetch-events.sh)
#                       One kind 30618 event per line
#   git-base-dir      - Base directory containing git repos
#                       Structure: <git-base>/<npub>/<repo>.git/
#
# OUTPUT:
#   <output-dir>/git-sync-status.tsv - Tab-separated values:
#     repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason
#
#   With --categorize flag, also outputs:
#     <output-dir>/category1-complete-match.txt
#     <output-dir>/category2-empty-blank.txt
#     <output-dir>/category3-partial-match.txt
#     <output-dir>/category4-no-match.txt
#
# CATEGORIES:
#   1. Complete Match - All refs in state event match git data perfectly
#   2. Empty/Blank    - No git data available (directory missing or empty)
#   3. Partial Match  - Some refs match, some don't
#   4. No Match       - Git data exists but commit hashes don't match
#
# PREREQUISITES:
#   - nak (for npub encoding) - https://github.com/fiatjaf/nak
#   - jq (for JSON parsing)
#   - Read access to git directories (may need sudo)
#
# RUNTIME: ~20 minutes on VPS (git operations are slow)
#
# NOTES:
#   - Must run on VPS with access to git directories
#   - Progress indicator updates every 10 events
#   - Handles packed refs (git show-ref) and loose refs
#
# SEE ALSO:
#   docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
#   01-fetch-events.sh - Phase 1 script that produces input for this script
#   20-categorize.sh   - Phase 3a script that consumes output from this script
#

set -euo pipefail

# Colors for log output.
# Every log_* helper in this script writes to stderr, so the terminal check is
# made on fd 2 (not stdout): escape codes are emitted only when stderr is a
# terminal and never end up in a file or pipe that stderr is redirected to.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi

# Print an informational message to stderr, tagged [INFO] in blue.
# Args: $* = message words (joined with single spaces)
# %b expands the backslash escapes stored in the color variables; the message
# itself goes through %s so backslashes in paths/identifiers stay literal.
log_info() {
    printf '%b[INFO]%b %s\n' "${BLUE:-}" "${NC:-}" "$*" >&2
}

# Print a success message to stderr, tagged [OK] in green.
# Args: $* = message words (joined with single spaces)
# %b expands the backslash escapes stored in the color variables; the message
# itself goes through %s so backslashes in it stay literal.
log_success() {
    printf '%b[OK]%b %s\n' "${GREEN:-}" "${NC:-}" "$*" >&2
}

# Print a warning to stderr, tagged [WARN] in yellow.
# Args: $* = message words (joined with single spaces)
# %b expands the backslash escapes stored in the color variables; the message
# itself goes through %s so backslashes in it stay literal.
log_warn() {
    printf '%b[WARN]%b %s\n' "${YELLOW:-}" "${NC:-}" "$*" >&2
}

# Print an error message to stderr, tagged [ERROR] in red.
# Args: $* = message words (joined with single spaces)
# %b expands the backslash escapes stored in the color variables; the message
# itself goes through %s so backslashes in it stay literal.
log_error() {
    printf '%b[ERROR]%b %s\n' "${RED:-}" "${NC:-}" "$*" >&2
}

# Print a progress update to stderr, tagged [PROGRESS] in blue.
# Args: $* = message words (joined with single spaces)
# The line starts with a carriage return and has no trailing newline, so each
# update overwrites the previous one on a terminal. The message is printed
# with %s so backslashes in it stay literal.
log_progress() {
    printf '\r%b[PROGRESS]%b %s' "${BLUE:-}" "${NC:-}" "$*" >&2
}

# Print the usage text and exit with status 1.
# The text goes to stderr because this function is only reached on an
# argument error; stdout stays free of diagnostics.
usage() {
    cat >&2 <<EOF
Usage: $0 <state-events.json> <git-base-dir> <output-dir> [--categorize]

Arguments:
  state-events.json   JSONL file from Phase 1 (kind 30618 events)
  git-base-dir        Base directory for git repos (e.g., /var/lib/grasp-relay/git)
  output-dir          Directory to store output files
  --categorize        Optional: also output category files (like Phase 3)

Examples:
  $0 output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod
  $0 output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive

Output:
  git-sync-status.tsv - TSV with: repo, npub, state_refs, git_refs, matches, reason
EOF
    exit 1
}

# Check prerequisites
# Verifies that git, nak and jq can be found with `command -v`. Every missing
# tool is reported through log_error (in that order) before the script exits
# with status 1; nothing is printed when all three are present.
# Args: $* = optional; echoed after $0 in the nix-shell hint that is printed
#       when nak is missing (pass the script's own arguments to get a
#       complete command line in the hint).
check_prerequisites() {
    local missing=0
    local tool

    for tool in git nak jq; do
        if command -v "$tool" &> /dev/null; then
            continue
        fi

        missing=1
        case "$tool" in
            nak)
                log_error "nak not found. Install from: https://github.com/fiatjaf/nak"
                log_error "Or run: nix-shell -p nak jq --run \"$0 $*\""
                ;;
            *)
                log_error "$tool not found. Install with your package manager."
                ;;
        esac
    done

    if [[ $missing -eq 1 ]]; then
        exit 1
    fi
}

# Convert hex pubkey to npub
# Args: $1=hex_pubkey
# Returns: npub string on stdout, or an empty line on error
hex_to_npub() {
    local hex="$1"

    # Only a 32-byte hex key is handed to nak. This keeps values such as
    # "null" or option-like strings ("--help") from being interpreted by nak
    # and turning its output into a bogus npub.
    if [[ ! "$hex" =~ ^[0-9a-fA-F]{64}$ ]]; then
        echo ""
        return
    fi

    nak encode npub "$hex" 2>/dev/null || echo ""
}

# Count refs in state event (only refs/heads/)
# Args: $1=event_json
# Returns: count on stdout; "0" when jq cannot process the event
count_state_refs() {
    local event="$1"

    # A tag whose first element is missing or not a string (e.g. an empty tag
    # array) is skipped rather than making the whole jq program fail, which
    # would report 0 refs for an otherwise valid event.
    jq '[.tags[]?
         | select((.[0]? | strings | startswith("refs/heads/")) // false)]
        | length' <<< "$event" 2>/dev/null || echo "0"
}

# Get git refs from disk
# Args: $1=git_dir
# Returns: count of refs/heads/ refs on stdout ("0" if the directory is missing)
count_git_refs() {
    local git_dir="$1"

    if [[ ! -d "$git_dir" ]]; then
        echo "0"
        return
    fi

    # Try git show-ref first (handles packed refs correctly).
    # The output is captured on its own so git's exit status is tested
    # directly; the result therefore does not depend on whether `pipefail`
    # is active, as it would with `git ... | wc -l`.
    local refs
    if refs=$(git --git-dir="$git_dir" show-ref --heads 2>/dev/null) && [[ -n "$refs" ]]; then
        # One ref per line; the here-string supplies the final newline
        wc -l <<< "$refs" | tr -d ' '
        return
    fi

    # Fallback: count loose refs (when git fails or reports no heads)
    if [[ -d "$git_dir/refs/heads" ]]; then
        find "$git_dir/refs/heads" -type f 2>/dev/null | wc -l | tr -d ' '
    else
        echo "0"
    fi
}

# Get ref hash from git directory
# Args: $1=git_dir, $2=ref_path (e.g., refs/heads/main)
# Returns: commit hash on stdout, or empty when the ref cannot be resolved
get_git_ref_hash() {
    local git_dir="$1"
    local ref_path="$2"

    # Try git show-ref first (handles packed refs).
    # --verify requires an exact ref path. Without it show-ref treats the
    # argument as a pattern matched against trailing path components, so a
    # different ref that merely ends in the same path could be returned.
    local hash
    hash=$(git --git-dir="$git_dir" show-ref --verify --hash -- "$ref_path" 2>/dev/null) || hash=""
    hash=${hash%%$'\n'*}

    if [[ -n "$hash" ]]; then
        echo "$hash"
        return
    fi

    # Fallback: read loose ref file (contents emitted without the newline)
    local ref_file="$git_dir/$ref_path"
    if [[ -f "$ref_file" ]]; then
        tr -d '\n' 2>/dev/null < "$ref_file" || echo ""
    else
        echo ""
    fi
}

# Compare state event refs to git refs
# Args: $1=event_json, $2=git_dir
# Returns: count of refs/heads/ tags whose hash equals the hash that
#          get_git_ref_hash reports for the same ref (on stdout)
count_matching_refs() {
    local event="$1"
    local git_dir="$2"
    local matching=0
    local ref_path expected_hash actual_hash

    # A single jq pass emits "<ref>\t<hash>" for every refs/heads/ tag (the
    # hash is empty when the tag has none). Tags whose first element is not a
    # string are skipped instead of aborting the whole extraction.
    while IFS=$'\t' read -r ref_path expected_hash; do
        # Skip if not a heads ref or hash is missing
        [[ ! "$ref_path" =~ ^refs/heads/ ]] && continue
        [[ -z "$expected_hash" || "$expected_hash" == "null" ]] && continue

        # Get actual hash from git
        actual_hash=$(get_git_ref_hash "$git_dir" "$ref_path")

        if [[ "$expected_hash" == "$actual_hash" ]]; then
            matching=$((matching + 1))
        fi
    done < <(jq -r '.tags[]?
                    | select((.[0]? | strings | startswith("refs/heads/")) // false)
                    | "\(.[0])\t\(.[1] // "")"' <<< "$event" 2>/dev/null)

    echo "$matching"
}

# Categorize a single entry
# Args: $1=state_refs, $2=git_refs, $3=matches, $4=reason (may be empty/omitted)
# Returns: category number (1-4) on stdout
#   2 = Empty/Blank    (a reason is set, or git has no refs)
#   1 = Complete Match (state_refs > 0 and state_refs == git_refs == matches)
#   4 = No Match       (git has refs but none match)
#   3 = Partial Match  (anything else with at least one match)
categorize_entry() {
    local state_refs="$1"
    local git_refs="$2"
    local matches="$3"
    local reason="${4:-}"

    # Category 2: Empty/Blank
    if [[ -n "$reason" ]] || (( git_refs == 0 )); then
        echo "2"
        return
    fi

    # Category 1: Complete Match
    if (( state_refs > 0 && state_refs == git_refs && matches == state_refs )); then
        echo "1"
        return
    fi

    # Category 4: No Match
    if (( git_refs > 0 && matches == 0 )); then
        echo "4"
        return
    fi

    # Category 3: Partial Match (default for anything else with matches > 0)
    if (( matches > 0 )); then
        echo "3"
        return
    fi

    # Fallback to category 2
    echo "2"
}

# Format entry for category file
# Args: $1=repo, $2=npub, $3=state_refs, $4=git_refs, $5=matches,
#       $6=reason (may be empty/omitted)
# Returns: one " | "-separated line on stdout; the reason=... field is
#          appended only when the reason is non-empty
format_category_line() {
    local repo="$1"
    local npub="$2"
    local state_refs="$3"
    local git_refs="$4"
    local matches="$5"
    local reason="${6:-}"
    local line

    printf -v line '%s | %s | state_refs=%s | git_refs=%s | matches=%s' \
        "$repo" "$npub" "$state_refs" "$git_refs" "$matches"

    if [[ -n "$reason" ]]; then
        line+=" | reason=$reason"
    fi

    printf '%s\n' "$line"
}

# Process a single state event
# Args: $1=event_json, $2=git_base
# Outputs: TSV line to stdout:
#          repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason
# Returns: 1 (and prints nothing) when the event has no usable d tag or
#          pubkey, or the pubkey cannot be converted to an npub
# reason is one of: "" (refs were compared), no_git_dir, empty_refs,
# no_state_refs.
process_event() {
    local event="$1"
    local git_base="$2"

    # Extract repository identifier (d tag). `// empty` keeps a missing value
    # from being printed as the literal string "null".
    local identifier
    identifier=$(jq -r '.tags[] | select(.[0] == "d") | .[1] // empty' <<< "$event" 2>/dev/null | head -1 || echo "")

    # The identifier comes from the event and becomes a path component and a
    # TSV field: reject values that could leave <git_base>/<npub>/ or split
    # the TSV row.
    if [[ -z "$identifier" || "$identifier" == */* || "$identifier" == *$'\t'* ]]; then
        return 1
    fi

    # Extract maintainer pubkey (hex)
    local hex_pubkey
    hex_pubkey=$(jq -r '.pubkey // empty' <<< "$event" 2>/dev/null || echo "")

    if [[ -z "$hex_pubkey" ]]; then
        return 1
    fi

    # Convert to npub
    local npub
    npub=$(hex_to_npub "$hex_pubkey")

    if [[ -z "$npub" ]]; then
        return 1
    fi

    # Count state refs
    local state_refs
    state_refs=$(count_state_refs "$event")

    # Find git directory
    local git_dir="$git_base/${npub}/${identifier}.git"

    # Check git directory status
    local git_refs=0
    local matches=0
    local reason=""

    if [[ ! -d "$git_dir" ]]; then
        reason="no_git_dir"
    elif [[ ! -d "$git_dir/refs/heads" ]] && [[ ! -f "$git_dir/packed-refs" ]]; then
        # Neither loose nor packed refs exist
        reason="empty_refs"
    else
        git_refs=$(count_git_refs "$git_dir")

        if [[ "$git_refs" -eq 0 ]]; then
            reason="empty_refs"
        elif [[ "$state_refs" -eq 0 ]]; then
            reason="no_state_refs"
        else
            matches=$(count_matching_refs "$event" "$git_dir")
        fi
    fi

    # Output TSV line: repo, npub, state_refs, git_refs, matches, reason
    printf '%s\t%s\t%s\t%s\t%s\t%s\n' "$identifier" "$npub" "$state_refs" "$git_refs" "$matches" "$reason"
}

# Main
# Entry point. Parses the command line, validates the inputs, runs
# process_event on every line of the state events file, writes
# <output-dir>/git-sync-status.tsv (plus the four category files with
# --categorize) and prints a summary.
# Args: "$@" = <state-events.json> <git-base-dir> <output-dir> [--categorize]
main() {
    local do_categorize=0
    local args=()
    local arg

    # Parse arguments: --categorize may appear anywhere, the rest is positional
    for arg in "$@"; do
        if [[ "$arg" == "--categorize" ]]; then
            do_categorize=1
        else
            args+=("$arg")
        fi
    done

    if [[ ${#args[@]} -ne 3 ]]; then
        usage
    fi

    local state_events_file="${args[0]}"
    local git_base="${args[1]}"
    local output_dir="${args[2]}"

    # Validate inputs
    if [[ ! -f "$state_events_file" ]]; then
        log_error "State events file not found: $state_events_file"
        exit 1
    fi

    if [[ ! -d "$git_base" ]]; then
        log_error "Git base directory not found: $git_base"
        log_error "This script must run on the VPS with access to git directories."
        exit 1
    fi

    # Check read permissions
    if ! ls "$git_base" >/dev/null 2>&1; then
        log_error "Cannot read git base directory (permission denied): $git_base"
        log_error "Try running with sudo or grant read permissions."
        exit 1
    fi

    # The arguments are forwarded so the nix-shell hint printed for a missing
    # nak shows the complete command line.
    check_prerequisites "$@"

    log_info "=== Git State Synchronization Check ==="
    log_info "State events: $state_events_file"
    log_info "Git base: $git_base"
    log_info "Output: $output_dir"
    if [[ $do_categorize -eq 1 ]]; then
        log_info "Mode: TSV + categorization"
    else
        log_info "Mode: TSV only (use 20-categorize.sh for categories)"
    fi
    log_info "Started: $(date)"
    echo ""

    # Create output directory
    mkdir -p "$output_dir"

    # Output files
    local tsv_file="$output_dir/git-sync-status.tsv"

    # Initialize TSV with header
    printf 'repo\tnpub\tstate_refs\tgit_refs\tmatches\treason\n' > "$tsv_file"

    # Initialize (truncate) category files if categorizing
    local cat1="" cat2="" cat3="" cat4=""
    if [[ $do_categorize -eq 1 ]]; then
        cat1="$output_dir/category1-complete-match.txt"
        cat2="$output_dir/category2-empty-blank.txt"
        cat3="$output_dir/category3-partial-match.txt"
        cat4="$output_dir/category4-no-match.txt"
        : > "$cat1"
        : > "$cat2"
        : > "$cat3"
        : > "$cat4"
    fi

    # Count total events. grep -c '' also counts a final line that lacks a
    # trailing newline; `|| true` covers its non-zero status on an empty file.
    local total_events
    total_events=$(grep -c '' < "$state_events_file" || true)
    log_info "Processing $total_events state events..."
    echo ""

    # Process each event
    local count=0
    local processed=0
    local skipped=0
    local count_cat1=0 count_cat2=0 count_cat3=0 count_cat4=0
    local start_time
    start_time=$(date +%s)

    local event result category cat_line
    local repo npub state_refs git_refs matches reason
    local elapsed rate eta

    # `|| [[ -n "$event" ]]` keeps a last line without a trailing newline
    while IFS= read -r event || [[ -n "$event" ]]; do
        count=$((count + 1))

        # Skip empty lines
        [[ -z "$event" ]] && continue

        # Process event
        if result=$(process_event "$event" "$git_base"); then
            processed=$((processed + 1))

            # Append the row below the header written above
            printf '%s\n' "$result" >> "$tsv_file"

            # Categorize if requested
            if [[ $do_categorize -eq 1 ]]; then
                # Parse result
                IFS=$'\t' read -r repo npub state_refs git_refs matches reason <<< "$result"

                category=$(categorize_entry "$state_refs" "$git_refs" "$matches" "$reason")
                cat_line=$(format_category_line "$repo" "$npub" "$state_refs" "$git_refs" "$matches" "$reason")

                case "$category" in
                    1) echo "$cat_line" >> "$cat1"; count_cat1=$((count_cat1 + 1)) ;;
                    2) echo "$cat_line" >> "$cat2"; count_cat2=$((count_cat2 + 1)) ;;
                    3) echo "$cat_line" >> "$cat3"; count_cat3=$((count_cat3 + 1)) ;;
                    4) echo "$cat_line" >> "$cat4"; count_cat4=$((count_cat4 + 1)) ;;
                esac
            fi
        else
            skipped=$((skipped + 1))
        fi

        # Progress indicator every 10 events
        if [[ $((count % 10)) -eq 0 ]]; then
            elapsed=$(($(date +%s) - start_time))
            rate=0
            eta="?"
            if [[ $elapsed -gt 0 ]]; then
                rate=$((count / elapsed))
                # Derived from elapsed time per event, so an estimate is still
                # produced when throughput is below one event per second.
                eta=$((elapsed * (total_events - count) / count))
            fi
            log_progress "Processed $count/$total_events events (~${rate}/s, ETA: ${eta}s)..."
        fi
    done < "$state_events_file"

    # Clear progress line
    echo "" >&2

    local end_time
    end_time=$(date +%s)
    local duration=$((end_time - start_time))

    # Summary
    echo ""
    log_info "=== Analysis Complete ==="
    log_info "Finished: $(date)"
    log_info "Duration: ${duration}s"
    log_info "Processed: $processed events"
    if [[ $skipped -gt 0 ]]; then
        log_warn "Skipped: $skipped events (missing identifier or pubkey)"
    fi
    echo ""

    if [[ $do_categorize -eq 1 ]]; then
        # Calculate percentages (values are passed with -v, not spliced into
        # the awk program text)
        local total=$((count_cat1 + count_cat2 + count_cat3 + count_cat4))
        local pct1=0 pct2=0 pct3=0 pct4=0
        if [[ $total -gt 0 ]]; then
            pct1=$(awk -v n="$count_cat1" -v t="$total" 'BEGIN {printf "%.1f", (n/t)*100}')
            pct2=$(awk -v n="$count_cat2" -v t="$total" 'BEGIN {printf "%.1f", (n/t)*100}')
            pct3=$(awk -v n="$count_cat3" -v t="$total" 'BEGIN {printf "%.1f", (n/t)*100}')
            pct4=$(awk -v n="$count_cat4" -v t="$total" 'BEGIN {printf "%.1f", (n/t)*100}')
        fi

        log_info "=== Category Summary ==="
        log_success "Category 1 (Complete Match): $count_cat1 ($pct1%)"
        log_warn "Category 2 (Empty/Blank): $count_cat2 ($pct2%)"
        log_warn "Category 3 (Partial Match): $count_cat3 ($pct3%)"
        log_error "Category 4 (No Match): $count_cat4 ($pct4%)"
        echo ""

        # Validation warning
        if [[ $count_cat2 -eq $total ]] && [[ $total -gt 0 ]]; then
            log_error "WARNING: 100% of repos categorized as Empty/Blank"
            log_error "This usually indicates a permission or path issue."
            echo ""
            log_info "Troubleshooting:"
            echo " 1. Verify git data exists: sudo ls -la $git_base | head -10"
            echo " 2. Check sample repo: sudo find $git_base -name '*.git' -type d | head -1"
            echo " 3. Re-run with sudo if not already using it"
            echo ""
        fi
    fi

    log_info "Output files:"
    echo " $tsv_file"
    if [[ $do_categorize -eq 1 ]]; then
        echo " $cat1"
        echo " $cat2"
        echo " $cat3"
        echo " $cat4"
    else
        echo ""
        log_info "Next step: Run 20-categorize.sh to categorize results"
        echo " ./20-categorize.sh $tsv_file $output_dir"
    fi
}

main "$@"