From 92a9a3bfe0bc522e8ae411991a366a3a6310d525 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 3 Feb 2026 14:41:46 +0000 Subject: docs: archive relay.ngit.dev migration materials for reference Move migration guide and scripts to docs/archive/2026-01-relay-ngit-dev-migration/ with clear warnings that these are reference-only materials from a specific migration context, not general-purpose tools. These materials document the relay.ngit.dev migration from ngit-relay to ngit-grasp in January 2026. The scripts were developed iteratively during the migration and are specific to that context. They are preserved for: - Historical reference - Context for production fixes in this branch - Inspiration for future migrations (not direct reuse) The migration uncovered critical bugs now fixed in this branch: - Git protocol error handling - Naughty list false positives - Purgatory event tracking - Sync startup issues - Configuration management --- docs/how-to/migration-scripts/10-check-git-sync.sh | 564 --------------------- 1 file changed, 564 deletions(-) delete mode 100755 docs/how-to/migration-scripts/10-check-git-sync.sh (limited to 'docs/how-to/migration-scripts/10-check-git-sync.sh') diff --git a/docs/how-to/migration-scripts/10-check-git-sync.sh b/docs/how-to/migration-scripts/10-check-git-sync.sh deleted file mode 100755 index b4536cb..0000000 --- a/docs/how-to/migration-scripts/10-check-git-sync.sh +++ /dev/null @@ -1,564 +0,0 @@ -#!/usr/bin/env bash -# -# 10-check-git-sync.sh - Compare state events to actual git data on disk -# -# PHASE 2 of the GRASP relay to ngit-grasp migration analysis pipeline. -# Compares kind 30618 state events against actual git refs on disk. 
-# -# USAGE: -# ./10-check-git-sync.sh <state-events.json> <git-base-dir> <output-dir> [--categorize] -# -# EXAMPLES: -# # Check source relay against source git data -# ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod -# -# # Check target relay against target git data -# ./10-check-git-sync.sh output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive -# -# # Check and categorize in one step (convenience mode) -# ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod --categorize -# -# INPUT: -# state-events.json - JSONL file from Phase 1 (01-fetch-events.sh) -# One kind 30618 event per line -# git-base-dir - Base directory containing git repos -# Structure: <git-base-dir>/<npub>/<identifier>.git/ -# -# OUTPUT: -# <output-dir>/git-sync-status.tsv - Tab-separated values: -# repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason -# -# With --categorize flag, also outputs: -# <output-dir>/category1-complete-match.txt -# <output-dir>/category2-empty-blank.txt -# <output-dir>/category3-partial-match.txt -# <output-dir>/category4-no-match.txt -# -# CATEGORIES: -# 1. Complete Match - All refs in state event match git data perfectly -# 2. Empty/Blank - No git data available (directory missing or empty) -# 3. Partial Match - Some refs match, some don't -# 4.
#      No Match - Git data exists but commit hashes don't match
#
# PREREQUISITES:
#   - nak (for npub encoding) - https://github.com/fiatjaf/nak
#   - jq (for JSON parsing)
#   - Read access to git directories (may need sudo)
#
# RUNTIME: ~20 minutes on VPS (git operations are slow)
#
# NOTES:
#   - Must run on VPS with access to git directories
#   - Progress indicator updates every 10 events
#   - Handles packed refs (git show-ref) and loose refs
#
# SEE ALSO:
#   docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
#   01-fetch-events.sh - Phase 1 script that produces input for this script
#   20-categorize.sh - Phase 3a script that consumes output from this script
#

set -euo pipefail

# Colors for log output. Every log_* helper writes to stderr, so the terminal
# check is made on fd 2: redirecting stdout must not switch colors off, and
# redirecting stderr to a file must not fill that file with escape codes.
if [[ -t 2 ]]; then
    RED=$'\033[0;31m'
    GREEN=$'\033[0;32m'
    YELLOW=$'\033[0;33m'
    BLUE=$'\033[0;34m'
    NC=$'\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
readonly RED GREEN YELLOW BLUE NC

# Logging helpers.
# Args:    "$@" - message words (joined with single spaces)
# Outputs: one tagged line on stderr; nothing on stdout
# The message is printed through %s so backslashes in it (for example in a
# path) are shown literally instead of being interpreted as escapes.
log_info() {
    printf '%s[INFO]%s %s\n' "$BLUE" "$NC" "$*" >&2
}

log_success() {
    printf '%s[OK]%s %s\n' "$GREEN" "$NC" "$*" >&2
}

log_warn() {
    printf '%s[WARN]%s %s\n' "$YELLOW" "$NC" "$*" >&2
}

log_error() {
    printf '%s[ERROR]%s %s\n' "$RED" "$NC" "$*" >&2
}

# Same as the helpers above, but starts with a carriage return and emits no
# newline so that successive calls overwrite the current terminal line.
log_progress() {
    printf '\r%s[PROGRESS]%s %s' "$BLUE" "$NC" "$*" >&2
}

# Print the command synopsis on stdout and terminate the script.
# Returns: never returns; exits with status 1
usage() {
    printf '%s\n' \
        "Usage: $0 <state-events.json> <git-base-dir> <output-dir> [--categorize]" \
        "" \
        "Arguments:" \
        " state-events.json JSONL file from Phase 1 (kind 30618 events)" \
        " git-base-dir Base directory for git repos (e.g., /var/lib/grasp-relay/git)" \
        " output-dir Directory to store output files" \
        " --categorize Optional: also output category files (like Phase 3)" \
        "" \
        "Examples:" \
        " $0 output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod" \
        " $0 output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive" \
        "" \
        "Output:" \
        " git-sync-status.tsv - TSV with: repo, npub, state_refs, git_refs, matches, reason"
    exit 1
}

# Check prerequisites
check_prerequisites() {
    # Verify that git, nak and jq are on PATH. Every missing tool is reported
    # before exiting, so the user sees the full list in one run.
    # Args:    "$@" - optional: the script's own arguments, used only to build
    #                 the nix-shell hint printed when nak is missing
    # Returns: 0 when all tools are present; exits 1 otherwise
    local missing=0

    if ! command -v git &> /dev/null; then
        log_error "git not found. Install with your package manager."
        missing=1
    fi

    if ! command -v nak &> /dev/null; then
        log_error "nak not found. Install from: https://github.com/fiatjaf/nak"
        log_error "Or run: nix-shell -p nak jq --run \"$0 $*\""
        missing=1
    fi

    if ! command -v jq &> /dev/null; then
        log_error "jq not found. Install with your package manager."
        missing=1
    fi

    if [[ $missing -eq 1 ]]; then
        exit 1
    fi
}

# Convert hex pubkey to npub
# Args: $1=hex_pubkey
# Returns: npub string or empty on error
hex_to_npub() {
    local hex="$1"
    nak encode npub "$hex" 2>/dev/null || echo ""
}

# Count refs in state event (only refs/heads/)
# Args: $1=event_json
# Returns: count on stdout ("0" when jq fails, e.g. on malformed JSON)
count_state_refs() {
    local event="$1"
    jq '[.tags[] | select(.[0] | startswith("refs/heads/"))] | length' \
        <<< "$event" 2>/dev/null || echo "0"
}

# Get git refs from disk
# Args: $1=git_dir
# Returns: count of refs/heads/ refs on stdout
count_git_refs() {
    local git_dir="$1"

    if [[ ! -d "$git_dir" ]]; then
        echo "0"
        return
    fi

    # Try git show-ref first (handles packed refs correctly). The output is
    # captured on its own so git's exit status is tested directly rather than
    # through a pipeline; show-ref fails when it has nothing to print, so
    # $heads is non-empty whenever this branch is taken.
    local heads
    if heads=$(git --git-dir="$git_dir" show-ref --heads 2>/dev/null); then
        wc -l <<< "$heads" | tr -d ' '
        return
    fi

    # Fallback: count loose refs (when git is not available or fails)
    if [[ -d "$git_dir/refs/heads" ]]; then
        find "$git_dir/refs/heads" -type f 2>/dev/null | wc -l | tr -d ' '
    else
        echo "0"
    fi
}

# Get ref hash from git directory
# Args: $1=git_dir, $2=ref_path (full ref name, e.g., refs/heads/main)
# Returns: commit hash on stdout, or empty when the ref cannot be resolved
get_git_ref_hash() {
    local git_dir="$1"
    local ref_path="$2"

    # ref_path is taken from event data and is later joined onto $git_dir in
    # the loose-ref fallback. Git does not allow ".." in ref names, so a value
    # containing it can never be a real ref and is refused outright.
    if [[ "$ref_path" == *..* ]]; then
        echo ""
        return
    fi

    # --verify requires an exact, full ref name. Without it show-ref treats
    # the argument as a pattern matched against trailing path components, so
    # a differently-rooted ref ending in the same components could be
    # returned in place of the branch that was asked for.
    local hash
    hash=$(git --git-dir="$git_dir" show-ref --verify --hash -- "$ref_path" 2>/dev/null || true)

    if [[ -n "$hash" ]]; then
        echo "$hash"
        return
    fi

    # Fallback: read loose ref file
    local ref_file="$git_dir/$ref_path"
    if [[ -f "$ref_file" ]]; then
        cat "$ref_file" 2>/dev/null | tr -d '\n' || echo ""
    else
        echo ""
    fi
}

# Compare state event refs to git refs
# Args: $1=event_json, $2=git_dir
# Returns: count of refs/heads/ tags whose hash equals the hash on disk
count_matching_refs() {
    local event="$1"
    local git_dir="$2"
    local matching=0
    local ref_path expected_hash actual_hash

    # A single jq pass emits "<ref>\t<hash>" for every refs/heads/ tag.
    # A missing or non-string hash is emitted as an empty field, which is
    # skipped below exactly like a literal "null".
    while IFS=$'\t' read -r ref_path expected_hash; do
        # Skip if not a heads ref or hash is missing
        [[ "$ref_path" =~ ^refs/heads/ ]] || continue
        if [[ -z "$expected_hash" || "$expected_hash" == "null" ]]; then
            continue
        fi

        # Get actual hash from git
        actual_hash=$(get_git_ref_hash "$git_dir" "$ref_path")

        if [[ "$expected_hash" == "$actual_hash" ]]; then
            matching=$((matching + 1))
        fi
    done < <(jq -r '.tags[]
        | select(.[0] | startswith("refs/heads/"))
        | [.[0], (.[1] | if type == "string" then . else "" end)]
        | @tsv' <<< "$event" 2>/dev/null)

    echo "$matching"
}

# Categorize a single entry
# Args: $1=state_refs, $2=git_refs, $3=matches, $4=reason
# Returns: category number (1-4) on stdout
categorize_entry() {
    local state_refs="$1"
    local git_refs="$2"
    local matches="$3"
    local reason="$4"

    # Category 2: Empty/Blank (any recorded reason, or no refs on disk)
    if [[ -n "$reason" ]] || [[ "$git_refs" -eq 0 ]]; then
        echo "2"
        return
    fi

    # Category 1: Complete Match (same number of refs and every one matches)
    if [[ "$state_refs" -gt 0 ]] && [[ "$state_refs" -eq "$git_refs" ]] && [[ "$matches" -eq "$state_refs" ]]; then
        echo "1"
        return
    fi

    # Category 4: No Match
    if [[ "$git_refs" -gt 0 ]] && [[ "$matches" -eq 0 ]]; then
        echo "4"
        return
    fi

    # Category 3: Partial Match (default for anything else with matches > 0)
    if [[ "$matches" -gt 0 ]]; then
        echo "3"
        return
    fi

    # Fallback to category 2
    echo "2"
}

# Format entry for category file
# Args: $1=repo, $2=npub, $3=state_refs, $4=git_refs, $5=matches, $6=reason
# Returns: one " | "-separated line on stdout; reason is appended only when set
format_category_line() {
    local repo="$1"
    local npub="$2"
    local state_refs="$3"
    local git_refs="$4"
    local matches="$5"
    local reason="$6"

    if [[ -n "$reason" ]]; then
        echo "$repo | $npub | state_refs=$state_refs | git_refs=$git_refs | matches=$matches | reason=$reason"
    else
        echo "$repo | $npub | state_refs=$state_refs | git_refs=$git_refs | matches=$matches"
    fi
}

# Process a single state event
# Args: $1=event_json, $2=git_base
# Outputs: TSV line to stdout
# Returns: 1 (printing nothing) when the identifier, pubkey or npub is empty
process_event() {
    local event="$1"
    local git_base="$2"

    # Extract repository identifier (d tag)
    local identifier
    identifier=$(jq -r '.tags[] | select(.[0] == "d") | .[1]' <<< "$event" 2>/dev/null | head -n 1 || echo "")

    if [[ -z "$identifier" ]]; then
        return 1
    fi

    # Extract maintainer pubkey (hex)
    local hex_pubkey
    hex_pubkey=$(jq -r '.pubkey' <<< "$event" 2>/dev/null || echo "")

    if [[ -z "$hex_pubkey" ]]; then
        return 1
    fi

    # Convert to npub
    local npub
    npub=$(hex_to_npub "$hex_pubkey")

    if [[ -z "$npub" ]]; then
        return 1
    fi

    # Count state refs
    local state_refs
    state_refs=$(count_state_refs "$event")

    # Find git directory
    # NOTE(review): identifier comes straight from the event and is used as a
    # path component and a TSV field without validation - confirm upstream
    # guarantees it contains no "/" , ".." or tab characters.
    local git_dir="$git_base/${npub}/${identifier}.git"

    # Check git directory status
    local git_refs=0
    local matches=0
    local reason=""

    if [[ ! -d "$git_dir" ]]; then
        reason="no_git_dir"
    elif [[ ! -d "$git_dir/refs/heads" ]] && [[ ! -f "$git_dir/packed-refs" ]]; then
        reason="empty_refs"
    else
        git_refs=$(count_git_refs "$git_dir")

        if [[ "$git_refs" -eq 0 ]]; then
            reason="empty_refs"
        elif [[ "$state_refs" -eq 0 ]]; then
            reason="no_state_refs"
        else
            matches=$(count_matching_refs "$event" "$git_dir")
        fi
    fi

    # Output TSV line: repo, npub, state_refs, git_refs, matches, reason
    printf '%s\t%s\t%s\t%s\t%s\t%s\n' "$identifier" "$npub" "$state_refs" "$git_refs" "$matches" "$reason"
}

# Main
# Args: <state-events.json> <git-base-dir> <output-dir> [--categorize]
# Writes <output-dir>/git-sync-status.tsv and, with --categorize, the four
# category files; progress and summary go to stderr via the log_* helpers.
main() {
    local do_categorize=0
    local args=()
    local arg

    # Parse arguments
    for arg in "$@"; do
        if [[ "$arg" == "--categorize" ]]; then
            do_categorize=1
        else
            args+=("$arg")
        fi
    done

    if [[ ${#args[@]} -ne 3 ]]; then
        usage
    fi

    local state_events_file="${args[0]}"
    local git_base="${args[1]}"
    local output_dir="${args[2]}"

    # Validate inputs
    if [[ ! -f "$state_events_file" ]]; then
        log_error "State events file not found: $state_events_file"
        exit 1
    fi

    if [[ ! -d "$git_base" ]]; then
        log_error "Git base directory not found: $git_base"
        log_error "This script must run on the VPS with access to git directories."
        exit 1
    fi

    # Check read permissions
    if ! ls "$git_base" >/dev/null 2>&1; then
        log_error "Cannot read git base directory (permission denied): $git_base"
        log_error "Try running with sudo or grant read permissions."
        exit 1
    fi

    # Forward the original arguments so the nix-shell hint can repeat them.
    check_prerequisites "$@"

    log_info "=== Git State Synchronization Check ==="
    log_info "State events: $state_events_file"
    log_info "Git base: $git_base"
    log_info "Output: $output_dir"
    if [[ $do_categorize -eq 1 ]]; then
        log_info "Mode: TSV + categorization"
    else
        log_info "Mode: TSV only (use 20-categorize.sh for categories)"
    fi
    log_info "Started: $(date)"
    echo ""

    # Create output directory
    mkdir -p "$output_dir"

    # Output files
    local tsv_file="$output_dir/git-sync-status.tsv"

    # Initialize TSV with header
    printf 'repo\tnpub\tstate_refs\tgit_refs\tmatches\treason\n' > "$tsv_file"

    # Initialize category files if categorizing
    local cat1="" cat2="" cat3="" cat4=""
    if [[ $do_categorize -eq 1 ]]; then
        cat1="$output_dir/category1-complete-match.txt"
        cat2="$output_dir/category2-empty-blank.txt"
        cat3="$output_dir/category3-partial-match.txt"
        cat4="$output_dir/category4-no-match.txt"
        : > "$cat1"
        : > "$cat2"
        : > "$cat3"
        : > "$cat4"
    fi

    # Count total events. grep -c '' also counts a final line that lacks a
    # trailing newline (wc -l would miss it); it exits 1 on an empty file
    # while still printing 0, hence the || true.
    local total_events
    total_events=$(grep -c '' "$state_events_file" || true)
    log_info "Processing $total_events state events..."
    echo ""

    # Process each event
    local count=0
    local processed=0
    local skipped=0
    local count_cat1=0 count_cat2=0 count_cat3=0 count_cat4=0
    local start_time
    start_time=$(date +%s)

    local event result category cat_line
    local repo npub state_refs git_refs matches reason
    local elapsed rate eta

    # "|| [[ -n $event ]]" keeps a last line that has no trailing newline.
    while IFS= read -r event || [[ -n "$event" ]]; do
        count=$((count + 1))

        # Skip empty lines
        if [[ -z "$event" ]]; then
            continue
        fi

        # Process event
        if result=$(process_event "$event" "$git_base"); then
            processed=$((processed + 1))

            # Write to TSV (header line was written above)
            printf '%s\n' "$result" >> "$tsv_file"

            # Categorize if requested
            if [[ $do_categorize -eq 1 ]]; then
                # Parse result. Only the trailing reason field can be empty,
                # so tab-splitting keeps the columns aligned.
                IFS=$'\t' read -r repo npub state_refs git_refs matches reason <<< "$result"

                category=$(categorize_entry "$state_refs" "$git_refs" "$matches" "$reason")
                cat_line=$(format_category_line "$repo" "$npub" "$state_refs" "$git_refs" "$matches" "$reason")

                case "$category" in
                    1) echo "$cat_line" >> "$cat1"; count_cat1=$((count_cat1 + 1)) ;;
                    2) echo "$cat_line" >> "$cat2"; count_cat2=$((count_cat2 + 1)) ;;
                    3) echo "$cat_line" >> "$cat3"; count_cat3=$((count_cat3 + 1)) ;;
                    4) echo "$cat_line" >> "$cat4"; count_cat4=$((count_cat4 + 1)) ;;
                esac
            fi
        else
            skipped=$((skipped + 1))
        fi

        # Progress indicator every 10 events
        if [[ $((count % 10)) -eq 0 ]]; then
            elapsed=$(($(date +%s) - start_time))
            rate=0
            eta="?"
            if [[ $elapsed -gt 0 ]]; then
                rate=$((count / elapsed))
                # Derive the ETA from elapsed time per event instead of from
                # the integer rate, which truncates to 0 below one event per
                # second and would leave the ETA permanently unknown.
                eta=$(( (total_events - count) * elapsed / count ))
            fi
            log_progress "Processed $count/$total_events events (~${rate}/s, ETA: ${eta}s)..."
        fi
    done < "$state_events_file"

    # Clear progress line
    echo "" >&2

    local end_time
    end_time=$(date +%s)
    local duration=$((end_time - start_time))

    # Summary
    echo ""
    log_info "=== Analysis Complete ==="
    log_info "Finished: $(date)"
    log_info "Duration: ${duration}s"
    log_info "Processed: $processed events"
    if [[ $skipped -gt 0 ]]; then
        log_warn "Skipped: $skipped events (missing identifier or pubkey)"
    fi
    echo ""

    if [[ $do_categorize -eq 1 ]]; then
        # Calculate percentages (values are passed with -v rather than being
        # spliced into the awk program text)
        local total=$((count_cat1 + count_cat2 + count_cat3 + count_cat4))
        local pct1=0 pct2=0 pct3=0 pct4=0
        if [[ $total -gt 0 ]]; then
            pct1=$(awk -v n="$count_cat1" -v t="$total" 'BEGIN {printf "%.1f", (n/t)*100}')
            pct2=$(awk -v n="$count_cat2" -v t="$total" 'BEGIN {printf "%.1f", (n/t)*100}')
            pct3=$(awk -v n="$count_cat3" -v t="$total" 'BEGIN {printf "%.1f", (n/t)*100}')
            pct4=$(awk -v n="$count_cat4" -v t="$total" 'BEGIN {printf "%.1f", (n/t)*100}')
        fi

        log_info "=== Category Summary ==="
        log_success "Category 1 (Complete Match): $count_cat1 ($pct1%)"
        log_warn "Category 2 (Empty/Blank): $count_cat2 ($pct2%)"
        log_warn "Category 3 (Partial Match): $count_cat3 ($pct3%)"
        log_error "Category 4 (No Match): $count_cat4 ($pct4%)"
        echo ""

        # Validation warning
        if [[ $count_cat2 -eq $total ]] && [[ $total -gt 0 ]]; then
            log_error "WARNING: 100% of repos categorized as Empty/Blank"
            log_error "This usually indicates a permission or path issue."
            echo ""
            log_info "Troubleshooting:"
            echo " 1. Verify git data exists: sudo ls -la $git_base | head -10"
            echo " 2. Check sample repo: sudo find $git_base -name '*.git' -type d | head -1"
            echo " 3. Re-run with sudo if not already using it"
            echo ""
        fi
    fi

    log_info "Output files:"
    echo " $tsv_file"
    if [[ $do_categorize -eq 1 ]]; then
        echo " $cat1"
        echo " $cat2"
        echo " $cat3"
        echo " $cat4"
    else
        echo ""
        log_info "Next step: Run 20-categorize.sh to categorize results"
        echo " ./20-categorize.sh $tsv_file $output_dir"
    fi
}

main "$@"
# -- cgit v1.2.3  (trailer of the archived patch view; not part of the script)