From 92a9a3bfe0bc522e8ae411991a366a3a6310d525 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 3 Feb 2026 14:41:46 +0000 Subject: docs: archive relay.ngit.dev migration materials for reference Move migration guide and scripts to docs/archive/2026-01-relay-ngit-dev-migration/ with clear warnings that these are reference-only materials from a specific migration context, not general-purpose tools. These materials document the relay.ngit.dev migration from ngit-relay to ngit-grasp in January 2026. The scripts were developed iteratively during the migration and are specific to that context. They are preserved for: - Historical reference - Context for production fixes in this branch - Inspiration for future migrations (not direct reuse) The migration uncovered critical bugs now fixed in this branch: - Git protocol error handling - Naughty list false positives - Purgatory event tracking - Sync startup issues - Configuration management --- .../migration-scripts/22-compare-git-data.sh | 390 --------------------- 1 file changed, 390 deletions(-) delete mode 100755 docs/how-to/migration-scripts/22-compare-git-data.sh (limited to 'docs/how-to/migration-scripts/22-compare-git-data.sh') diff --git a/docs/how-to/migration-scripts/22-compare-git-data.sh b/docs/how-to/migration-scripts/22-compare-git-data.sh deleted file mode 100755 index 76521d4..0000000 --- a/docs/how-to/migration-scripts/22-compare-git-data.sh +++ /dev/null @@ -1,390 +0,0 @@ -#!/usr/bin/env bash -# -# 22-compare-git-data.sh - Compare actual git data between prod and archive relays -# -# PHASE 3c of the GRASP relay to ngit-grasp migration analysis pipeline. -# Compares actual git commits between prod and archive to determine which is ahead. -# -# KEY INSIGHT: -# Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event. -# If archive has different/newer data than prod, it means: -# - A state event authorized those commits at some point -# - Archive is actually MORE up-to-date than prod -# - Migration should use archive data (it's already correct) -# -# USAGE: -# ./22-compare-git-data.sh -# -# EXAMPLES: -# ./22-compare-git-data.sh /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \ -# output/comparison/complete-prod-incomplete-archive.txt output/comparison -# -# INPUT: -# prod-git-base Base directory for prod git repos (e.g., /var/lib/grasp-relay/git) -# archive-git-base Base directory for archive git repos (e.g., /var/lib/ngit-grasp/git) -# repo-list File with repos to compare (format: "repo | npub | ...") -# -# OUTPUT: -# /git-ancestry.tsv - Tab-separated values: -# reponpubrelationshipdetails -# -# Relationship values: -# archive-ahead - Archive has all prod commits plus more (GOOD - use archive) -# in-sync - Both have identical commits -# prod-ahead - Prod has commits archive is missing (needs re-sync) -# diverged - Both have unique commits (manual review) -# archive-only - Only archive has git data -# prod-only - Only prod has git data -# both-empty - Neither has git data -# -# PREREQUISITES: -# - git (for ref comparison) -# - Read access to both git directories (may need sudo) -# -# RUNTIME: Depends on number of repos to compare -# -# SEE ALSO: -# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide -# 21-compare-relays.sh - Phase 3b script that identifies repos to compare -# - -set -euo pipefail - -# Colors for output (disabled if not a terminal) -if [[ -t 1 ]]; then - RED='\033[0;31m' - GREEN='\033[0;32m' - YELLOW='\033[0;33m' - BLUE='\033[0;34m' - NC='\033[0m' -else - RED='' - GREEN='' - YELLOW='' - BLUE='' - NC='' -fi - -log_info() { - echo -e "${BLUE}[INFO]${NC} $*" >&2 -} - -log_success() { - echo -e "${GREEN}[OK]${NC} $*" >&2 -} - -log_warn() { - echo -e "${YELLOW}[WARN]${NC} $*" >&2 -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $*" >&2 -} - -log_progress() { - echo -ne "\r${BLUE}[PROGRESS]${NC} $*" >&2 -} - -usage() { - echo "Usage: $0 " - echo "" - echo "Arguments:" - echo " prod-git-base Base directory for prod git repos" - echo " archive-git-base Base directory for archive git repos" - echo " repo-list File with repos to compare (format: 'repo | npub | ...')" - echo " output-dir Directory to store output files" - echo "" - echo "Examples:" - echo " $0 /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \\" - echo " output/comparison/complete-prod-incomplete-archive.txt output/comparison" - echo "" - echo "Output:" - echo " git-ancestry.tsv - TSV with: repo, npub, relationship, details" - exit 1 -} - -# Get all branch refs from a git directory -# Args: $1=git_dir -# Returns: sorted list of "ref_name commit_hash" lines -get_git_refs() { - local git_dir="$1" - - if [[ ! -d "$git_dir" ]]; then - return - fi - - git --git-dir="$git_dir" show-ref --heads 2>/dev/null | sort || true -} - -# Check if commit A is ancestor of commit B -# Args: $1=git_dir, $2=commit_a, $3=commit_b -# Returns: 0 if A is ancestor of B, 1 otherwise -is_ancestor() { - local git_dir="$1" - local commit_a="$2" - local commit_b="$3" - - git --git-dir="$git_dir" merge-base --is-ancestor "$commit_a" "$commit_b" 2>/dev/null -} - -# Compare git data between prod and archive for a single repo -# Args: $1=prod_git_dir, $2=archive_git_dir -# Returns: relationship string -compare_repo_git() { - local prod_git="$1" - local archive_git="$2" - - local prod_exists=false - local archive_exists=false - - [[ -d "$prod_git" ]] && prod_exists=true - [[ -d "$archive_git" ]] && archive_exists=true - - # Handle cases where one or both don't exist - if [[ "$prod_exists" == "false" && "$archive_exists" == "false" ]]; then - echo "both-empty" - return - fi - - if [[ "$prod_exists" == "false" ]]; then - echo "archive-only" - return - fi - - if [[ "$archive_exists" == "false" ]]; then - echo "prod-only" - return - fi - - # Both exist - get refs - local prod_refs archive_refs - prod_refs=$(get_git_refs "$prod_git") - archive_refs=$(get_git_refs "$archive_git") - - # Handle empty refs - if [[ -z "$prod_refs" && -z "$archive_refs" ]]; then - echo "both-empty" - return - fi - - if [[ -z "$prod_refs" ]]; then - echo "archive-only" - return - fi - - if [[ -z "$archive_refs" ]]; then - echo "prod-only" - return - fi - - # Compare refs - check if they're identical - if [[ "$prod_refs" == "$archive_refs" ]]; then - echo "in-sync" - return - fi - - # Refs differ - need to check ancestry - # Strategy: For each branch, check if one is ancestor of the other - # If all archive branches are ahead of or equal to prod branches, archive is ahead - # If all prod branches are ahead of or equal to archive branches, prod is ahead - # Otherwise, they've diverged - - local archive_ahead=true - local prod_ahead=true - local has_common_branch=false - - # Create temporary file to use archive as reference repo for ancestry checks - # We need a repo that has both sets of commits to check ancestry - # Use archive since it's the target and should have the superset - - # Check each prod branch against archive - while read -r prod_hash prod_ref; do - [[ -z "$prod_hash" ]] && continue - - # Get the same branch from archive - local archive_hash - archive_hash=$(echo "$archive_refs" | grep " $prod_ref$" | awk '{print $1}' || echo "") - - if [[ -z "$archive_hash" ]]; then - # Branch exists in prod but not archive - prod has something archive doesn't - # But this could be a deleted branch, so don't immediately say prod is ahead - continue - fi - - has_common_branch=true - - if [[ "$prod_hash" == "$archive_hash" ]]; then - # Same commit - neither ahead for this branch - continue - fi - - # Different commits - check ancestry - # First, try to check if prod is ancestor of archive (archive ahead) - if is_ancestor "$archive_git" "$prod_hash" "$archive_hash" 2>/dev/null; then - # Prod commit is ancestor of archive commit - archive is ahead for this branch - prod_ahead=false - elif is_ancestor "$archive_git" "$archive_hash" "$prod_hash" 2>/dev/null; then - # Archive commit is ancestor of prod commit - prod is ahead for this branch - archive_ahead=false - else - # Neither is ancestor - diverged - archive_ahead=false - prod_ahead=false - fi - done <<< "$prod_refs" - - # Also check for branches only in archive (archive has extra branches) - while read -r archive_hash archive_ref; do - [[ -z "$archive_hash" ]] && continue - - local prod_hash - prod_hash=$(echo "$prod_refs" | grep " $archive_ref$" | awk '{print $1}' || echo "") - - if [[ -z "$prod_hash" ]]; then - # Branch exists in archive but not prod - archive has something prod doesn't - # This means archive is ahead (has extra branches) - prod_ahead=false - fi - done <<< "$archive_refs" - - # Determine final relationship - if [[ "$has_common_branch" == "false" ]]; then - # No common branches - completely different - echo "diverged" - return - fi - - if [[ "$archive_ahead" == "true" && "$prod_ahead" == "false" ]]; then - echo "archive-ahead" - elif [[ "$prod_ahead" == "true" && "$archive_ahead" == "false" ]]; then - echo "prod-ahead" - elif [[ "$archive_ahead" == "true" && "$prod_ahead" == "true" ]]; then - # Both true means all common branches are identical - # But one might have extra branches - echo "in-sync" - else - echo "diverged" - fi -} - -# Main -main() { - if [[ $# -ne 4 ]]; then - usage - fi - - local prod_git_base="$1" - local archive_git_base="$2" - local repo_list="$3" - local output_dir="$4" - - # Validate inputs - if [[ ! -d "$prod_git_base" ]]; then - log_error "Prod git base directory not found: $prod_git_base" - exit 1 - fi - - if [[ ! -d "$archive_git_base" ]]; then - log_error "Archive git base directory not found: $archive_git_base" - exit 1 - fi - - if [[ ! -f "$repo_list" ]]; then - log_error "Repo list file not found: $repo_list" - exit 1 - fi - - log_info "=== Git Data Comparison ===" - log_info "Prod git base: $prod_git_base" - log_info "Archive git base: $archive_git_base" - log_info "Repo list: $repo_list" - log_info "Output: $output_dir" - log_info "Started: $(date)" - echo "" - - # Create output directory - mkdir -p "$output_dir" - - # Output file - local tsv_file="$output_dir/git-ancestry.tsv" - - # Initialize TSV with header - echo -e "repo\tnpub\trelationship\tdetails" > "$tsv_file" - - # Count repos - local total_repos - total_repos=$(grep -c -v '^#' "$repo_list" 2>/dev/null || echo "0") - log_info "Processing $total_repos repos..." - echo "" - - # Counters - local count=0 - local count_archive_ahead=0 - local count_in_sync=0 - local count_prod_ahead=0 - local count_diverged=0 - local count_archive_only=0 - local count_prod_only=0 - local count_both_empty=0 - - # Process each repo - while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do - # Skip comments and empty lines - [[ "$repo" =~ ^# ]] && continue - [[ -z "$repo" ]] && continue - - # Clean up whitespace - repo="${repo// /}" - npub="${npub// /}" - - [[ -z "$repo" || -z "$npub" ]] && continue - - count=$((count + 1)) - - # Build git paths - local prod_git="$prod_git_base/${npub}/${repo}.git" - local archive_git="$archive_git_base/${npub}/${repo}.git" - - # Compare - local relationship details="" - relationship=$(compare_repo_git "$prod_git" "$archive_git") - - # Count by relationship - case "$relationship" in - archive-ahead) count_archive_ahead=$((count_archive_ahead + 1)) ;; - in-sync) count_in_sync=$((count_in_sync + 1)) ;; - prod-ahead) count_prod_ahead=$((count_prod_ahead + 1)) ;; - diverged) count_diverged=$((count_diverged + 1)) ;; - archive-only) count_archive_only=$((count_archive_only + 1)) ;; - prod-only) count_prod_only=$((count_prod_only + 1)) ;; - both-empty) count_both_empty=$((count_both_empty + 1)) ;; - esac - - # Output TSV line - printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$relationship" "$details" >> "$tsv_file" - - # Progress indicator every 10 repos - if [[ $((count % 10)) -eq 0 ]]; then - log_progress "Processed $count/$total_repos repos..." - fi - done < "$repo_list" - - # Clear progress line - echo "" >&2 - - # Summary - echo "" - log_info "=== Comparison Summary ===" - log_success "Archive ahead (use archive data): $count_archive_ahead" - log_success "In sync: $count_in_sync" - log_warn "Prod ahead (needs re-sync): $count_prod_ahead" - log_error "Diverged (manual review): $count_diverged" - log_info "Archive only: $count_archive_only" - log_info "Prod only: $count_prod_only" - log_info "Both empty: $count_both_empty" - echo "" - log_info "Total: $count repos" - log_info "Output: $tsv_file" -} - -main "$@" -- cgit v1.2.3