diff options
Diffstat (limited to 'docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh')
| -rwxr-xr-x | docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh | 294 |
1 files changed, 294 insertions, 0 deletions
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh new file mode 100755 index 0000000..b9c0d30 --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh | |||
| @@ -0,0 +1,294 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 21-compare-relays.sh - Compare prod vs archive category files to find gaps | ||
| 4 | # | ||
| 5 | # PHASE 3b of the GRASP relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Compares categorized output from prod and archive to identify: | ||
| 7 | # - Repos complete in prod but missing/incomplete in archive | ||
| 8 | # - Repos in archive but not in prod | ||
| 9 | # - Status differences between relays | ||
| 10 | # | ||
| 11 | # USAGE: | ||
| 12 | # ./21-compare-relays.sh <prod-dir> <archive-dir> <output-dir> | ||
| 13 | # | ||
| 14 | # EXAMPLES: | ||
| 15 | # ./21-compare-relays.sh output/prod output/archive output/comparison | ||
| 16 | # | ||
| 17 | # INPUT: | ||
| 18 | # Both prod-dir and archive-dir must contain: | ||
| 19 | # - category1-complete-match.txt | ||
| 20 | # - category2-empty-blank.txt | ||
| 21 | # - category3-partial-match.txt | ||
| 22 | # - category4-no-match.txt | ||
| 23 | # | ||
| 24 | # OUTPUT: | ||
| 25 | # <output-dir>/complete-in-both.txt - Repos complete in both relays (no action) | ||
| 26 | # <output-dir>/complete-prod-missing-archive.txt - Complete in prod, absent from every archive category | ||
| 27 | # <output-dir>/complete-prod-incomplete-archive.txt - Complete in prod, incomplete in archive | ||
| 28 | # <output-dir>/incomplete-in-both.txt - Incomplete in both relays | ||
| 29 | # <output-dir>/in-archive-not-prod.txt - In archive but not in prod | ||
| 30 | # <output-dir>/summary.txt - Human-readable summary | ||
| 31 | # | ||
| 32 | # OUTPUT FORMAT: | ||
| 33 | # Each file contains lines in the format: | ||
| 34 | # repo | npub | prod=<status> | archive=<status> (status: complete, missing, or catN) | ||
| 35 | # | ||
| 36 | # PREREQUISITES: | ||
| 37 | # - awk, sort, comm (standard Unix tools) | ||
| 38 | # | ||
| 39 | # RUNTIME: < 1 second (local processing only) | ||
| 40 | # | ||
| 41 | # SEE ALSO: | ||
| 42 | # docs/how-to/migrate-to-ngit-grasp.md - Full migration guide | ||
| 43 | # 20-categorize.sh - Phase 3a script that produces input for this script | ||
| 44 | # | ||
| 45 | |||
set -euo pipefail

# Colors for log output. Every log_* helper writes to stderr, so the terminal
# check is made on stderr (fd 2): redirecting stderr to a file must not put
# escape sequences in that file, while redirecting stdout must not disable
# colors for messages that still reach the terminal.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
| 62 | |||
# Print an informational message on stderr, prefixed with a blue [INFO] tag.
# Arguments: $* - message words, joined with single spaces
# Only the color variables go through %b; the message is printed with %s so
# backslashes in it (e.g. in a path) are emitted verbatim instead of being
# interpreted as escape sequences.
log_info() {
    printf '%b[INFO]%b %s\n' "${BLUE:-}" "${NC:-}" "$*" >&2
}
| 66 | |||
# Print a success message on stderr, prefixed with a green [OK] tag.
# Arguments: $* - message words, joined with single spaces
# Only the color variables go through %b; the message is printed with %s so
# backslashes in it are emitted verbatim instead of being interpreted.
log_success() {
    printf '%b[OK]%b %s\n' "${GREEN:-}" "${NC:-}" "$*" >&2
}
| 70 | |||
# Print a warning message on stderr, prefixed with a yellow [WARN] tag.
# Arguments: $* - message words, joined with single spaces
# Only the color variables go through %b; the message is printed with %s so
# backslashes in it are emitted verbatim instead of being interpreted.
log_warn() {
    printf '%b[WARN]%b %s\n' "${YELLOW:-}" "${NC:-}" "$*" >&2
}
| 74 | |||
# Print an error message on stderr, prefixed with a red [ERROR] tag.
# Does not exit; callers decide whether the error is fatal.
# Arguments: $* - message words, joined with single spaces
# Only the color variables go through %b; the message is printed with %s so
# backslashes in it are emitted verbatim instead of being interpreted.
log_error() {
    printf '%b[ERROR]%b %s\n' "${RED:-}" "${NC:-}" "$*" >&2
}
| 78 | |||
# Print the command-line synopsis and terminate the script with status 1.
# This is only reached on an invalid invocation, so the text goes to stderr
# like every other diagnostic and never pollutes a captured stdout.
# Arguments: none
# Returns:   never returns; exits 1
usage() {
    {
        echo "Usage: $0 <prod-dir> <archive-dir> <output-dir>"
        echo ""
        echo "Arguments:"
        echo " prod-dir Directory containing prod category files"
        echo " archive-dir Directory containing archive category files"
        echo " output-dir Directory to store comparison results"
        echo ""
        echo "Examples:"
        echo " $0 output/prod output/archive output/comparison"
        echo ""
        echo "Required input files in each directory:"
        echo " category1-complete-match.txt"
        echo " category2-empty-blank.txt"
        echo " category3-partial-match.txt"
        echo " category4-no-match.txt"
    } >&2
    exit 1
}
| 97 | |||
# Reduce category lines to their "repo|npub" lookup key.
# Stdin:  lines shaped like "repo | npub | state_refs=N | ..."
# Stdout: one "repo|npub" line per input line
# Fields are split on the literal three-character sequence " | ", so a bare
# "|" without surrounding spaces stays part of its field.
extract_key() {
    awk 'BEGIN { FS = " \\| " } { printf "%s|%s\n", $1, $2 }'
}
| 104 | |||
# Build a sorted lookup table from the category files of one relay.
# Arguments: $1 - directory holding category{1..4}-*.txt files
#            $2 - path of the lookup file to (over)write
# Output:    one line per non-blank input line, in the form
#              repo|npub|cat<N>|<original line>
#            sorted on the repo and npub fields.
# Categories with no matching file are skipped; if none match, the lookup
# file is created empty.
build_lookup() {
    local dir="$1"
    local output="$2"
    local n
    local -a files

    for n in 1 2 3 4; do
        # The directory is quoted and only the pattern is left to glob, so
        # directories containing spaces or glob characters work.
        files=("$dir"/category"${n}"-*.txt)
        # An unmatched glob stays literal (nullglob is off); test for it
        # instead of parsing `ls`.
        [[ -e "${files[0]}" ]] || continue
        # One awk pass per category replaces a subprocess per input line.
        # Blank lines are dropped so they cannot produce an empty "|" key,
        # and a final line without a trailing newline is still processed.
        cat -- "${files[@]}" \
            | awk -F' \\| ' -v tag="cat${n}" \
                '$0 !~ /^[ \t]*$/ { print $1 "|" $2 "|" tag "|" $0 }'
    done | sort -t'|' -k1,2 > "$output"
}
| 124 | |||
# Main: compare the categorized output of the prod and archive relays.
# Arguments: $1 - prod directory, $2 - archive directory, $3 - output directory
# Writes five classification files plus summary.txt into the output directory
# and prints a summary (log lines on stderr, file list on stdout).
# Exits 1 on a wrong argument count, a missing directory, or a missing
# category1-complete-match.txt in either input directory.
#
# Keys are matched by exact string comparison inside awk. Repo names routinely
# contain regex metacharacters such as ".", so a pattern match on the key
# could pair a repo with a different, similarly named one.
main() {
    if [[ $# -ne 3 ]]; then
        usage
    fi

    local prod_dir="$1"
    local archive_dir="$2"
    local output_dir="$3"
    local dir

    # Validate input directories
    for dir in "$prod_dir" "$archive_dir"; do
        if [[ ! -d "$dir" ]]; then
            log_error "Directory not found: $dir"
            exit 1
        fi
        if [[ ! -f "$dir/category1-complete-match.txt" ]]; then
            log_error "Missing category1-complete-match.txt in $dir"
            exit 1
        fi
    done

    log_info "Comparing relay categories"
    log_info "Prod: $prod_dir"
    log_info "Archive: $archive_dir"
    log_info "Output: $output_dir"

    # Create output directory
    mkdir -p "$output_dir"

    # Create temp files for processing; removed again on any exit path
    local tmp_dir
    tmp_dir=$(mktemp -d)
    # shellcheck disable=SC2064
    trap "rm -rf '$tmp_dir'" EXIT

    log_info "Building lookup tables..."

    # Build lookup tables: repo|npub|category|full_line
    build_lookup "$prod_dir" "$tmp_dir/prod_lookup.txt"
    build_lookup "$archive_dir" "$tmp_dir/archive_lookup.txt"

    # Extract just keys for comparison
    cut -d'|' -f1,2 "$tmp_dir/prod_lookup.txt" | sort -u > "$tmp_dir/prod_keys.txt"
    cut -d'|' -f1,2 "$tmp_dir/archive_lookup.txt" | sort -u > "$tmp_dir/archive_keys.txt"

    log_info "Comparing categories..."

    # Initialize output files so each exists even when it gets no entries
    : > "$output_dir/complete-in-both.txt"
    : > "$output_dir/complete-prod-missing-archive.txt"
    : > "$output_dir/complete-prod-incomplete-archive.txt"
    : > "$output_dir/incomplete-in-both.txt"
    : > "$output_dir/in-archive-not-prod.txt"

    # Classify every prod entry against the archive in a single awk pass.
    #  - The archive lookup is read first; the first entry seen for a key
    #    decides its archive category.
    #  - `src=...` operands tell the two inputs apart even if one is empty.
    #  - The output directory travels through the environment so awk does not
    #    apply escape processing to the path.
    #  - cat1 (complete) prod entries are written as they are read; cat2-cat4
    #    entries are buffered and flushed grouped by category (cat2, cat3,
    #    cat4) to keep incomplete-in-both.txt in that order.
    #  - Prod-incomplete entries whose archive copy is complete are unusual
    #    but not an error, and are intentionally not reported.
    COMPARE_OUT_DIR="$output_dir" awk -F'|' '
        function emit(name, prod, arch) {
            print $1 " | " $2 " | prod=" prod " | archive=" arch > (out "/" name)
        }
        BEGIN { out = ENVIRON["COMPARE_OUT_DIR"] }
        { key = $1 "|" $2 }
        src == "archive" {
            if (!(key in archive_cat)) archive_cat[key] = $3
            next
        }
        $3 == "cat1" {
            if (!(key in archive_cat))
                emit("complete-prod-missing-archive.txt", "complete", "missing")
            else if (archive_cat[key] == "cat1")
                emit("complete-in-both.txt", "complete", "complete")
            else
                emit("complete-prod-incomplete-archive.txt", "complete", archive_cat[key])
            next
        }
        $3 == "cat2" || $3 == "cat3" || $3 == "cat4" {
            if (!(key in archive_cat))
                status = "missing"
            else if (archive_cat[key] != "cat1")
                status = archive_cat[key]
            else
                next
            pending[$3, ++count[$3]] = $1 " | " $2 " | prod=" $3 " | archive=" status
        }
        END {
            for (c = 2; c <= 4; c++)
                for (i = 1; i <= count["cat" c]; i++)
                    print pending["cat" c, i] > (out "/incomplete-in-both.txt")
        }
    ' src=archive "$tmp_dir/archive_lookup.txt" src=prod "$tmp_dir/prod_lookup.txt"

    # Find entries in archive but not in prod, annotated with the category of
    # their first archive lookup entry
    comm -23 "$tmp_dir/archive_keys.txt" "$tmp_dir/prod_keys.txt" \
        | awk -F'|' '
            src == "archive" {
                key = $1 "|" $2
                if (!(key in archive_cat)) archive_cat[key] = $3
                next
            }
            { print $1 " | " $2 " | prod=missing | archive=" archive_cat[$1 "|" $2] }
        ' src=archive "$tmp_dir/archive_lookup.txt" src=keys - \
        > "$output_dir/in-archive-not-prod.txt"

    # Count results
    local count_both count_missing count_incomplete count_both_incomplete count_archive_only
    count_both=$(wc -l < "$output_dir/complete-in-both.txt" | tr -d ' ')
    count_missing=$(wc -l < "$output_dir/complete-prod-missing-archive.txt" | tr -d ' ')
    count_incomplete=$(wc -l < "$output_dir/complete-prod-incomplete-archive.txt" | tr -d ' ')
    count_both_incomplete=$(wc -l < "$output_dir/incomplete-in-both.txt" | tr -d ' ')
    count_archive_only=$(wc -l < "$output_dir/in-archive-not-prod.txt" | tr -d ' ')

    # Generate summary
    cat > "$output_dir/summary.txt" << EOF
# Relay Comparison Summary
Generated: $(date -Iseconds)

## Input
- Prod: $prod_dir
- Archive: $archive_dir

## Results

### No Action Required
- Complete in both relays: $count_both

### Action/Decision Required
- Complete in prod, MISSING from archive: $count_missing
- Complete in prod, INCOMPLETE in archive: $count_incomplete
- Incomplete in BOTH relays: $count_both_incomplete

### For Reference
- In archive but not in prod: $count_archive_only

## Files
- complete-in-both.txt: Repos successfully migrated (no action)
- complete-prod-missing-archive.txt: Need investigation - why not in archive?
- complete-prod-incomplete-archive.txt: Archive sync may still be in progress
- incomplete-in-both.txt: Git data incomplete on both relays
- in-archive-not-prod.txt: May be deleted from prod or new to archive

## Next Steps
1. Review complete-prod-missing-archive.txt - these repos need attention
2. Check if archive sync is still running for incomplete entries
3. Cross-reference with deletion events (kind 5) from Phase 1
4. Use Phase 4 logs to understand parse failures and purgatory expiry
EOF

    # Display summary
    echo ""
    log_info "=== Comparison Summary ==="
    log_success "Complete in both: $count_both (no action needed)"
    log_error "Complete in prod, MISSING from archive: $count_missing"
    log_warn "Complete in prod, incomplete in archive: $count_incomplete"
    log_warn "Incomplete in both: $count_both_incomplete"
    log_info "In archive only: $count_archive_only"
    echo ""
    log_info "Output files:"
    echo " $output_dir/complete-in-both.txt"
    echo " $output_dir/complete-prod-missing-archive.txt"
    echo " $output_dir/complete-prod-incomplete-archive.txt"
    echo " $output_dir/incomplete-in-both.txt"
    echo " $output_dir/in-archive-not-prod.txt"
    echo " $output_dir/summary.txt"
}
| 293 | |||
# Script entry point: pass every command-line argument through to main.
main "$@"