upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summary | refs | log | tree | commit | diff
path: root/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh
diff options
context:
space:
mode:
Diffstat (limited to 'docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh')
-rwxr-xr-x docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh 294
1 files changed, 294 insertions, 0 deletions
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh
new file mode 100755
index 0000000..b9c0d30
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/21-compare-relays.sh
@@ -0,0 +1,294 @@
#!/usr/bin/env bash
#
# 21-compare-relays.sh - Compare prod vs archive category files to find gaps
#
# PHASE 3b of the GRASP relay to ngit-grasp migration analysis pipeline.
# Compares categorized output from prod and archive to identify:
#   - Repos complete in prod but missing/incomplete in archive
#   - Repos in archive but not in prod
#   - Status differences between relays
#
# USAGE:
#   ./21-compare-relays.sh <prod-dir> <archive-dir> <output-dir>
#
# EXAMPLES:
#   ./21-compare-relays.sh output/prod output/archive output/comparison
#
# INPUT:
#   Both prod-dir and archive-dir must contain:
#     - category1-complete-match.txt
#     - category2-empty-blank.txt
#     - category3-partial-match.txt
#     - category4-no-match.txt
#
# OUTPUT:
#   <output-dir>/complete-in-both.txt                 - Repos complete in both relays (no action)
#   <output-dir>/complete-prod-missing-archive.txt    - Complete in prod, not in archive cat1
#   <output-dir>/complete-prod-incomplete-archive.txt - Complete in prod, incomplete in archive
#   <output-dir>/incomplete-in-both.txt               - Incomplete in both relays
#   <output-dir>/in-archive-not-prod.txt              - In archive but not in prod
#   <output-dir>/summary.txt                          - Human-readable summary
#
# OUTPUT FORMAT:
#   Each file contains lines in the format:
#     repo | npub | prod_status | archive_status
#
# PREREQUISITES:
#   - awk, sort, comm (standard Unix tools)
#
# RUNTIME: < 1 second (local processing only)
#
# SEE ALSO:
#   docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
#   20-categorize.sh - Phase 3a script that produces input for this script
#
45
set -euo pipefail

# Colors for log output.
# Every log_* helper in this script writes to stderr, so the terminal check is
# made on fd 2 (not fd 1): colors stay on when only stdout is redirected, and
# escape codes stay out of a file that stderr is redirected to.
# The values are backslash escape sequences; they are turned into real control
# characters by whatever prints them.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
62
# Write an informational message to stderr, prefixed with a blue [INFO] tag.
# Arguments: $* - message words, joined with single spaces.
# Only the color variables are escape-expanded (%b); the message is printed
# verbatim (%s) so backslashes in paths or repo names are not interpreted.
log_info() {
    printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$*" >&2
}
66
# Write a success message to stderr, prefixed with a green [OK] tag.
# Arguments: $* - message words, joined with single spaces.
# Only the color variables are escape-expanded (%b); the message is printed
# verbatim (%s) so backslashes in paths or repo names are not interpreted.
log_success() {
    printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$*" >&2
}
70
# Write a warning message to stderr, prefixed with a yellow [WARN] tag.
# Arguments: $* - message words, joined with single spaces.
# Only the color variables are escape-expanded (%b); the message is printed
# verbatim (%s) so backslashes in paths or repo names are not interpreted.
log_warn() {
    printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*" >&2
}
74
# Write an error message to stderr, prefixed with a red [ERROR] tag.
# Arguments: $* - message words, joined with single spaces.
# Only the color variables are escape-expanded (%b); the message is printed
# verbatim (%s) so backslashes in paths or repo names are not interpreted.
# Does not exit; callers decide whether the error is fatal.
log_error() {
    printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*" >&2
}
78
79usage() {
80 echo "Usage: $0 <prod-dir> <archive-dir> <output-dir>"
81 echo ""
82 echo "Arguments:"
83 echo " prod-dir Directory containing prod category files"
84 echo " archive-dir Directory containing archive category files"
85 echo " output-dir Directory to store comparison results"
86 echo ""
87 echo "Examples:"
88 echo " $0 output/prod output/archive output/comparison"
89 echo ""
90 echo "Required input files in each directory:"
91 echo " category1-complete-match.txt"
92 echo " category2-empty-blank.txt"
93 echo " category3-partial-match.txt"
94 echo " category4-no-match.txt"
95 exit 1
96}
97
# Extract repo|npub key from category lines.
# Reads category lines on stdin and writes one key per line on stdout.
#   Input:  "repo | npub | state_refs=N | ..."
#   Output: "repo|npub"
# Fields are split on the exact three-character separator " | "; a line with
# fewer than two fields still yields a line (with an empty second part).
extract_key() {
    awk -F' \\| ' '{print $1 "|" $2}'
}
104
# Build a sorted lookup table from the category files of one relay.
# Args: $1=directory holding category{1..4}-*.txt, $2=output_file
# Output line format: repo|npub|catN|<original category line>
#   - repo and npub are the first two " | "-separated fields of the line
#   - catN records which category file (1-4) the line came from
# Category files that do not exist are skipped silently.
build_lookup() {
    local dir="$1"
    local output="$2"
    local n file

    for n in 1 2 3 4; do
        # Quoted directory + unquoted glob: safe for paths with spaces, and
        # no need to parse `ls`. An unmatched glob stays literal and is
        # rejected by the -f test.
        for file in "$dir"/category"${n}"-*.txt; do
            [[ -f "$file" ]] || continue
            # One awk per file instead of one per line; awk also handles a
            # final line that lacks a trailing newline.
            awk -F' \\| ' -v tag="cat${n}" \
                '{ print $1 "|" $2 "|" tag "|" $0 }' "$file"
        done
    done | sort -t'|' -k1,2 > "$output"
}
124
# Main
#
# Compare the categorized repos of the prod relay with those of the archive
# relay and write one result file per outcome plus a summary.
# Args: $1=prod-dir, $2=archive-dir, $3=output-dir (created if missing)
# Exits 1 (via usage or log_error) on a wrong argument count, a missing
# directory, or a missing category1-complete-match.txt.
# Result lines have the form: repo | npub | prod=<status> | archive=<status>
main() {
    if [[ $# -ne 3 ]]; then
        usage
    fi

    local prod_dir="$1"
    local archive_dir="$2"
    local output_dir="$3"
    local dir

    # Validate input directories
    for dir in "$prod_dir" "$archive_dir"; do
        if [[ ! -d "$dir" ]]; then
            log_error "Directory not found: $dir"
            exit 1
        fi
        if [[ ! -f "$dir/category1-complete-match.txt" ]]; then
            log_error "Missing category1-complete-match.txt in $dir"
            exit 1
        fi
    done

    log_info "Comparing relay categories"
    log_info "Prod: $prod_dir"
    log_info "Archive: $archive_dir"
    log_info "Output: $output_dir"

    # Create output directory
    mkdir -p "$output_dir"

    # Create temp files for processing
    local tmp_dir
    tmp_dir=$(mktemp -d)
    # The path is expanded now on purpose: tmp_dir is local and no longer in
    # scope when the EXIT trap fires. %q keeps the path safe to re-parse.
    # shellcheck disable=SC2064
    trap "rm -rf -- $(printf '%q' "$tmp_dir")" EXIT

    log_info "Building lookup tables..."

    # Build lookup tables: key|category|full_line (sorted by key)
    build_lookup "$prod_dir" "$tmp_dir/prod_lookup.txt"
    build_lookup "$archive_dir" "$tmp_dir/archive_lookup.txt"

    log_info "Comparing categories..."

    # Initialize output files so each exists even when it stays empty
    : > "$output_dir/complete-in-both.txt"
    : > "$output_dir/complete-prod-missing-archive.txt"
    : > "$output_dir/complete-prod-incomplete-archive.txt"
    : > "$output_dir/incomplete-in-both.txt"
    : > "$output_dir/in-archive-not-prod.txt"

    # Join prod against archive in a single awk run. Keys are compared as
    # exact strings, so regex metacharacters in repo names (for example the
    # dot in "a.b") cannot match a different repo.
    #   phase=archive      remember the category of the first entry per key
    #   phase=prod         one pass per category (cat1, then cat2..cat4), so
    #                      the output order matches a per-category scan
    #   phase=archive_only list archive keys never seen in prod
    # The output directory travels through the environment because values
    # given with -v or as operands undergo backslash-escape processing.
    # awk prints one number on stdout: entries incomplete in prod but
    # complete in archive, which belong to none of the output files.
    local archive_complete_only
    archive_complete_only=$(
        COMPARE_OUT_DIR="$output_dir" awk -F'|' '
            function emit(name, prod, arch) {
                print $1 " | " $2 " | prod=" prod " | archive=" arch >> (out_dir "/" name)
            }
            BEGIN { out_dir = ENVIRON["COMPARE_OUT_DIR"]; skipped = 0 }
            { key = $1 "|" $2 }
            phase == "archive" {
                if (!(key in archive_cat)) archive_cat[key] = $3
                next
            }
            phase == "prod" {
                in_prod[key] = 1
                if ($3 != want) next
                if (want == "cat1") {
                    if (!(key in archive_cat))
                        emit("complete-prod-missing-archive.txt", "complete", "missing")
                    else if (archive_cat[key] == "cat1")
                        emit("complete-in-both.txt", "complete", "complete")
                    else
                        emit("complete-prod-incomplete-archive.txt", "complete", archive_cat[key])
                } else {
                    if (!(key in archive_cat))
                        emit("incomplete-in-both.txt", want, "missing")
                    else if (archive_cat[key] != "cat1")
                        emit("incomplete-in-both.txt", want, archive_cat[key])
                    else
                        skipped++
                }
                next
            }
            phase == "archive_only" {
                if (!(key in in_prod) && !(key in listed)) {
                    listed[key] = 1
                    emit("in-archive-not-prod.txt", "missing", archive_cat[key])
                }
            }
            END { print skipped }
        ' \
            phase=archive "$tmp_dir/archive_lookup.txt" \
            phase=prod want=cat1 "$tmp_dir/prod_lookup.txt" \
            want=cat2 "$tmp_dir/prod_lookup.txt" \
            want=cat3 "$tmp_dir/prod_lookup.txt" \
            want=cat4 "$tmp_dir/prod_lookup.txt" \
            phase=archive_only "$tmp_dir/archive_lookup.txt"
    )

    # Unusual but not an error; surfaced so the counts can be reconciled.
    if (( archive_complete_only > 0 )); then
        log_warn "Incomplete in prod but complete in archive: $archive_complete_only (not listed in any output file)"
    fi

    # Count results
    local count_both count_missing count_incomplete count_both_incomplete count_archive_only
    count_both=$(wc -l < "$output_dir/complete-in-both.txt" | tr -d ' ')
    count_missing=$(wc -l < "$output_dir/complete-prod-missing-archive.txt" | tr -d ' ')
    count_incomplete=$(wc -l < "$output_dir/complete-prod-incomplete-archive.txt" | tr -d ' ')
    count_both_incomplete=$(wc -l < "$output_dir/incomplete-in-both.txt" | tr -d ' ')
    count_archive_only=$(wc -l < "$output_dir/in-archive-not-prod.txt" | tr -d ' ')

    # Generate summary
    cat > "$output_dir/summary.txt" << EOF
# Relay Comparison Summary
Generated: $(date -Iseconds)

## Input
- Prod: $prod_dir
- Archive: $archive_dir

## Results

### No Action Required
- Complete in both relays: $count_both

### Action/Decision Required
- Complete in prod, MISSING from archive: $count_missing
- Complete in prod, INCOMPLETE in archive: $count_incomplete
- Incomplete in BOTH relays: $count_both_incomplete

### For Reference
- In archive but not in prod: $count_archive_only

## Files
- complete-in-both.txt: Repos successfully migrated (no action)
- complete-prod-missing-archive.txt: Need investigation - why not in archive?
- complete-prod-incomplete-archive.txt: Archive sync may still be in progress
- incomplete-in-both.txt: Git data incomplete on both relays
- in-archive-not-prod.txt: May be deleted from prod or new to archive

## Next Steps
1. Review complete-prod-missing-archive.txt - these repos need attention
2. Check if archive sync is still running for incomplete entries
3. Cross-reference with deletion events (kind 5) from Phase 1
4. Use Phase 4 logs to understand parse failures and purgatory expiry
EOF

    # Display summary
    echo ""
    log_info "=== Comparison Summary ==="
    log_success "Complete in both: $count_both (no action needed)"
    log_error "Complete in prod, MISSING from archive: $count_missing"
    log_warn "Complete in prod, incomplete in archive: $count_incomplete"
    log_warn "Incomplete in both: $count_both_incomplete"
    log_info "In archive only: $count_archive_only"
    echo ""
    log_info "Output files:"
    echo " $output_dir/complete-in-both.txt"
    echo " $output_dir/complete-prod-missing-archive.txt"
    echo " $output_dir/complete-prod-incomplete-archive.txt"
    echo " $output_dir/incomplete-in-both.txt"
    echo " $output_dir/in-archive-not-prod.txt"
    echo " $output_dir/summary.txt"
}
293
main "$@"