diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-28 14:17:30 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-28 14:17:30 +0000 |
| commit | 3c1eda5fc9e660d40cadcdef8903aea986fe3242 (patch) | |
| tree | c11f81ca30069f4deca24de7c9c47368733ab7b8 | |
| parent | efc3da477d4edb9d1334718e3e20d197ba711468 (diff) | |
feat(migration): detect when archive git data is ahead of prod
Add git ancestry comparison (22-compare-git-data.sh) to determine
commit relationships between prod and archive repos. Repos where
archive is ahead are now correctly classified as ready-for-migration
since ngit-grasp only accepts git data authorized by state events.
Previously, repos with different git data were flagged as needs-resync
even when archive had newer/better data than prod.
| -rwxr-xr-x | docs/how-to/migration-scripts/22-compare-git-data.sh | 390 | ||||
| -rwxr-xr-x | docs/how-to/migration-scripts/40-classify-actions.sh | 84 | ||||
| -rwxr-xr-x | docs/how-to/migration-scripts/run-migration-analysis.sh | 16 |
3 files changed, 481 insertions, 9 deletions
diff --git a/docs/how-to/migration-scripts/22-compare-git-data.sh b/docs/how-to/migration-scripts/22-compare-git-data.sh new file mode 100755 index 0000000..76521d4 --- /dev/null +++ b/docs/how-to/migration-scripts/22-compare-git-data.sh | |||
| @@ -0,0 +1,390 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 22-compare-git-data.sh - Compare actual git data between prod and archive relays | ||
| 4 | # | ||
| 5 | # PHASE 3c of the GRASP relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Compares actual git commits between prod and archive to determine which is ahead. | ||
| 7 | # | ||
| 8 | # KEY INSIGHT: | ||
| 9 | # Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event. | ||
| 10 | # If archive has different/newer data than prod, it means: | ||
| 11 | # - A state event authorized those commits at some point | ||
| 12 | # - Archive is actually MORE up-to-date than prod | ||
| 13 | # - Migration should use archive data (it's already correct) | ||
| 14 | # | ||
| 15 | # USAGE: | ||
| 16 | # ./22-compare-git-data.sh <prod-git-base> <archive-git-base> <repo-list> <output-dir> | ||
| 17 | # | ||
| 18 | # EXAMPLES: | ||
| 19 | # ./22-compare-git-data.sh /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \ | ||
| 20 | # output/comparison/complete-prod-incomplete-archive.txt output/comparison | ||
| 21 | # | ||
| 22 | # INPUT: | ||
| 23 | # prod-git-base Base directory for prod git repos (e.g., /var/lib/grasp-relay/git) | ||
| 24 | # archive-git-base Base directory for archive git repos (e.g., /var/lib/ngit-grasp/git) | ||
| 25 | # repo-list File with repos to compare (format: "repo | npub | ...") | ||
| 26 | # | ||
| 27 | # OUTPUT: | ||
| 28 | # <output-dir>/git-ancestry.tsv - Tab-separated values: | ||
| 29 | # repo<TAB>npub<TAB>relationship<TAB>details | ||
| 30 | # | ||
| 31 | # Relationship values: | ||
| 32 | # archive-ahead - Archive has all prod commits plus more (GOOD - use archive) | ||
| 33 | # in-sync - Both have identical commits | ||
| 34 | # prod-ahead - Prod has commits archive is missing (needs re-sync) | ||
| 35 | # diverged - Both have unique commits (manual review) | ||
| 36 | # archive-only - Only archive has git data | ||
| 37 | # prod-only - Only prod has git data | ||
| 38 | # both-empty - Neither has git data | ||
| 39 | # | ||
| 40 | # PREREQUISITES: | ||
| 41 | # - git (for ref comparison) | ||
| 42 | # - Read access to both git directories (may need sudo) | ||
| 43 | # | ||
| 44 | # RUNTIME: Depends on number of repos to compare | ||
| 45 | # | ||
| 46 | # SEE ALSO: | ||
| 47 | # docs/how-to/migrate-to-ngit-grasp.md - Full migration guide | ||
| 48 | # 21-compare-relays.sh - Phase 3b script that identifies repos to compare | ||
| 49 | # | ||
| 50 | |||
| 51 | set -euo pipefail | ||
| 52 | |||
| 53 | # Colors for log output. All log_* helpers write to stderr, so colors are enabled only when stderr (fd 2) is a terminal | ||
| 54 | if [[ -t 2 ]]; then | ||
| 55 | RED='\033[0;31m' | ||
| 56 | GREEN='\033[0;32m' | ||
| 57 | YELLOW='\033[0;33m' | ||
| 58 | BLUE='\033[0;34m' | ||
| 59 | NC='\033[0m' | ||
| 60 | else | ||
| 61 | RED='' | ||
| 62 | GREEN='' | ||
| 63 | YELLOW='' | ||
| 64 | BLUE='' | ||
| 65 | NC='' | ||
| 66 | fi | ||
| 67 | |||
| 68 | log_info() { # informational message, written to stderr | ||
| 69 | printf '%b\n' "${BLUE}[INFO]${NC} $*" >&2 | ||
| 70 | } | ||
| 71 | |||
| 72 | log_success() { # success message, written to stderr | ||
| 73 | printf '%b\n' "${GREEN}[OK]${NC} $*" >&2 | ||
| 74 | } | ||
| 75 | |||
| 76 | log_warn() { # warning message, written to stderr | ||
| 77 | printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2 | ||
| 78 | } | ||
| 79 | |||
| 80 | log_error() { | ||
| 81 | echo -e "${RED}[ERROR]${NC} $*" >&2 | ||
| 82 | } | ||
| 83 | |||
| 84 | log_progress() { # rewrites the current stderr line in place (leading \r, no trailing newline) | ||
| 85 | printf '%b' "\r${BLUE}[PROGRESS]${NC} $*" >&2 | ||
| 86 | } | ||
| 87 | |||
| 88 | usage() { # print the help text to stdout, then exit with status 1 | ||
| 89 | printf '%s\n' "Usage: $0 <prod-git-base> <archive-git-base> <repo-list> <output-dir>" | ||
| 90 | printf '\n' | ||
| 91 | printf '%s\n' "Arguments:" | ||
| 92 | printf '%s\n' " prod-git-base Base directory for prod git repos" | ||
| 93 | printf '%s\n' " archive-git-base Base directory for archive git repos" | ||
| 94 | printf '%s\n' " repo-list File with repos to compare (format: 'repo | npub | ...')" | ||
| 95 | printf '%s\n' " output-dir Directory to store output files" | ||
| 96 | printf '\n' | ||
| 97 | printf '%s\n' "Examples:" | ||
| 98 | printf '%s\n' " $0 /var/lib/grasp-relay/git /var/lib/ngit-grasp/git \\" | ||
| 99 | printf '%s\n' " output/comparison/complete-prod-incomplete-archive.txt output/comparison" | ||
| 100 | printf '\n' | ||
| 101 | printf '%s\n' "Output:" | ||
| 102 | printf '%s\n' " git-ancestry.tsv - TSV with: repo, npub, relationship, details" | ||
| 103 | exit 1 | ||
| 104 | } | ||
| 105 | |||
| 106 | # List the branch heads (refs/heads/*) stored in a git directory | ||
| 107 | # Args: $1=git_dir | ||
| 108 | # Prints: sorted "commit_hash ref_name" lines as emitted by git show-ref; nothing if the directory is absent | ||
| 109 | get_git_refs() { | ||
| 110 | local repo_dir="$1" | ||
| 111 | |||
| 112 | # No directory means no refs: print nothing and report success | ||
| 113 | [[ -d "$repo_dir" ]] || return 0 | ||
| 114 | |||
| 115 | # '|| true' stops a failing show-ref (e.g. no heads) from tripping set -e/pipefail | ||
| 116 | git --git-dir="$repo_dir" show-ref --heads 2>/dev/null | sort || true | ||
| 117 | } | ||
| 118 | |||
| 119 | # Test whether commit A is an ancestor of (reachable from) commit B | ||
| 120 | # Args: $1=git_dir, $2=commit_a, $3=commit_b | ||
| 121 | # Returns: exit status of git merge-base --is-ancestor (0 = A is an ancestor of B, non-zero otherwise) | ||
| 122 | is_ancestor() { | ||
| 123 | local repo_dir="$1" older="$2" newer="$3" | ||
| 124 | |||
| 125 | # Both commits are looked up in repo_dir; git's own | ||
| 126 | # error output is discarded, only the status matters | ||
| 127 | git --git-dir="$repo_dir" merge-base --is-ancestor "$older" "$newer" 2>/dev/null | ||
| 128 | } | ||
| 129 | |||
| 130 | # Compare git data between prod and archive for a single repo | ||
| 131 | # Args: $1=prod_git_dir, $2=archive_git_dir | ||
| 132 | # Prints one of: archive-ahead, in-sync, prod-ahead, diverged, archive-only, prod-only, both-empty | ||
| 133 | compare_repo_git() { | ||
| 134 | local prod_git="$1" | ||
| 135 | local archive_git="$2" | ||
| 136 | |||
| 137 | local prod_exists=false | ||
| 138 | local archive_exists=false | ||
| 139 | |||
| 140 | [[ -d "$prod_git" ]] && prod_exists=true | ||
| 141 | [[ -d "$archive_git" ]] && archive_exists=true | ||
| 142 | |||
| 143 | # Handle cases where one or both don't exist | ||
| 144 | if [[ "$prod_exists" == "false" && "$archive_exists" == "false" ]]; then | ||
| 145 | echo "both-empty" | ||
| 146 | return | ||
| 147 | fi | ||
| 148 | |||
| 149 | if [[ "$prod_exists" == "false" ]]; then | ||
| 150 | echo "archive-only" | ||
| 151 | return | ||
| 152 | fi | ||
| 153 | |||
| 154 | if [[ "$archive_exists" == "false" ]]; then | ||
| 155 | echo "prod-only" | ||
| 156 | return | ||
| 157 | fi | ||
| 158 | |||
| 159 | # Both exist - get refs ("commit_hash ref_name" lines, one per branch head) | ||
| 160 | local prod_refs archive_refs | ||
| 161 | prod_refs=$(get_git_refs "$prod_git") | ||
| 162 | archive_refs=$(get_git_refs "$archive_git") | ||
| 163 | |||
| 164 | # Handle empty refs (directory exists but has no branch heads) | ||
| 165 | if [[ -z "$prod_refs" && -z "$archive_refs" ]]; then | ||
| 166 | echo "both-empty" | ||
| 167 | return | ||
| 168 | fi | ||
| 169 | |||
| 170 | if [[ -z "$prod_refs" ]]; then | ||
| 171 | echo "archive-only" | ||
| 172 | return | ||
| 173 | fi | ||
| 174 | |||
| 175 | if [[ -z "$archive_refs" ]]; then | ||
| 176 | echo "prod-only" | ||
| 177 | return | ||
| 178 | fi | ||
| 179 | |||
| 180 | # Compare refs - check if they're identical | ||
| 181 | if [[ "$prod_refs" == "$archive_refs" ]]; then | ||
| 182 | echo "in-sync" | ||
| 183 | return | ||
| 184 | fi | ||
| 185 | |||
| 186 | # Refs differ - need to check ancestry | ||
| 187 | # Strategy: For each branch, check if one is ancestor of the other | ||
| 188 | # If all archive branches are ahead of or equal to prod branches, archive is ahead | ||
| 189 | # If all prod branches are ahead of or equal to archive branches, prod is ahead | ||
| 190 | # Otherwise, they've diverged | ||
| 191 | |||
| 192 | local archive_ahead=true | ||
| 193 | local prod_ahead=true | ||
| 194 | local has_common_branch=false | ||
| 195 | |||
| 196 | # Ancestry can only be answered by a repo that holds BOTH commits. The side that is | ||
| 197 | # ahead contains the other side's tip, so "archive ahead" is tested in the archive repo | ||
| 198 | # and "prod ahead" in the prod repo; a commit missing from the repo makes the test fail. | ||
| 199 | |||
| 200 | # Check each prod branch against archive | ||
| 201 | while read -r prod_hash prod_ref; do | ||
| 202 | [[ -z "$prod_hash" ]] && continue | ||
| 203 | |||
| 204 | # Get the same branch from archive (exact ref-name match, not a regex) | ||
| 205 | local archive_hash | ||
| 206 | archive_hash=$(awk -v ref="$prod_ref" '$2 == ref { print $1 }' <<< "$archive_refs") | ||
| 207 | |||
| 208 | if [[ -z "$archive_hash" ]]; then | ||
| 209 | # Branch exists in prod but not archive - prod has something archive doesn't | ||
| 210 | # But this could be a deleted branch, so it is deliberately ignored here | ||
| 211 | continue | ||
| 212 | fi | ||
| 213 | |||
| 214 | has_common_branch=true | ||
| 215 | |||
| 216 | if [[ "$prod_hash" == "$archive_hash" ]]; then | ||
| 217 | # Same commit - neither ahead for this branch | ||
| 218 | continue | ||
| 219 | fi | ||
| 220 | |||
| 221 | # Different commits - check ancestry | ||
| 222 | # First, check in the archive repo if prod is ancestor of archive (archive ahead) | ||
| 223 | if is_ancestor "$archive_git" "$prod_hash" "$archive_hash" 2>/dev/null; then | ||
| 224 | # Prod commit is ancestor of archive commit - archive is ahead for this branch | ||
| 225 | prod_ahead=false | ||
| 226 | elif is_ancestor "$prod_git" "$archive_hash" "$prod_hash" 2>/dev/null; then | ||
| 227 | # Archive commit is ancestor of prod commit (checked in prod repo) - prod is ahead | ||
| 228 | archive_ahead=false | ||
| 229 | else | ||
| 230 | # Neither is ancestor - diverged | ||
| 231 | archive_ahead=false | ||
| 232 | prod_ahead=false | ||
| 233 | fi | ||
| 234 | done <<< "$prod_refs" | ||
| 235 | |||
| 236 | # Also check for branches only in archive (archive has extra branches) | ||
| 237 | while read -r archive_hash archive_ref; do | ||
| 238 | [[ -z "$archive_hash" ]] && continue | ||
| 239 | |||
| 240 | local prod_hash | ||
| 241 | prod_hash=$(awk -v ref="$archive_ref" '$2 == ref { print $1 }' <<< "$prod_refs") | ||
| 242 | |||
| 243 | if [[ -z "$prod_hash" ]]; then | ||
| 244 | # Branch exists in archive but not prod - archive has something prod doesn't | ||
| 245 | # This means archive is ahead (has extra branches) | ||
| 246 | prod_ahead=false | ||
| 247 | fi | ||
| 248 | done <<< "$archive_refs" | ||
| 249 | |||
| 250 | # Determine final relationship | ||
| 251 | if [[ "$has_common_branch" == "false" ]]; then | ||
| 252 | # No common branches - completely different | ||
| 253 | echo "diverged" | ||
| 254 | return | ||
| 255 | fi | ||
| 256 | |||
| 257 | if [[ "$archive_ahead" == "true" && "$prod_ahead" == "false" ]]; then | ||
| 258 | echo "archive-ahead" | ||
| 259 | elif [[ "$prod_ahead" == "true" && "$archive_ahead" == "false" ]]; then | ||
| 260 | echo "prod-ahead" | ||
| 261 | elif [[ "$archive_ahead" == "true" && "$prod_ahead" == "true" ]]; then | ||
| 262 | # Both true means all common branches are identical | ||
| 263 | # (prod may still have extra branches, which were ignored above) | ||
| 264 | echo "in-sync" | ||
| 265 | else | ||
| 266 | echo "diverged" | ||
| 267 | fi | ||
| 268 | } | ||
| 269 | |||
| 270 | # Main: validate the four arguments, classify every listed repo, write git-ancestry.tsv and log a summary | ||
| 271 | main() { | ||
| 272 | if [[ $# -ne 4 ]]; then | ||
| 273 | usage | ||
| 274 | fi | ||
| 275 | |||
| 276 | local prod_git_base="$1" | ||
| 277 | local archive_git_base="$2" | ||
| 278 | local repo_list="$3" | ||
| 279 | local output_dir="$4" | ||
| 280 | |||
| 281 | # Validate inputs | ||
| 282 | if [[ ! -d "$prod_git_base" ]]; then | ||
| 283 | log_error "Prod git base directory not found: $prod_git_base" | ||
| 284 | exit 1 | ||
| 285 | fi | ||
| 286 | |||
| 287 | if [[ ! -d "$archive_git_base" ]]; then | ||
| 288 | log_error "Archive git base directory not found: $archive_git_base" | ||
| 289 | exit 1 | ||
| 290 | fi | ||
| 291 | |||
| 292 | if [[ ! -f "$repo_list" ]]; then | ||
| 293 | log_error "Repo list file not found: $repo_list" | ||
| 294 | exit 1 | ||
| 295 | fi | ||
| 296 | |||
| 297 | log_info "=== Git Data Comparison ===" | ||
| 298 | log_info "Prod git base: $prod_git_base" | ||
| 299 | log_info "Archive git base: $archive_git_base" | ||
| 300 | log_info "Repo list: $repo_list" | ||
| 301 | log_info "Output: $output_dir" | ||
| 302 | log_info "Started: $(date)" | ||
| 303 | echo "" | ||
| 304 | |||
| 305 | # Create output directory | ||
| 306 | mkdir -p "$output_dir" | ||
| 307 | |||
| 308 | # Output file | ||
| 309 | local tsv_file="$output_dir/git-ancestry.tsv" | ||
| 310 | |||
| 311 | # Initialize TSV with header | ||
| 312 | echo -e "repo\tnpub\trelationship\tdetails" > "$tsv_file" | ||
| 313 | |||
| 314 | # Count repos. grep -c prints 0 itself but exits 1 when nothing matches, so only the status is masked | ||
| 315 | local total_repos | ||
| 316 | total_repos=$(grep -c -v '^#' "$repo_list" 2>/dev/null || true) | ||
| 317 | log_info "Processing ${total_repos:-0} repos..." | ||
| 318 | echo "" | ||
| 319 | |||
| 320 | # Counters | ||
| 321 | local count=0 | ||
| 322 | local count_archive_ahead=0 | ||
| 323 | local count_in_sync=0 | ||
| 324 | local count_prod_ahead=0 | ||
| 325 | local count_diverged=0 | ||
| 326 | local count_archive_only=0 | ||
| 327 | local count_prod_only=0 | ||
| 328 | local count_both_empty=0 | ||
| 329 | |||
| 330 | # Process each repo ('|| [[ -n "$repo" ]]' keeps a final line that lacks a trailing newline) | ||
| 331 | while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do | ||
| 332 | # Skip comments and empty lines | ||
| 333 | [[ "$repo" =~ ^# ]] && continue | ||
| 334 | [[ -z "$repo" ]] && continue | ||
| 335 | |||
| 336 | # Clean up whitespace | ||
| 337 | repo="${repo// /}" | ||
| 338 | npub="${npub// /}" | ||
| 339 | |||
| 340 | [[ -z "$repo" || -z "$npub" ]] && continue | ||
| 341 | |||
| 342 | count=$((count + 1)) | ||
| 343 | |||
| 344 | # Build git paths | ||
| 345 | local prod_git="$prod_git_base/${npub}/${repo}.git" | ||
| 346 | local archive_git="$archive_git_base/${npub}/${repo}.git" | ||
| 347 | |||
| 348 | # Compare | ||
| 349 | local relationship details="" | ||
| 350 | relationship=$(compare_repo_git "$prod_git" "$archive_git") | ||
| 351 | |||
| 352 | # Count by relationship | ||
| 353 | case "$relationship" in | ||
| 354 | archive-ahead) count_archive_ahead=$((count_archive_ahead + 1)) ;; | ||
| 355 | in-sync) count_in_sync=$((count_in_sync + 1)) ;; | ||
| 356 | prod-ahead) count_prod_ahead=$((count_prod_ahead + 1)) ;; | ||
| 357 | diverged) count_diverged=$((count_diverged + 1)) ;; | ||
| 358 | archive-only) count_archive_only=$((count_archive_only + 1)) ;; | ||
| 359 | prod-only) count_prod_only=$((count_prod_only + 1)) ;; | ||
| 360 | both-empty) count_both_empty=$((count_both_empty + 1)) ;; | ||
| 361 | esac | ||
| 362 | |||
| 363 | # Output TSV line | ||
| 364 | printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$relationship" "$details" >> "$tsv_file" | ||
| 365 | |||
| 366 | # Progress indicator every 10 repos | ||
| 367 | if [[ $((count % 10)) -eq 0 ]]; then | ||
| 368 | log_progress "Processed $count/${total_repos:-0} repos..." | ||
| 369 | fi | ||
| 370 | done < "$repo_list" | ||
| 371 | |||
| 372 | # Clear progress line | ||
| 373 | echo "" >&2 | ||
| 374 | |||
| 375 | # Summary | ||
| 376 | echo "" | ||
| 377 | log_info "=== Comparison Summary ===" | ||
| 378 | log_success "Archive ahead (use archive data): $count_archive_ahead" | ||
| 379 | log_success "In sync: $count_in_sync" | ||
| 380 | log_warn "Prod ahead (needs re-sync): $count_prod_ahead" | ||
| 381 | log_error "Diverged (manual review): $count_diverged" | ||
| 382 | log_info "Archive only: $count_archive_only" | ||
| 383 | log_info "Prod only: $count_prod_only" | ||
| 384 | log_info "Both empty: $count_both_empty" | ||
| 385 | echo "" | ||
| 386 | log_info "Total: $count repos" | ||
| 387 | log_info "Output: $tsv_file" | ||
| 388 | } | ||
| 389 | |||
| 390 | main "$@" | ||
diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh index b1348f8..07ae7c9 100755 --- a/docs/how-to/migration-scripts/40-classify-actions.sh +++ b/docs/how-to/migration-scripts/40-classify-actions.sh | |||
| @@ -10,16 +10,25 @@ | |||
| 10 | # - Empty in prod (prod=cat2, any archive status) | 10 | # - Empty in prod (prod=cat2, any archive status) |
| 11 | # - Archive-only (archive=any, prod=missing) | 11 | # - Archive-only (archive=any, prod=missing) |
| 12 | # - Not in prod (purgatory-only, prod=missing) | 12 | # - Not in prod (purgatory-only, prod=missing) |
| 13 | # - Archive ahead (archive has newer git data than prod - GRASP enforced) | ||
| 13 | # | 14 | # |
| 14 | # Tier 2: Action Required (needs-resync.txt) | 15 | # Tier 2: Action Required (needs-resync.txt) |
| 15 | # - Complete in prod, missing from archive (with purgatory context) | 16 | # - Complete in prod, missing from archive (with purgatory context) |
| 16 | # - Complete in prod, incomplete in archive (with purgatory context) | 17 | # - Complete in prod, incomplete in archive AND prod is ahead (with purgatory context) |
| 17 | # | 18 | # |
| 18 | # Tier 3: Manual Investigation (manual-review.txt) | 19 | # Tier 3: Manual Investigation (manual-review.txt) |
| 19 | # - Partial in prod (prod=cat3) | 20 | # - Partial in prod (prod=cat3) |
| 20 | # - No-match in prod (prod=cat4) | 21 | # - No-match in prod (prod=cat4) |
| 21 | # - Parse failures | 22 | # - Parse failures |
| 22 | # - Conflicting states | 23 | # - Conflicting states |
| 24 | # - Diverged git history (both have unique commits) | ||
| 25 | # | ||
| 26 | # KEY INSIGHT: | ||
| 27 | # Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event. | ||
| 28 | # If archive has different/newer data than prod, it means: | ||
| 29 | # - A state event authorized those commits at some point | ||
| 30 | # - Archive is actually MORE up-to-date than prod | ||
| 31 | # - Migration should use archive data (it's already correct) | ||
| 23 | # | 32 | # |
| 24 | # Usage: ./40-classify-actions.sh <analysis-dir> | 33 | # Usage: ./40-classify-actions.sh <analysis-dir> |
| 25 | # | 34 | # |
| @@ -231,6 +240,25 @@ DELETED_COUNT=0 | |||
| 231 | [[ ${#DELETED[@]} -gt 0 ]] && DELETED_COUNT=${#DELETED[@]} | 240 | [[ ${#DELETED[@]} -gt 0 ]] && DELETED_COUNT=${#DELETED[@]} |
| 232 | log_info "Loaded $DELETED_COUNT deletion entries" | 241 | log_info "Loaded $DELETED_COUNT deletion entries" |
| 233 | 242 | ||
| 243 | # Build git ancestry lookup: repo|npub -> relationship (archive-ahead, prod-ahead, diverged, etc.) | ||
| 244 | # This data comes from 22-compare-git-data.sh which compares actual git commits | ||
| 245 | declare -A GIT_ANCESTRY | ||
| 246 | GIT_ANCESTRY_COUNT=0 | ||
| 247 | if [[ -f "$COMPARISON_DIR/git-ancestry.tsv" ]]; then | ||
| 248 | while IFS=$'\t' read -r repo npub relationship details || [[ -n "$repo" ]]; do | ||
| 249 | # Skip header and comments | ||
| 250 | [[ "$repo" == "repo" ]] && continue | ||
| 251 | [[ "$repo" =~ ^# ]] && continue | ||
| 252 | [[ -z "$repo" || -z "$npub" ]] && continue | ||
| 253 | GIT_ANCESTRY["$repo|$npub"]="$relationship" | ||
| 254 | GIT_ANCESTRY_COUNT=$((GIT_ANCESTRY_COUNT + 1)) | ||
| 255 | done < "$COMPARISON_DIR/git-ancestry.tsv" | ||
| 256 | log_info "Loaded $GIT_ANCESTRY_COUNT git ancestry entries" | ||
| 257 | else | ||
| 258 | log_warn "No git-ancestry.tsv found - will not check if archive is ahead of prod" | ||
| 259 | log_warn "Run 22-compare-git-data.sh to enable archive-ahead detection" | ||
| 260 | fi | ||
| 261 | |||
| 234 | # ============================================================================ | 262 | # ============================================================================ |
| 235 | # Phase 2: Build unique repo list from all sources | 263 | # Phase 2: Build unique repo list from all sources |
| 236 | # ============================================================================ | 264 | # ============================================================================ |
| @@ -263,12 +291,14 @@ COUNTS[ready_deleted]=0 | |||
| 263 | COUNTS[ready_empty_prod]=0 | 291 | COUNTS[ready_empty_prod]=0 |
| 264 | COUNTS[ready_archive_only]=0 | 292 | COUNTS[ready_archive_only]=0 |
| 265 | COUNTS[ready_not_in_prod]=0 | 293 | COUNTS[ready_not_in_prod]=0 |
| 294 | COUNTS[ready_archive_ahead]=0 | ||
| 266 | COUNTS[resync_missing_archive]=0 | 295 | COUNTS[resync_missing_archive]=0 |
| 267 | COUNTS[resync_incomplete_archive]=0 | 296 | COUNTS[resync_incomplete_archive]=0 |
| 268 | COUNTS[review_partial_prod]=0 | 297 | COUNTS[review_partial_prod]=0 |
| 269 | COUNTS[review_nomatch_prod]=0 | 298 | COUNTS[review_nomatch_prod]=0 |
| 270 | COUNTS[review_parse_failure]=0 | 299 | COUNTS[review_parse_failure]=0 |
| 271 | COUNTS[review_conflicting]=0 | 300 | COUNTS[review_conflicting]=0 |
| 301 | COUNTS[review_diverged]=0 | ||
| 272 | 302 | ||
| 273 | # Output arrays | 303 | # Output arrays |
| 274 | declare -a READY_LINES | 304 | declare -a READY_LINES |
| @@ -381,14 +411,48 @@ for key in "${!ALL_REPOS[@]}"; do | |||
| 381 | REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete in prod with parse failure") | 411 | REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete in prod with parse failure") |
| 382 | COUNTS[review_parse_failure]=$((COUNTS[review_parse_failure] + 1)) | 412 | COUNTS[review_parse_failure]=$((COUNTS[review_parse_failure] + 1)) |
| 383 | else | 413 | else |
| 384 | # Needs resync - include purgatory context | 414 | # Check git ancestry to see if archive is actually ahead |
| 385 | context=$(get_context "$key" "$prod_status" "$archive_status") | 415 | local git_relationship="${GIT_ANCESTRY[$key]:-unknown}" |
| 386 | if [[ "$archive_cat" == "missing" ]]; then | 416 | |
| 387 | RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync to archive") | 417 | if [[ "$git_relationship" == "archive-ahead" || "$git_relationship" == "in-sync" ]]; then |
| 388 | COUNTS[resync_missing_archive]=$((COUNTS[resync_missing_archive] + 1)) | 418 | # Archive has newer/same git data - this is GOOD |
| 419 | # Archive's git data was authorized by a state event (GRASP enforced) | ||
| 420 | context=$(get_context "$key" "$prod_status" "$archive_status") | ||
| 421 | if [[ -n "$context" && "$context" != "none" ]]; then | ||
| 422 | context="$context, git=$git_relationship" | ||
| 423 | else | ||
| 424 | context="git=$git_relationship" | ||
| 425 | fi | ||
| 426 | READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | archive ahead (use archive data)") | ||
| 427 | COUNTS[ready_archive_ahead]=$((COUNTS[ready_archive_ahead] + 1)) | ||
| 428 | elif [[ "$git_relationship" == "diverged" ]]; then | ||
| 429 | # Git histories diverged - needs manual review | ||
| 430 | context=$(get_context "$key" "$prod_status" "$archive_status") | ||
| 431 | if [[ -n "$context" && "$context" != "none" ]]; then | ||
| 432 | context="$context, git=diverged" | ||
| 433 | else | ||
| 434 | context="git=diverged" | ||
| 435 | fi | ||
| 436 | REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | git histories diverged (manual review)") | ||
| 437 | COUNTS[review_diverged]=$((COUNTS[review_diverged] + 1)) | ||
| 389 | else | 438 | else |
| 390 | RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync (archive incomplete)") | 439 | # prod-ahead, archive-only, prod-only, both-empty, or unknown |
| 391 | COUNTS[resync_incomplete_archive]=$((COUNTS[resync_incomplete_archive] + 1)) | 440 | # These need resync - include purgatory context |
| 441 | context=$(get_context "$key" "$prod_status" "$archive_status") | ||
| 442 | if [[ "$git_relationship" != "unknown" ]]; then | ||
| 443 | if [[ -n "$context" && "$context" != "none" ]]; then | ||
| 444 | context="$context, git=$git_relationship" | ||
| 445 | else | ||
| 446 | context="git=$git_relationship" | ||
| 447 | fi | ||
| 448 | fi | ||
| 449 | if [[ "$archive_cat" == "missing" ]]; then | ||
| 450 | RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync to archive") | ||
| 451 | COUNTS[resync_missing_archive]=$((COUNTS[resync_missing_archive] + 1)) | ||
| 452 | else | ||
| 453 | RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync (archive incomplete)") | ||
| 454 | COUNTS[resync_incomplete_archive]=$((COUNTS[resync_incomplete_archive] + 1)) | ||
| 455 | fi | ||
| 392 | fi | 456 | fi |
| 393 | fi | 457 | fi |
| 394 | fi | 458 | fi |
| @@ -498,6 +562,7 @@ fi | |||
| 498 | echo "| Reason | Count |" | 562 | echo "| Reason | Count |" |
| 499 | echo "|--------|-------|" | 563 | echo "|--------|-------|" |
| 500 | echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |" | 564 | echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |" |
| 565 | echo "| archive ahead (has newer git data) | ${COUNTS[ready_archive_ahead]} |" | ||
| 501 | echo "| deleted by user | ${COUNTS[ready_deleted]} |" | 566 | echo "| deleted by user | ${COUNTS[ready_deleted]} |" |
| 502 | echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |" | 567 | echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |" |
| 503 | echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |" | 568 | echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |" |
| @@ -527,6 +592,7 @@ fi | |||
| 527 | echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |" | 592 | echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |" |
| 528 | echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |" | 593 | echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |" |
| 529 | echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |" | 594 | echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |" |
| 595 | echo "| git histories diverged | ${COUNTS[review_diverged]} |" | ||
| 530 | echo "" | 596 | echo "" |
| 531 | echo "## Input Data Summary" | 597 | echo "## Input Data Summary" |
| 532 | echo "" | 598 | echo "" |
| @@ -571,6 +637,7 @@ echo "" | |||
| 571 | echo "=== Summary ===" | 637 | echo "=== Summary ===" |
| 572 | echo "Ready for Migration: $TOTAL_READY ($PCT_READY%)" | 638 | echo "Ready for Migration: $TOTAL_READY ($PCT_READY%)" |
| 573 | echo " - Complete in both: ${COUNTS[ready_complete_both]}" | 639 | echo " - Complete in both: ${COUNTS[ready_complete_both]}" |
| 640 | echo " - Archive ahead: ${COUNTS[ready_archive_ahead]}" | ||
| 574 | echo " - Deleted by user: ${COUNTS[ready_deleted]}" | 641 | echo " - Deleted by user: ${COUNTS[ready_deleted]}" |
| 575 | echo " - Empty in prod: ${COUNTS[ready_empty_prod]}" | 642 | echo " - Empty in prod: ${COUNTS[ready_empty_prod]}" |
| 576 | echo " - Archive-only: ${COUNTS[ready_archive_only]}" | 643 | echo " - Archive-only: ${COUNTS[ready_archive_only]}" |
| @@ -584,6 +651,7 @@ echo "Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)" | |||
| 584 | echo " - Partial in prod: ${COUNTS[review_partial_prod]}" | 651 | echo " - Partial in prod: ${COUNTS[review_partial_prod]}" |
| 585 | echo " - No-match in prod: ${COUNTS[review_nomatch_prod]}" | 652 | echo " - No-match in prod: ${COUNTS[review_nomatch_prod]}" |
| 586 | echo " - Parse failures: ${COUNTS[review_parse_failure]}" | 653 | echo " - Parse failures: ${COUNTS[review_parse_failure]}" |
| 654 | echo " - Git diverged: ${COUNTS[review_diverged]}" | ||
| 587 | echo "" | 655 | echo "" |
| 588 | echo "Total: $TOTAL repos" | 656 | echo "Total: $TOTAL repos" |
| 589 | echo "" | 657 | echo "" |
diff --git a/docs/how-to/migration-scripts/run-migration-analysis.sh b/docs/how-to/migration-scripts/run-migration-analysis.sh index 089b553..acc5e44 100755 --- a/docs/how-to/migration-scripts/run-migration-analysis.sh +++ b/docs/how-to/migration-scripts/run-migration-analysis.sh | |||
| @@ -320,7 +320,7 @@ check_prerequisites() { | |||
| 320 | fi | 320 | fi |
| 321 | 321 | ||
| 322 | # Check scripts exist | 322 | # Check scripts exist |
| 323 | for script in 01-fetch-events.sh 10-check-git-sync.sh 20-categorize.sh 21-compare-relays.sh 30-extract-parse-failures.sh 31-extract-purgatory-expiry.sh 40-classify-actions.sh; do | 323 | for script in 01-fetch-events.sh 10-check-git-sync.sh 20-categorize.sh 21-compare-relays.sh 22-compare-git-data.sh 30-extract-parse-failures.sh 31-extract-purgatory-expiry.sh 40-classify-actions.sh; do |
| 324 | if [[ ! -x "$SCRIPT_DIR/$script" ]]; then | 324 | if [[ ! -x "$SCRIPT_DIR/$script" ]]; then |
| 325 | log_error "Script not found or not executable: $SCRIPT_DIR/$script" | 325 | log_error "Script not found or not executable: $SCRIPT_DIR/$script" |
| 326 | missing=1 | 326 | missing=1 |
| @@ -551,6 +551,20 @@ run_phase_3() { | |||
| 551 | fi | 551 | fi |
| 552 | 552 | ||
| 553 | run_phase 3 "Categorize & Compare (fast)" "${cmds[@]}" | 553 | run_phase 3 "Categorize & Compare (fast)" "${cmds[@]}" |
| 554 | |||
| 555 | # Phase 3c: Compare git data between relays (requires git paths) | ||
| 556 | # This determines if archive is ahead of prod for repos with mismatched state | ||
| 557 | if [[ -n "$PROD_GIT" && -n "$ARCHIVE_GIT" ]]; then | ||
| 558 | # Build list of repos to compare: those where prod=complete but archive is not | ||
| 559 | local repos_to_compare="$OUTPUT_DIR/comparison/complete-prod-incomplete-archive.txt" | ||
| 560 | if [[ -f "$repos_to_compare" ]] && [[ ! -f "$OUTPUT_DIR/comparison/git-ancestry.tsv" ]]; then | ||
| 561 | log_info "Running git ancestry comparison (Phase 3c)..." | ||
| 562 | run_phase 3 "Git Ancestry Comparison" "'$SCRIPT_DIR/22-compare-git-data.sh' '$PROD_GIT' '$ARCHIVE_GIT' '$repos_to_compare' '$OUTPUT_DIR/comparison'" | ||
| 563 | fi | ||
| 564 | else | ||
| 565 | log_warn "Git paths not provided - skipping git ancestry comparison" | ||
| 566 | log_warn "Without git comparison, repos where archive is ahead will be incorrectly flagged as needing re-sync" | ||
| 567 | fi | ||
| 554 | } | 568 | } |
| 555 | 569 | ||
| 556 | # Phase 4: Extract logs | 570 | # Phase 4: Extract logs |