diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-28 14:17:30 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-28 14:17:30 +0000 |
| commit | 3c1eda5fc9e660d40cadcdef8903aea986fe3242 (patch) | |
| tree | c11f81ca30069f4deca24de7c9c47368733ab7b8 /docs/how-to/migration-scripts/40-classify-actions.sh | |
| parent | efc3da477d4edb9d1334718e3e20d197ba711468 (diff) | |
feat(migration): detect when archive git data is ahead of prod
Add git ancestry comparison (22-compare-git-data.sh) to determine
commit relationships between prod and archive repos. Repos where
archive is ahead of (or in sync with) prod are now correctly classified
as ready-for-migration, since ngit-grasp only accepts git data
authorized by state events.
Previously, repos with different git data were flagged as needs-resync
even when archive had newer/better data than prod.
Diffstat (limited to 'docs/how-to/migration-scripts/40-classify-actions.sh')
| -rwxr-xr-x | docs/how-to/migration-scripts/40-classify-actions.sh | 84 |
1 file changed, 76 insertions, 8 deletions
diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh index b1348f8..07ae7c9 100755 --- a/docs/how-to/migration-scripts/40-classify-actions.sh +++ b/docs/how-to/migration-scripts/40-classify-actions.sh | |||
| @@ -10,16 +10,25 @@ | |||
| 10 | # - Empty in prod (prod=cat2, any archive status) | 10 | # - Empty in prod (prod=cat2, any archive status) |
| 11 | # - Archive-only (archive=any, prod=missing) | 11 | # - Archive-only (archive=any, prod=missing) |
| 12 | # - Not in prod (purgatory-only, prod=missing) | 12 | # - Not in prod (purgatory-only, prod=missing) |
| 13 | # - Archive ahead (archive has newer git data than prod - GRASP enforced) | ||
| 13 | # | 14 | # |
| 14 | # Tier 2: Action Required (needs-resync.txt) | 15 | # Tier 2: Action Required (needs-resync.txt) |
| 15 | # - Complete in prod, missing from archive (with purgatory context) | 16 | # - Complete in prod, missing from archive (with purgatory context) |
| 16 | # - Complete in prod, incomplete in archive (with purgatory context) | 17 | # - Complete in prod, incomplete in archive AND prod is ahead (with purgatory context) |
| 17 | # | 18 | # |
| 18 | # Tier 3: Manual Investigation (manual-review.txt) | 19 | # Tier 3: Manual Investigation (manual-review.txt) |
| 19 | # - Partial in prod (prod=cat3) | 20 | # - Partial in prod (prod=cat3) |
| 20 | # - No-match in prod (prod=cat4) | 21 | # - No-match in prod (prod=cat4) |
| 21 | # - Parse failures | 22 | # - Parse failures |
| 22 | # - Conflicting states | 23 | # - Conflicting states |
| 24 | # - Diverged git history (both have unique commits) | ||
| 25 | # | ||
| 26 | # KEY INSIGHT: | ||
| 27 | # Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event. | ||
| 28 | # If archive has different/newer data than prod, it means: | ||
| 29 | # - A state event authorized those commits at some point | ||
| 30 | # - Archive is actually MORE up-to-date than prod | ||
| 31 | # - Migration should use archive data (it's already correct) | ||
| 23 | # | 32 | # |
| 24 | # Usage: ./40-classify-actions.sh <analysis-dir> | 33 | # Usage: ./40-classify-actions.sh <analysis-dir> |
| 25 | # | 34 | # |
| @@ -231,6 +240,25 @@ DELETED_COUNT=0 | |||
| 231 | [[ ${#DELETED[@]} -gt 0 ]] && DELETED_COUNT=${#DELETED[@]} | 240 | [[ ${#DELETED[@]} -gt 0 ]] && DELETED_COUNT=${#DELETED[@]} |
| 232 | log_info "Loaded $DELETED_COUNT deletion entries" | 241 | log_info "Loaded $DELETED_COUNT deletion entries" |
| 233 | 242 | ||
| 243 | # Build git ancestry lookup: repo|npub -> relationship (archive-ahead, prod-ahead, diverged, etc.) | ||
| 244 | # This data comes from 22-compare-git-data.sh which compares actual git commits | ||
| 245 | declare -A GIT_ANCESTRY | ||
| 246 | GIT_ANCESTRY_COUNT=0 | ||
| 247 | if [[ -f "$COMPARISON_DIR/git-ancestry.tsv" ]]; then | ||
| 248 | while IFS=$'\t' read -r repo npub relationship details || [[ -n "$repo" ]]; do | ||
| 249 | # Skip header and comments | ||
| 250 | [[ "$repo" == "repo" ]] && continue | ||
| 251 | [[ "$repo" =~ ^# ]] && continue | ||
| 252 | [[ -z "$repo" || -z "$npub" ]] && continue | ||
| 253 | GIT_ANCESTRY["$repo|$npub"]="$relationship" | ||
| 254 | GIT_ANCESTRY_COUNT=$((GIT_ANCESTRY_COUNT + 1)) | ||
| 255 | done < "$COMPARISON_DIR/git-ancestry.tsv" | ||
| 256 | log_info "Loaded $GIT_ANCESTRY_COUNT git ancestry entries" | ||
| 257 | else | ||
| 258 | log_warn "No git-ancestry.tsv found - will not check if archive is ahead of prod" | ||
| 259 | log_warn "Run 22-compare-git-data.sh to enable archive-ahead detection" | ||
| 260 | fi | ||
| 261 | |||
| 234 | # ============================================================================ | 262 | # ============================================================================ |
| 235 | # Phase 2: Build unique repo list from all sources | 263 | # Phase 2: Build unique repo list from all sources |
| 236 | # ============================================================================ | 264 | # ============================================================================ |
| @@ -263,12 +291,14 @@ COUNTS[ready_deleted]=0 | |||
| 263 | COUNTS[ready_empty_prod]=0 | 291 | COUNTS[ready_empty_prod]=0 |
| 264 | COUNTS[ready_archive_only]=0 | 292 | COUNTS[ready_archive_only]=0 |
| 265 | COUNTS[ready_not_in_prod]=0 | 293 | COUNTS[ready_not_in_prod]=0 |
| 294 | COUNTS[ready_archive_ahead]=0 | ||
| 266 | COUNTS[resync_missing_archive]=0 | 295 | COUNTS[resync_missing_archive]=0 |
| 267 | COUNTS[resync_incomplete_archive]=0 | 296 | COUNTS[resync_incomplete_archive]=0 |
| 268 | COUNTS[review_partial_prod]=0 | 297 | COUNTS[review_partial_prod]=0 |
| 269 | COUNTS[review_nomatch_prod]=0 | 298 | COUNTS[review_nomatch_prod]=0 |
| 270 | COUNTS[review_parse_failure]=0 | 299 | COUNTS[review_parse_failure]=0 |
| 271 | COUNTS[review_conflicting]=0 | 300 | COUNTS[review_conflicting]=0 |
| 301 | COUNTS[review_diverged]=0 | ||
| 272 | 302 | ||
| 273 | # Output arrays | 303 | # Output arrays |
| 274 | declare -a READY_LINES | 304 | declare -a READY_LINES |
| @@ -381,14 +411,48 @@ for key in "${!ALL_REPOS[@]}"; do | |||
| 381 | REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete in prod with parse failure") | 411 | REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete in prod with parse failure") |
| 382 | COUNTS[review_parse_failure]=$((COUNTS[review_parse_failure] + 1)) | 412 | COUNTS[review_parse_failure]=$((COUNTS[review_parse_failure] + 1)) |
| 383 | else | 413 | else |
| 384 | # Needs resync - include purgatory context | 414 | # Check git ancestry to see if archive is actually ahead |
| 385 | context=$(get_context "$key" "$prod_status" "$archive_status") | 415 | local git_relationship="${GIT_ANCESTRY[$key]:-unknown}" |
| 386 | if [[ "$archive_cat" == "missing" ]]; then | 416 | |
| 387 | RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync to archive") | 417 | if [[ "$git_relationship" == "archive-ahead" || "$git_relationship" == "in-sync" ]]; then |
| 388 | COUNTS[resync_missing_archive]=$((COUNTS[resync_missing_archive] + 1)) | 418 | # Archive has newer/same git data - this is GOOD |
| 419 | # Archive's git data was authorized by a state event (GRASP enforced) | ||
| 420 | context=$(get_context "$key" "$prod_status" "$archive_status") | ||
| 421 | if [[ -n "$context" && "$context" != "none" ]]; then | ||
| 422 | context="$context, git=$git_relationship" | ||
| 423 | else | ||
| 424 | context="git=$git_relationship" | ||
| 425 | fi | ||
| 426 | READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | archive ahead (use archive data)") | ||
| 427 | COUNTS[ready_archive_ahead]=$((COUNTS[ready_archive_ahead] + 1)) | ||
| 428 | elif [[ "$git_relationship" == "diverged" ]]; then | ||
| 429 | # Git histories diverged - needs manual review | ||
| 430 | context=$(get_context "$key" "$prod_status" "$archive_status") | ||
| 431 | if [[ -n "$context" && "$context" != "none" ]]; then | ||
| 432 | context="$context, git=diverged" | ||
| 433 | else | ||
| 434 | context="git=diverged" | ||
| 435 | fi | ||
| 436 | REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | git histories diverged (manual review)") | ||
| 437 | COUNTS[review_diverged]=$((COUNTS[review_diverged] + 1)) | ||
| 389 | else | 438 | else |
| 390 | RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync (archive incomplete)") | 439 | # prod-ahead, archive-only, prod-only, both-empty, or unknown |
| 391 | COUNTS[resync_incomplete_archive]=$((COUNTS[resync_incomplete_archive] + 1)) | 440 | # These need resync - include purgatory context |
| 441 | context=$(get_context "$key" "$prod_status" "$archive_status") | ||
| 442 | if [[ "$git_relationship" != "unknown" ]]; then | ||
| 443 | if [[ -n "$context" && "$context" != "none" ]]; then | ||
| 444 | context="$context, git=$git_relationship" | ||
| 445 | else | ||
| 446 | context="git=$git_relationship" | ||
| 447 | fi | ||
| 448 | fi | ||
| 449 | if [[ "$archive_cat" == "missing" ]]; then | ||
| 450 | RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync to archive") | ||
| 451 | COUNTS[resync_missing_archive]=$((COUNTS[resync_missing_archive] + 1)) | ||
| 452 | else | ||
| 453 | RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync (archive incomplete)") | ||
| 454 | COUNTS[resync_incomplete_archive]=$((COUNTS[resync_incomplete_archive] + 1)) | ||
| 455 | fi | ||
| 392 | fi | 456 | fi |
| 393 | fi | 457 | fi |
| 394 | fi | 458 | fi |
| @@ -498,6 +562,7 @@ fi | |||
| 498 | echo "| Reason | Count |" | 562 | echo "| Reason | Count |" |
| 499 | echo "|--------|-------|" | 563 | echo "|--------|-------|" |
| 500 | echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |" | 564 | echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |" |
| 565 | echo "| archive ahead (has newer git data) | ${COUNTS[ready_archive_ahead]} |" | ||
| 501 | echo "| deleted by user | ${COUNTS[ready_deleted]} |" | 566 | echo "| deleted by user | ${COUNTS[ready_deleted]} |" |
| 502 | echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |" | 567 | echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |" |
| 503 | echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |" | 568 | echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |" |
| @@ -527,6 +592,7 @@ fi | |||
| 527 | echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |" | 592 | echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |" |
| 528 | echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |" | 593 | echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |" |
| 529 | echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |" | 594 | echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |" |
| 595 | echo "| git histories diverged | ${COUNTS[review_diverged]} |" | ||
| 530 | echo "" | 596 | echo "" |
| 531 | echo "## Input Data Summary" | 597 | echo "## Input Data Summary" |
| 532 | echo "" | 598 | echo "" |
| @@ -571,6 +637,7 @@ echo "" | |||
| 571 | echo "=== Summary ===" | 637 | echo "=== Summary ===" |
| 572 | echo "Ready for Migration: $TOTAL_READY ($PCT_READY%)" | 638 | echo "Ready for Migration: $TOTAL_READY ($PCT_READY%)" |
| 573 | echo " - Complete in both: ${COUNTS[ready_complete_both]}" | 639 | echo " - Complete in both: ${COUNTS[ready_complete_both]}" |
| 640 | echo " - Archive ahead: ${COUNTS[ready_archive_ahead]}" | ||
| 574 | echo " - Deleted by user: ${COUNTS[ready_deleted]}" | 641 | echo " - Deleted by user: ${COUNTS[ready_deleted]}" |
| 575 | echo " - Empty in prod: ${COUNTS[ready_empty_prod]}" | 642 | echo " - Empty in prod: ${COUNTS[ready_empty_prod]}" |
| 576 | echo " - Archive-only: ${COUNTS[ready_archive_only]}" | 643 | echo " - Archive-only: ${COUNTS[ready_archive_only]}" |
| @@ -584,6 +651,7 @@ echo "Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)" | |||
| 584 | echo " - Partial in prod: ${COUNTS[review_partial_prod]}" | 651 | echo " - Partial in prod: ${COUNTS[review_partial_prod]}" |
| 585 | echo " - No-match in prod: ${COUNTS[review_nomatch_prod]}" | 652 | echo " - No-match in prod: ${COUNTS[review_nomatch_prod]}" |
| 586 | echo " - Parse failures: ${COUNTS[review_parse_failure]}" | 653 | echo " - Parse failures: ${COUNTS[review_parse_failure]}" |
| 654 | echo " - Git diverged: ${COUNTS[review_diverged]}" | ||
| 587 | echo "" | 655 | echo "" |
| 588 | echo "Total: $TOTAL repos" | 656 | echo "Total: $TOTAL repos" |
| 589 | echo "" | 657 | echo "" |