upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summary | refs | log | tree | commit | diff
path: root/docs/how-to/migration-scripts/40-classify-actions.sh
diff options
context:
space:
mode:
author: DanConwayDev <DanConwayDev@protonmail.com> 2026-01-28 14:17:30 +0000
committer: DanConwayDev <DanConwayDev@protonmail.com> 2026-01-28 14:17:30 +0000
commit: 3c1eda5fc9e660d40cadcdef8903aea986fe3242 (patch)
tree: c11f81ca30069f4deca24de7c9c47368733ab7b8 /docs/how-to/migration-scripts/40-classify-actions.sh
parent: efc3da477d4edb9d1334718e3e20d197ba711468 (diff)
feat(migration): detect when archive git data is ahead of prod
Add git ancestry comparison (22-compare-git-data.sh) to determine commit relationships between prod and archive repos. Repos where archive is ahead are now correctly classified as ready-for-migration since ngit-grasp only accepts git data authorized by state events. Previously, repos with different git data were flagged as needs-resync even when archive had newer/better data than prod.
Diffstat (limited to 'docs/how-to/migration-scripts/40-classify-actions.sh')
-rwxr-xr-x docs/how-to/migration-scripts/40-classify-actions.sh 84
1 files changed, 76 insertions, 8 deletions
diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh
index b1348f8..07ae7c9 100755
--- a/docs/how-to/migration-scripts/40-classify-actions.sh
+++ b/docs/how-to/migration-scripts/40-classify-actions.sh
@@ -10,16 +10,25 @@
10# - Empty in prod (prod=cat2, any archive status) 10# - Empty in prod (prod=cat2, any archive status)
11# - Archive-only (archive=any, prod=missing) 11# - Archive-only (archive=any, prod=missing)
12# - Not in prod (purgatory-only, prod=missing) 12# - Not in prod (purgatory-only, prod=missing)
13# - Archive ahead (archive has newer git data than prod - GRASP enforced)
13# 14#
14# Tier 2: Action Required (needs-resync.txt) 15# Tier 2: Action Required (needs-resync.txt)
15# - Complete in prod, missing from archive (with purgatory context) 16# - Complete in prod, missing from archive (with purgatory context)
16# - Complete in prod, incomplete in archive (with purgatory context) 17# - Complete in prod, incomplete in archive AND prod is ahead (with purgatory context)
17# 18#
18# Tier 3: Manual Investigation (manual-review.txt) 19# Tier 3: Manual Investigation (manual-review.txt)
19# - Partial in prod (prod=cat3) 20# - Partial in prod (prod=cat3)
20# - No-match in prod (prod=cat4) 21# - No-match in prod (prod=cat4)
21# - Parse failures 22# - Parse failures
22# - Conflicting states 23# - Conflicting states
24# - Diverged git history (both have unique commits)
25#
26# KEY INSIGHT:
27# Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event.
28# If archive has different/newer data than prod, it means:
29# - A state event authorized those commits at some point
30# - Archive is actually MORE up-to-date than prod
31# - Migration should use archive data (it's already correct)
23# 32#
24# Usage: ./40-classify-actions.sh <analysis-dir> 33# Usage: ./40-classify-actions.sh <analysis-dir>
25# 34#
@@ -231,6 +240,25 @@ DELETED_COUNT=0
231[[ ${#DELETED[@]} -gt 0 ]] && DELETED_COUNT=${#DELETED[@]} 240[[ ${#DELETED[@]} -gt 0 ]] && DELETED_COUNT=${#DELETED[@]}
232log_info "Loaded $DELETED_COUNT deletion entries" 241log_info "Loaded $DELETED_COUNT deletion entries"
233 242
243# Build git ancestry lookup: repo|npub -> relationship (archive-ahead, prod-ahead, diverged, etc.)
244# This data comes from 22-compare-git-data.sh which compares actual git commits
245declare -A GIT_ANCESTRY
246GIT_ANCESTRY_COUNT=0
247if [[ -f "$COMPARISON_DIR/git-ancestry.tsv" ]]; then
248 while IFS=$'\t' read -r repo npub relationship details || [[ -n "$repo" ]]; do
249 # Skip header and comments
250 [[ "$repo" == "repo" ]] && continue
251 [[ "$repo" =~ ^# ]] && continue
252 [[ -z "$repo" || -z "$npub" ]] && continue
253 GIT_ANCESTRY["$repo|$npub"]="$relationship"
254 GIT_ANCESTRY_COUNT=$((GIT_ANCESTRY_COUNT + 1))
255 done < "$COMPARISON_DIR/git-ancestry.tsv"
256 log_info "Loaded $GIT_ANCESTRY_COUNT git ancestry entries"
257else
258 log_warn "No git-ancestry.tsv found - will not check if archive is ahead of prod"
259 log_warn "Run 22-compare-git-data.sh to enable archive-ahead detection"
260fi
261
234# ============================================================================ 262# ============================================================================
235# Phase 2: Build unique repo list from all sources 263# Phase 2: Build unique repo list from all sources
236# ============================================================================ 264# ============================================================================
@@ -263,12 +291,14 @@ COUNTS[ready_deleted]=0
263COUNTS[ready_empty_prod]=0 291COUNTS[ready_empty_prod]=0
264COUNTS[ready_archive_only]=0 292COUNTS[ready_archive_only]=0
265COUNTS[ready_not_in_prod]=0 293COUNTS[ready_not_in_prod]=0
294COUNTS[ready_archive_ahead]=0
266COUNTS[resync_missing_archive]=0 295COUNTS[resync_missing_archive]=0
267COUNTS[resync_incomplete_archive]=0 296COUNTS[resync_incomplete_archive]=0
268COUNTS[review_partial_prod]=0 297COUNTS[review_partial_prod]=0
269COUNTS[review_nomatch_prod]=0 298COUNTS[review_nomatch_prod]=0
270COUNTS[review_parse_failure]=0 299COUNTS[review_parse_failure]=0
271COUNTS[review_conflicting]=0 300COUNTS[review_conflicting]=0
301COUNTS[review_diverged]=0
272 302
273# Output arrays 303# Output arrays
274declare -a READY_LINES 304declare -a READY_LINES
@@ -381,14 +411,48 @@ for key in "${!ALL_REPOS[@]}"; do
381 REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete in prod with parse failure") 411 REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete in prod with parse failure")
382 COUNTS[review_parse_failure]=$((COUNTS[review_parse_failure] + 1)) 412 COUNTS[review_parse_failure]=$((COUNTS[review_parse_failure] + 1))
383 else 413 else
384 # Needs resync - include purgatory context 414 # Check git ancestry to see if archive is actually ahead
385 context=$(get_context "$key" "$prod_status" "$archive_status") 415 local git_relationship="${GIT_ANCESTRY[$key]:-unknown}"
386 if [[ "$archive_cat" == "missing" ]]; then 416
387 RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync to archive") 417 if [[ "$git_relationship" == "archive-ahead" || "$git_relationship" == "in-sync" ]]; then
388 COUNTS[resync_missing_archive]=$((COUNTS[resync_missing_archive] + 1)) 418 # Archive has newer/same git data - this is GOOD
419 # Archive's git data was authorized by a state event (GRASP enforced)
420 context=$(get_context "$key" "$prod_status" "$archive_status")
421 if [[ -n "$context" && "$context" != "none" ]]; then
422 context="$context, git=$git_relationship"
423 else
424 context="git=$git_relationship"
425 fi
426 READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | archive ahead (use archive data)")
427 COUNTS[ready_archive_ahead]=$((COUNTS[ready_archive_ahead] + 1))
428 elif [[ "$git_relationship" == "diverged" ]]; then
429 # Git histories diverged - needs manual review
430 context=$(get_context "$key" "$prod_status" "$archive_status")
431 if [[ -n "$context" && "$context" != "none" ]]; then
432 context="$context, git=diverged"
433 else
434 context="git=diverged"
435 fi
436 REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | git histories diverged (manual review)")
437 COUNTS[review_diverged]=$((COUNTS[review_diverged] + 1))
389 else 438 else
390 RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync (archive incomplete)") 439 # prod-ahead, archive-only, prod-only, both-empty, or unknown
391 COUNTS[resync_incomplete_archive]=$((COUNTS[resync_incomplete_archive] + 1)) 440 # These need resync - include purgatory context
441 context=$(get_context "$key" "$prod_status" "$archive_status")
442 if [[ "$git_relationship" != "unknown" ]]; then
443 if [[ -n "$context" && "$context" != "none" ]]; then
444 context="$context, git=$git_relationship"
445 else
446 context="git=$git_relationship"
447 fi
448 fi
449 if [[ "$archive_cat" == "missing" ]]; then
450 RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync to archive")
451 COUNTS[resync_missing_archive]=$((COUNTS[resync_missing_archive] + 1))
452 else
453 RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync (archive incomplete)")
454 COUNTS[resync_incomplete_archive]=$((COUNTS[resync_incomplete_archive] + 1))
455 fi
392 fi 456 fi
393 fi 457 fi
394 fi 458 fi
@@ -498,6 +562,7 @@ fi
498 echo "| Reason | Count |" 562 echo "| Reason | Count |"
499 echo "|--------|-------|" 563 echo "|--------|-------|"
500 echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |" 564 echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |"
565 echo "| archive ahead (has newer git data) | ${COUNTS[ready_archive_ahead]} |"
501 echo "| deleted by user | ${COUNTS[ready_deleted]} |" 566 echo "| deleted by user | ${COUNTS[ready_deleted]} |"
502 echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |" 567 echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |"
503 echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |" 568 echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |"
@@ -527,6 +592,7 @@ fi
527 echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |" 592 echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |"
528 echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |" 593 echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |"
529 echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |" 594 echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |"
595 echo "| git histories diverged | ${COUNTS[review_diverged]} |"
530 echo "" 596 echo ""
531 echo "## Input Data Summary" 597 echo "## Input Data Summary"
532 echo "" 598 echo ""
@@ -571,6 +637,7 @@ echo ""
571echo "=== Summary ===" 637echo "=== Summary ==="
572echo "Ready for Migration: $TOTAL_READY ($PCT_READY%)" 638echo "Ready for Migration: $TOTAL_READY ($PCT_READY%)"
573echo " - Complete in both: ${COUNTS[ready_complete_both]}" 639echo " - Complete in both: ${COUNTS[ready_complete_both]}"
640echo " - Archive ahead: ${COUNTS[ready_archive_ahead]}"
574echo " - Deleted by user: ${COUNTS[ready_deleted]}" 641echo " - Deleted by user: ${COUNTS[ready_deleted]}"
575echo " - Empty in prod: ${COUNTS[ready_empty_prod]}" 642echo " - Empty in prod: ${COUNTS[ready_empty_prod]}"
576echo " - Archive-only: ${COUNTS[ready_archive_only]}" 643echo " - Archive-only: ${COUNTS[ready_archive_only]}"
@@ -584,6 +651,7 @@ echo "Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)"
584echo " - Partial in prod: ${COUNTS[review_partial_prod]}" 651echo " - Partial in prod: ${COUNTS[review_partial_prod]}"
585echo " - No-match in prod: ${COUNTS[review_nomatch_prod]}" 652echo " - No-match in prod: ${COUNTS[review_nomatch_prod]}"
586echo " - Parse failures: ${COUNTS[review_parse_failure]}" 653echo " - Parse failures: ${COUNTS[review_parse_failure]}"
654echo " - Git diverged: ${COUNTS[review_diverged]}"
587echo "" 655echo ""
588echo "Total: $TOTAL repos" 656echo "Total: $TOTAL repos"
589echo "" 657echo ""