upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/docs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh
diff options
context:
space:
mode:
authorDanConwayDev <DanConwayDev@protonmail.com>2026-02-23 15:20:59 +0000
committerDanConwayDev <DanConwayDev@protonmail.com>2026-02-23 15:20:59 +0000
commit113928aa84894ea8f65c247d9987527e792b32a9 (patch)
treeec967d6195d9f7ec4f061449596611afe3a0950f /docs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh
parent26f608e5011b9d1ad6036da75b89272835e69695 (diff)
parente0ad39a489b3398f8208713bf728db0cb11475b0 (diff)
Merge master into 3ca0-announcements-purgatory
Diffstat (limited to 'docs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh')
-rwxr-xr-xdocs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh662
1 files changed, 662 insertions, 0 deletions
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh
new file mode 100755
index 0000000..8b61636
--- /dev/null
+++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/40-classify-actions.sh
@@ -0,0 +1,662 @@
1#!/usr/bin/env bash
2#
3# 40-classify-actions.sh - Classify repos by migration action required
4#
5# Implements the redesigned classification system (Option B) with user feedback:
6#
7# Tier 1: No Action Required (ready-for-migration.txt)
8# - Complete in both (prod=cat1, archive=cat1)
9# - Deleted by user (kind 5 event)
10# - Empty in prod (prod=cat2, any archive status)
11# - Archive-only (archive=any, prod=missing)
12# - Not in prod (purgatory-only, prod=missing)
13# - Archive ahead (archive has newer git data than prod - GRASP enforced)
14#
15# Tier 2: Action Required (needs-resync.txt)
16# - Complete in prod, missing from archive (with purgatory context)
17# - Complete in prod, incomplete in archive AND prod is ahead (with purgatory context)
18#
19# Tier 3: Manual Investigation (manual-review.txt)
20# - Partial in prod (prod=cat3)
21# - No-match in prod (prod=cat4)
22# - Parse failures
23# - Conflicting states
24# - Diverged git history (both have unique commits)
25#
26# KEY INSIGHT:
27# Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event.
28# If archive has different/newer data than prod, it means:
29# - A state event authorized those commits at some point
30# - Archive is actually MORE up-to-date than prod
31# - Migration should use archive data (it's already correct)
32#
33# Usage: ./40-classify-actions.sh <analysis-dir>
34#
35# Output format: repo | npub | prod_status | archive_status | context | action
36#
37
set -euo pipefail

# Colors for output. These hold literal backslash escapes; the log helpers
# expand them with printf's %b.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Logging helpers: print "[LEVEL] <message>" with a colored level tag.
# Arguments: $* - message words, joined with single spaces.
# Outputs:   log_info/log_success/log_warn write to stdout, log_error to stderr.
# Only the color variables go through %b; the message itself is printed with
# %s so backslashes in paths or repo names are shown verbatim instead of being
# interpreted as escape sequences.
log_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$*"; }
log_success() { printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$*"; }
log_warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*"; }
log_error() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2; }
51
# Check arguments: the analysis directory is the single required parameter
if (( $# < 1 )); then
  printf '%s\n' \
    "Usage: $0 <analysis-dir>" \
    "Example: $0 work/migration-analysis-20260123-200701"
  exit 1
fi

ANALYSIS_DIR="$1"

# Validate analysis directory
[[ -d "$ANALYSIS_DIR" ]] || {
  log_error "Analysis directory not found: $ANALYSIS_DIR"
  exit 1
}

# Define paths (all relative to the analysis directory)
PROD_DIR="$ANALYSIS_DIR/prod"
ARCHIVE_DIR="$ANALYSIS_DIR/archive"
COMPARISON_DIR="$ANALYSIS_DIR/comparison"
LOGS_DIR="$ANALYSIS_DIR/logs"
RESULTS_DIR="$ANALYSIS_DIR/results"

# Validate required directories; stop at the first one that is absent
for required_dir in "$PROD_DIR" "$ARCHIVE_DIR" "$COMPARISON_DIR" "$LOGS_DIR"; do
  [[ -d "$required_dir" ]] && continue
  log_error "Required directory not found: $required_dir"
  exit 1
done

# Create results directory (the only directory this script creates itself)
mkdir -p "$RESULTS_DIR"

# Output files
READY_FILE="$RESULTS_DIR/ready-for-migration.txt"
RESYNC_FILE="$RESULTS_DIR/needs-resync.txt"
REVIEW_FILE="$RESULTS_DIR/manual-review.txt"
SUMMARY_FILE="$RESULTS_DIR/summary.txt"

# Temporary files for processing; removed again on any exit path
TMP_DIR=$(mktemp -d)
trap 'rm -rf "$TMP_DIR"' EXIT

log_info "Starting classification with revised system (Option B)"
log_info "Analysis directory: $ANALYSIS_DIR"
# ============================================================================
# Phase 1: Build lookup tables from source data
# ============================================================================

log_info "Building lookup tables..."

# Category files read for both prod and archive, as "category:filename".
# They are loaded in this order, so if the same repo|npub key appears in more
# than one file the later category wins.
CATEGORY_FILES=(
  "cat1:category1-complete-match.txt"
  "cat2:category2-empty-blank.txt"
  "cat3:category3-partial-match.txt"
  "cat4:category4-no-match.txt"
)

# Each category file holds '|'-separated rows whose first two fields are the
# repo and the npub; spaces around them are stripped and rows missing either
# field are skipped. The "|| [[ -n "$repo" ]]" guard keeps a final line that
# has no trailing newline.

# Build prod category lookup: repo|npub -> category
declare -A PROD_CAT
for category_spec in "${CATEGORY_FILES[@]}"; do
  category="${category_spec%%:*}"
  category_file="$PROD_DIR/${category_spec#*:}"
  while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do
    repo="${repo// /}" # Remove all spaces
    npub="${npub// /}" # Remove all spaces
    [[ -z "$repo" || -z "$npub" ]] && continue
    PROD_CAT["$repo|$npub"]="$category"
  done < "$category_file"
done

log_info "Loaded ${#PROD_CAT[@]} prod entries"

# Build archive category lookup: repo|npub -> category
# (same parsing as prod, including the unterminated-last-line guard)
declare -A ARCHIVE_CAT
for category_spec in "${CATEGORY_FILES[@]}"; do
  category="${category_spec%%:*}"
  category_file="$ARCHIVE_DIR/${category_spec#*:}"
  while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do
    repo="${repo// /}"
    npub="${npub// /}"
    [[ -z "$repo" || -z "$npub" ]] && continue
    ARCHIVE_CAT["$repo|$npub"]="$category"
  done < "$category_file"
done

log_info "Loaded ${#ARCHIVE_CAT[@]} archive entries"
167
# Build purgatory lookup: repo|npub -> 1 (if purgatory expired)
# Rows are tab-separated: repo, npub, timestamp, reason.
declare -A PURGATORY
PURGATORY_COUNT=0
PURGATORY_LOG="$LOGS_DIR/purgatory-expired.txt"
if [[ -f "$PURGATORY_LOG" ]]; then
  while IFS=$'\t' read -r repo npub timestamp reason || [[ -n "$repo" ]]; do
    # Ignore comment rows and rows without both key fields
    if [[ "$repo" == \#* || -z "$repo" || -z "$npub" ]]; then
      continue
    fi
    PURGATORY["$repo|$npub"]=1
    PURGATORY_COUNT=$((PURGATORY_COUNT + 1))
  done < "$PURGATORY_LOG"
fi
log_info "Loaded $PURGATORY_COUNT purgatory entries"

# Build parse failure lookup: repo|npub -> 1 (if parse failure logged)
# Parse failures file format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
declare -A PARSE_FAIL
PARSE_FAIL_COUNT=0
PARSE_FAIL_LOG="$LOGS_DIR/parse-failures.txt"
if [[ -f "$PARSE_FAIL_LOG" ]]; then
  while IFS=$'\t' read -r event_id kind reason repo npub || [[ -n "$event_id" ]]; do
    # Ignore comment rows and rows without both key fields
    if [[ "$event_id" == \#* || -z "$repo" || -z "$npub" ]]; then
      continue
    fi
    PARSE_FAIL["$repo|$npub"]=1
    PARSE_FAIL_COUNT=$((PARSE_FAIL_COUNT + 1))
  done < "$PARSE_FAIL_LOG"
fi
log_info "Loaded $PARSE_FAIL_COUNT parse failure entries"
196
# Build deletion lookup: repo|npub -> 1 (if kind 5 deletion event)
# Deletions are in NDJSON format with "a" tags like "30617:pubkey_hex:repo"
# We need to convert hex pubkeys to npub format using nak
declare -A DELETED

#######################################
# Record every repo referenced by a deletion file (NDJSON, one event per line).
# Globals:   DELETED (written) - gains a "repo|npub" key per deleted repo
# Arguments: $1 - path to the deletions file; a missing file is not an error
# Returns:   0 on success (including "file absent"), 1 if jq cannot read
#            the file
#######################################
process_deletions() {
  local file="$1"
  [[ -f "$file" ]] || return 0

  # Pull the value of every "a" tag. jq's own stderr is left visible so a
  # malformed file is diagnosable instead of silently aborting the script.
  local addrs
  if ! addrs=$(jq -r '.tags[] | select(.[0] == "a") | .[1]' "$file"); then
    log_error "Failed to extract \"a\" tags from deletion file: $file"
    return 1
  fi

  # hex pubkey -> npub cache, so nak runs once per distinct pubkey
  local -A hex_to_npub=()
  local addr rest pubkey_hex repo npub
  while IFS= read -r addr; do
    # Only "30617:<pubkey>:<repo>" addresses are relevant here
    [[ "$addr" == 30617:*:* ]] || continue
    rest="${addr#30617:}"
    pubkey_hex="${rest%%:*}"
    # Everything after the pubkey is the repo identifier, so an identifier
    # that itself contains ':' is kept whole.
    repo="${rest#*:}"
    [[ -z "$pubkey_hex" || -z "$repo" ]] && continue

    if [[ -z "${hex_to_npub[$pubkey_hex]+cached}" ]]; then
      # An empty result (nak failed or key invalid) is cached too, so a bad
      # key is only attempted once.
      hex_to_npub["$pubkey_hex"]=$(nak encode npub "$pubkey_hex" 2>/dev/null || true)
    fi
    npub="${hex_to_npub[$pubkey_hex]}"
    [[ -z "$npub" ]] && continue

    DELETED["$repo|$npub"]=1
  done <<< "$addrs"

  return 0
}
235
# Process prod and archive deletions (prod first, then archive)
for deletions_root in "$PROD_DIR" "$ARCHIVE_DIR"; do
  process_deletions "$deletions_root/raw/deletions.json"
done
DELETED_COUNT=${#DELETED[@]}
log_info "Loaded $DELETED_COUNT deletion entries"

# Build git ancestry lookup: repo|npub -> relationship (archive-ahead, prod-ahead, diverged, etc.)
# This data comes from 22-compare-git-data.sh which compares actual git commits
declare -A GIT_ANCESTRY
GIT_ANCESTRY_COUNT=0
GIT_ANCESTRY_FILE="$COMPARISON_DIR/git-ancestry.tsv"
if [[ ! -f "$GIT_ANCESTRY_FILE" ]]; then
  log_warn "No git-ancestry.tsv found - will not check if archive is ahead of prod"
  log_warn "Run 22-compare-git-data.sh to enable archive-ahead detection"
else
  while IFS=$'\t' read -r repo npub relationship details || [[ -n "$repo" ]]; do
    # Skip the header row and comment rows
    case "$repo" in
      repo | \#*) continue ;;
    esac
    # Skip rows without both key fields
    [[ -n "$repo" && -n "$npub" ]] || continue
    GIT_ANCESTRY["$repo|$npub"]="$relationship"
    GIT_ANCESTRY_COUNT=$((GIT_ANCESTRY_COUNT + 1))
  done < "$GIT_ANCESTRY_FILE"
  log_info "Loaded $GIT_ANCESTRY_COUNT git ancestry entries"
fi
261
# ============================================================================
# Phase 2: Build unique repo list from all sources
# ============================================================================

log_info "Building unique repo list..."

# Union of every repo|npub key seen in prod, archive or the purgatory log.
# Deletion and git-ancestry keys are deliberately not added here.
declare -A ALL_REPOS
for key in "${!PROD_CAT[@]}" "${!ARCHIVE_CAT[@]}" "${!PURGATORY[@]}"; do
  ALL_REPOS["$key"]=1
done

log_info "Total unique repos: ${#ALL_REPOS[@]}"
280
# ============================================================================
# Phase 3: Classify each repo according to revised decision tree
# ============================================================================

log_info "Classifying repos..."

# Counters for summary, one per classification reason, all starting at zero.
# Prefix = output tier: ready_* / resync_* / review_*.
declare -A COUNTS
for counter_name in \
  ready_complete_both \
  ready_deleted \
  ready_empty_prod \
  ready_archive_only \
  ready_not_in_prod \
  ready_archive_ahead \
  resync_missing_archive \
  resync_incomplete_archive \
  review_partial_prod \
  review_nomatch_prod \
  review_parse_failure \
  review_conflicting \
  review_diverged; do
  COUNTS[$counter_name]=0
done

# Output arrays: one formatted row per classified repo, grouped by tier
declare -a READY_LINES
declare -a RESYNC_LINES
declare -a REVIEW_LINES
307
#######################################
# Build the comma-separated context string for one repo.
# Globals:   PURGATORY, PARSE_FAIL (read)
# Arguments: $1 - lookup key "repo|npub"
#            $2 - prod status (human-readable, e.g. "empty")
#            $3 - archive status (human-readable)
# Outputs:   the applicable flags joined by ", " in the fixed order
#            purgatory-expired, parse-failure, archive-has-data;
#            "none" when no flag applies
#######################################
get_context() {
  local key="$1"
  local prod_status="$2"
  local archive_status="$3"
  local -a flags=()

  # Purgatory log has an entry for this repo
  if [[ -n "${PURGATORY[$key]:-}" ]]; then
    flags+=("purgatory-expired")
  fi

  # A parse failure was logged for this repo
  if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
    flags+=("parse-failure")
  fi

  # Unexpected state: prod is empty although archive holds something
  if [[ "$prod_status" == "empty" ]]; then
    case "$archive_status" in
      missing | empty) ;;
      *) flags+=("archive-has-data") ;;
    esac
  fi

  if (( ${#flags[@]} == 0 )); then
    echo "none"
    return
  fi

  local joined
  printf -v joined '%s, ' "${flags[@]}"
  echo "${joined%, }"
}
340
#######################################
# Translate a category code into its human-readable status.
# Arguments: $1 - category code (cat1..cat4 or "missing")
# Outputs:   the status word; any unrecognised value is echoed back unchanged
#######################################
cat_to_status() {
  local category="$1"
  local status="$category" # fallback: pass unknown values (and "missing") through

  if [[ "$category" == "cat1" ]]; then
    status="complete"
  elif [[ "$category" == "cat2" ]]; then
    status="empty"
  elif [[ "$category" == "cat3" ]]; then
    status="partial"
  elif [[ "$category" == "cat4" ]]; then
    status="no-match"
  fi

  echo "$status"
}
352
# Append one result row for the repo currently being classified and bump the
# matching counter.
# Globals:   repo, npub, prod_status, archive_status, context (read);
#            READY_LINES / RESYNC_LINES / REVIEW_LINES, COUNTS (written)
# Arguments: $1 - tier: ready | resync | review
#            $2 - COUNTS key to increment
#            $3 - text for the last column (action / reason)
record_result() {
  local tier="$1"
  local counter="$2"
  local verdict="$3"
  local row="$repo | $npub | $prod_status | $archive_status | $context | $verdict"

  case "$tier" in
    ready) READY_LINES+=("$row") ;;
    resync) RESYNC_LINES+=("$row") ;;
    review) REVIEW_LINES+=("$row") ;;
  esac
  COUNTS[$counter]=$((${COUNTS[$counter]} + 1))
}

LOOP_COUNT=0
for key in "${!ALL_REPOS[@]}"; do
  LOOP_COUNT=$((LOOP_COUNT + 1))
  # Progress message every 100 repos
  if (( LOOP_COUNT % 100 == 0 )); then
    log_info "Processed $LOOP_COUNT repos..."
  fi
  IFS='|' read -r repo npub <<< "$key"

  prod_cat="${PROD_CAT[$key]:-missing}"
  archive_cat="${ARCHIVE_CAT[$key]:-missing}"
  prod_status=$(cat_to_status "$prod_cat")
  archive_status=$(cat_to_status "$archive_cat")
  # get_context only reads the lookup tables, so it is computed once here;
  # the purgatory-only branch below overrides it with a fixed value.
  context=$(get_context "$key" "$prod_status" "$archive_status")

  # Decision tree implementation

  # 1. Is there a kind 5 deletion event? That wins over every other state.
  if [[ -n "${DELETED[$key]:-}" ]]; then
    record_result ready ready_deleted "deleted by user"
    continue
  fi

  # 2. What is the prod status?
  case "$prod_cat" in
    missing)
      # Not in prod
      if [[ "$archive_cat" != "missing" ]]; then
        # In archive but not in prod -> no action (archive-only)
        record_result ready ready_archive_only "archive-only (not in prod)"
      elif [[ -n "${PURGATORY[$key]:-}" ]]; then
        # Purgatory only, not in prod -> no action
        context="purgatory-expired"
        record_result ready ready_not_in_prod "purgatory-only (not in prod)"
      fi
      # Otherwise skip (not a real repo - no data anywhere)
      ;;

    cat2)
      # Empty in prod -> ALWAYS no action required
      record_result ready ready_empty_prod "empty in prod (user never pushed)"
      ;;

    cat3)
      # Partial in prod -> ALWAYS manual investigation
      record_result review review_partial_prod "partial in prod (investigate git data)"
      ;;

    cat4)
      # No-match in prod -> ALWAYS manual investigation
      record_result review review_nomatch_prod "no-match in prod (git corruption)"
      ;;

    cat1)
      # Complete in prod
      if [[ "$archive_cat" == "cat1" ]]; then
        # Complete in both -> no action
        record_result ready ready_complete_both "complete in both"
      elif [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
        # Archive is missing/incomplete and a parse failure was logged
        # -> needs manual review
        record_result review review_parse_failure "complete in prod with parse failure"
      else
        # Archive is missing/incomplete: let git ancestry decide
        git_relationship="${GIT_ANCESTRY[$key]:-unknown}"

        # Every known relationship is appended to the context; "unknown"
        # leaves the context untouched.
        if [[ "$git_relationship" != "unknown" ]]; then
          if [[ -z "$context" || "$context" == "none" ]]; then
            context="git=$git_relationship"
          else
            context="$context, git=$git_relationship"
          fi
        fi

        case "$git_relationship" in
          archive-ahead | in-sync)
            # Archive has newer/same git data - this is GOOD
            # Archive's git data was authorized by a state event (GRASP enforced)
            record_result ready ready_archive_ahead "archive ahead (use archive data)"
            ;;
          diverged)
            # Git histories diverged - needs manual review
            record_result review review_diverged "git histories diverged (manual review)"
            ;;
          *)
            # prod-ahead, archive-only, prod-only, both-empty, or unknown
            # These need resync - include purgatory context
            if [[ "$archive_cat" == "missing" ]]; then
              record_result resync resync_missing_archive "trigger re-sync to archive"
            else
              record_result resync resync_incomplete_archive "trigger re-sync (archive incomplete)"
            fi
            ;;
        esac
      fi
      ;;
  esac
done
476
# ============================================================================
# Phase 4: Write output files
# ============================================================================

log_info "Writing output files..."

# One UTC timestamp shared by all generated files
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S+00:00")

# Each file is a '#'-prefixed header followed by one row per repo.

# Write ready-for-migration.txt
{
  cat <<EOF
# Ready for Migration - No action required
# Generated: $TIMESTAMP
# Format: repo | npub | prod_status | archive_status | context | reason
#
EOF
  for line in "${READY_LINES[@]}"; do
    printf '%s\n' "$line"
  done
} > "$READY_FILE"

# Write needs-resync.txt
{
  cat <<EOF
# Needs Re-sync - Action required
# Generated: $TIMESTAMP
# Format: repo | npub | prod_status | archive_status | context | action
#
# Context meanings:
# purgatory-expired = archive tried to sync but failed (30min timeout)
# none = archive never tried or announcement missing
#
EOF
  for line in "${RESYNC_LINES[@]}"; do
    printf '%s\n' "$line"
  done
} > "$RESYNC_FILE"

# Write manual-review.txt
{
  cat <<EOF
# Manual Review Required - Investigation needed
# Generated: $TIMESTAMP
# Format: repo | npub | prod_status | archive_status | context | reason
#
EOF
  for line in "${REVIEW_LINES[@]}"; do
    printf '%s\n' "$line"
  done
} > "$REVIEW_FILE"
521
# ============================================================================
# Phase 5: Generate summary
# ============================================================================

# Print the number of lines in a file that do not start with '#'.
# Arguments: $1 - file path
# Outputs:   the count; "0" when the file is missing or unreadable
count_data_lines() {
  local file="$1"
  local count
  # grep -c prints "0" AND exits non-zero when nothing matches, so a
  # "|| echo 0" fallback inside the substitution would emit "0" twice.
  # Capture first, then default only when grep printed nothing at all.
  count=$(grep -c -v '^#' "$file" 2>/dev/null) || true
  echo "${count:-0}"
}

# Print part/total as a percentage with one decimal place.
# Arguments: $1 - part, $2 - total
# Outputs:   e.g. "33.3"; "0.0" when total is not positive
percent_of() {
  local part="$1"
  local total="$2"
  if [[ "$total" -gt 0 ]]; then
    awk -v part="$part" -v total="$total" 'BEGIN {printf "%.1f", (part / total) * 100}'
  else
    printf '0.0'
  fi
}

log_info "Generating summary..."

TOTAL_READY="${#READY_LINES[@]}"
TOTAL_RESYNC="${#RESYNC_LINES[@]}"
TOTAL_REVIEW="${#REVIEW_LINES[@]}"
TOTAL=$((TOTAL_READY + TOTAL_RESYNC + TOTAL_REVIEW))

# Calculate percentages
PCT_READY=$(percent_of "$TOTAL_READY" "$TOTAL")
PCT_RESYNC=$(percent_of "$TOTAL_RESYNC" "$TOTAL")
PCT_REVIEW=$(percent_of "$TOTAL_REVIEW" "$TOTAL")

# The summary is Markdown-formatted text written to summary.txt
{
  echo "# Migration Classification Summary"
  echo "Generated: $TIMESTAMP"
  echo "Analysis Directory: $ANALYSIS_DIR"
  echo ""
  echo "## Overview"
  echo ""
  echo "| Category | Count | Percentage |"
  echo "|----------|-------|------------|"
  echo "| Ready for Migration | $TOTAL_READY | $PCT_READY% |"
  echo "| Needs Re-sync | $TOTAL_RESYNC | $PCT_RESYNC% |"
  echo "| Manual Review | $TOTAL_REVIEW | $PCT_REVIEW% |"
  echo "| **Total** | **$TOTAL** | **100%** |"
  echo ""
  echo "## Tier 1: Ready for Migration ($TOTAL_READY repos)"
  echo ""
  echo "These repositories are ready for migration or don't need migration:"
  echo ""
  echo "| Reason | Count |"
  echo "|--------|-------|"
  echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |"
  echo "| archive ahead (has newer git data) | ${COUNTS[ready_archive_ahead]} |"
  echo "| deleted by user | ${COUNTS[ready_deleted]} |"
  echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |"
  echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |"
  echo "| purgatory-only (not in prod) | ${COUNTS[ready_not_in_prod]} |"
  echo ""
  echo "## Tier 2: Needs Re-sync ($TOTAL_RESYNC repos)"
  echo ""
  echo "These repositories need re-sync to archive before migration:"
  echo ""
  echo "| Reason | Count | Action |"
  echo "|--------|-------|--------|"
  echo "| complete in prod, missing from archive | ${COUNTS[resync_missing_archive]} | trigger re-sync |"
  echo "| complete in prod, incomplete in archive | ${COUNTS[resync_incomplete_archive]} | trigger re-sync |"
  echo ""
  echo "### Purgatory Context"
  echo ""
  echo "Repos in needs-resync.txt include purgatory context:"
  echo "- **purgatory-expired**: Archive tried to sync but failed (30min timeout)"
  echo "- **none**: Archive never tried or announcement missing"
  echo ""
  echo "## Tier 3: Manual Review ($TOTAL_REVIEW repos)"
  echo ""
  echo "These repositories require human investigation:"
  echo ""
  echo "| Reason | Count |"
  echo "|--------|-------|"
  echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |"
  echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |"
  echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |"
  echo "| git histories diverged | ${COUNTS[review_diverged]} |"
  echo ""
  echo "## Input Data Summary"
  echo ""
  echo "### Prod Categories"
  echo "- Category 1 (complete): $(wc -l < "$PROD_DIR/category1-complete-match.txt")"
  echo "- Category 2 (empty): $(wc -l < "$PROD_DIR/category2-empty-blank.txt")"
  echo "- Category 3 (partial): $(wc -l < "$PROD_DIR/category3-partial-match.txt")"
  echo "- Category 4 (no match): $(wc -l < "$PROD_DIR/category4-no-match.txt")"
  echo ""
  echo "### Archive Categories"
  echo "- Category 1 (complete): $(wc -l < "$ARCHIVE_DIR/category1-complete-match.txt")"
  echo "- Category 2 (empty): $(wc -l < "$ARCHIVE_DIR/category2-empty-blank.txt")"
  echo "- Category 3 (partial): $(wc -l < "$ARCHIVE_DIR/category3-partial-match.txt")"
  echo "- Category 4 (no match): $(wc -l < "$ARCHIVE_DIR/category4-no-match.txt")"
  echo ""
  echo "### Logs"
  echo "- Parse failures: $(count_data_lines "$LOGS_DIR/parse-failures.txt")"
  echo "- Purgatory expired: $(count_data_lines "$LOGS_DIR/purgatory-expired.txt")"
  echo ""
  echo "## Output Files"
  echo ""
  echo "- \`results/ready-for-migration.txt\` - $TOTAL_READY repos ready for migration"
  echo "- \`results/needs-resync.txt\` - $TOTAL_RESYNC repos needing re-sync"
  echo "- \`results/manual-review.txt\` - $TOTAL_REVIEW repos needing investigation"
  echo "- \`results/summary.txt\` - This summary file"
  echo ""
  echo "## Recommended Next Steps"
  echo ""
  echo "1. **Review needs-resync.txt** - Trigger re-sync for these repos"
  echo "2. **Review manual-review.txt** - Investigate unusual states"
  echo "3. **Verify ready-for-migration.txt** - Spot-check a few repos"
  echo "4. **Plan migration window** - Schedule cutover when action items resolved"
} > "$SUMMARY_FILE"
629
# ============================================================================
# Phase 6: Print summary to console
# ============================================================================

# Same totals as summary.txt, condensed for the terminal
echo ""
log_success "Classification complete!"
cat <<EOF

=== Summary ===
Ready for Migration: $TOTAL_READY ($PCT_READY%)
 - Complete in both: ${COUNTS[ready_complete_both]}
 - Archive ahead: ${COUNTS[ready_archive_ahead]}
 - Deleted by user: ${COUNTS[ready_deleted]}
 - Empty in prod: ${COUNTS[ready_empty_prod]}
 - Archive-only: ${COUNTS[ready_archive_only]}
 - Purgatory-only: ${COUNTS[ready_not_in_prod]}

Needs Re-sync: $TOTAL_RESYNC ($PCT_RESYNC%)
 - Missing from archive: ${COUNTS[resync_missing_archive]}
 - Incomplete in archive: ${COUNTS[resync_incomplete_archive]}

Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)
 - Partial in prod: ${COUNTS[review_partial_prod]}
 - No-match in prod: ${COUNTS[review_nomatch_prod]}
 - Parse failures: ${COUNTS[review_parse_failure]}
 - Git diverged: ${COUNTS[review_diverged]}

Total: $TOTAL repos

Output files:
 $READY_FILE
 $RESYNC_FILE
 $REVIEW_FILE
 $SUMMARY_FILE
EOF