diff options
Diffstat (limited to 'docs/how-to/migration-scripts/40-classify-actions.sh')
| -rwxr-xr-x | docs/how-to/migration-scripts/40-classify-actions.sh | 662 |
1 files changed, 0 insertions, 662 deletions
diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh deleted file mode 100755 index 8b61636..0000000 --- a/docs/how-to/migration-scripts/40-classify-actions.sh +++ /dev/null | |||
| @@ -1,662 +0,0 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 40-classify-actions.sh - Classify repos by migration action required | ||
| 4 | # | ||
| 5 | # Implements the redesigned classification system (Option B) with user feedback: | ||
| 6 | # | ||
| 7 | # Tier 1: No Action Required (ready-for-migration.txt) | ||
| 8 | # - Complete in both (prod=cat1, archive=cat1) | ||
| 9 | # - Deleted by user (kind 5 event) | ||
| 10 | # - Empty in prod (prod=cat2, any archive status) | ||
| 11 | # - Archive-only (archive=any, prod=missing) | ||
| 12 | # - Not in prod (purgatory-only, prod=missing) | ||
| 13 | # - Archive ahead or in sync (git ancestry shows archive has newer or identical git data - GRASP enforced) | |
| 14 | # | ||
| 15 | # Tier 2: Action Required (needs-resync.txt) | ||
| 16 | # - Complete in prod, missing from archive (with purgatory context) | ||
| 17 | # - Complete in prod, incomplete in archive AND prod is ahead (with purgatory context) | ||
| 18 | # | ||
| 19 | # Tier 3: Manual Investigation (manual-review.txt) | ||
| 20 | # - Partial in prod (prod=cat3) | ||
| 21 | # - No-match in prod (prod=cat4) | ||
| 22 | # - Parse failures | ||
| 23 | # - Conflicting states (counter reserved; no rule in this script currently assigns it) | |
| 24 | # - Diverged git history (both have unique commits) | ||
| 25 | # | ||
| 26 | # KEY INSIGHT: | ||
| 27 | # Archive (ngit-grasp) enforces GRASP - git data ALWAYS matches a state event. | ||
| 28 | # If archive has different/newer data than prod, it means: | ||
| 29 | # - A state event authorized those commits at some point | ||
| 30 | # - Archive is actually MORE up-to-date than prod | ||
| 31 | # - Migration should use archive data (it's already correct) | ||
| 32 | # | ||
| 33 | # Usage: ./40-classify-actions.sh <analysis-dir> | ||
| 34 | # | ||
| 35 | # Output format: repo | npub | prod_status | archive_status | context | action | ||
| 36 | # | ||
| 37 | |||
set -euo pipefail

# ANSI colour sequences for log prefixes. ANSI-C quoting ($'...') stores the
# real ESC byte, so the helpers below can print with a plain %s and never
# re-interpret backslashes that happen to appear inside a message.
RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
BLUE=$'\033[0;34m'
NC=$'\033[0m' # No Color

# Logging helpers: print "<coloured tag> <message>" followed by a newline.
# All arguments are joined with single spaces and printed verbatim.
# log_error writes to stderr; the other three write to stdout.
log_info() { printf '%s[INFO]%s %s\n' "$BLUE" "$NC" "$*"; }
log_success() { printf '%s[OK]%s %s\n' "$GREEN" "$NC" "$*"; }
log_warn() { printf '%s[WARN]%s %s\n' "$YELLOW" "$NC" "$*"; }
log_error() { printf '%s[ERROR]%s %s\n' "$RED" "$NC" "$*" >&2; }
| 51 | |||
| 52 | # Check arguments | ||
# The analysis directory is the only (mandatory) positional argument.
if (( $# == 0 )); then
  printf '%s\n' \
    "Usage: $0 <analysis-dir>" \
    "Example: $0 work/migration-analysis-20260123-200701"
  exit 1
fi

ANALYSIS_DIR="$1"

# Bail out early when the analysis directory itself is absent.
[[ -d "$ANALYSIS_DIR" ]] || {
  log_error "Analysis directory not found: $ANALYSIS_DIR"
  exit 1
}

# Input sub-directories and the results directory, all under the analysis dir.
PROD_DIR="$ANALYSIS_DIR/prod"
ARCHIVE_DIR="$ANALYSIS_DIR/archive"
COMPARISON_DIR="$ANALYSIS_DIR/comparison"
LOGS_DIR="$ANALYSIS_DIR/logs"
RESULTS_DIR="$ANALYSIS_DIR/results"

# Every input directory must already exist; stop at the first missing one.
for dir in "$PROD_DIR" "$ARCHIVE_DIR" "$COMPARISON_DIR" "$LOGS_DIR"; do
  [[ -d "$dir" ]] && continue
  log_error "Required directory not found: $dir"
  exit 1
done

# The results directory is created on demand.
mkdir -p "$RESULTS_DIR"

# Report files written by the later phases.
READY_FILE="$RESULTS_DIR/ready-for-migration.txt"
RESYNC_FILE="$RESULTS_DIR/needs-resync.txt"
REVIEW_FILE="$RESULTS_DIR/manual-review.txt"
SUMMARY_FILE="$RESULTS_DIR/summary.txt"

# Scratch directory, removed again on any exit path.
TMP_DIR=$(mktemp -d)
trap 'rm -rf "$TMP_DIR"' EXIT

log_info "Starting classification with revised system (Option B)"
log_info "Analysis directory: $ANALYSIS_DIR"
| 98 | # ============================================================================ | ||
| 99 | # Phase 1: Build lookup tables from source data | ||
| 100 | # ============================================================================ | ||
| 101 | |||
log_info "Building lookup tables..."

#######################################
# Load one category file into a "repo|npub" -> category lookup table.
# Each input line is pipe-separated; only the first two fields (repo, npub)
# are used, and all spaces are removed from both. Lines where either field
# is empty are skipped. A final line without a trailing newline is still read.
# Arguments:
#   $1 - name of the associative array to fill (nameref, needs bash 4.3+)
#   $2 - category label to store (e.g. cat1)
#   $3 - path of the category file
# Returns:
#   0 on success, 1 if the file is missing or unreadable (the caller runs
#   under `set -e`, so that aborts the script instead of silently treating
#   every repo of that category as missing).
#######################################
load_category() {
  local -n _category_map="$1"
  local category="$2"
  local file="$3"
  local repo npub _rest

  if [[ ! -r "$file" ]]; then
    log_error "Required category file not found: $file"
    return 1
  fi

  while IFS='|' read -r repo npub _rest || [[ -n "$repo" ]]; do
    repo="${repo// /}" # Remove all spaces
    npub="${npub// /}" # Remove all spaces
    [[ -n "$repo" && -n "$npub" ]] || continue
    _category_map["$repo|$npub"]="$category"
  done < "$file"
}

# Build prod category lookup: repo|npub -> category.
# Files are loaded in order, so a key present in several files keeps the
# category of the last file that lists it.
declare -A PROD_CAT=()
load_category PROD_CAT cat1 "$PROD_DIR/category1-complete-match.txt"
load_category PROD_CAT cat2 "$PROD_DIR/category2-empty-blank.txt"
load_category PROD_CAT cat3 "$PROD_DIR/category3-partial-match.txt"
load_category PROD_CAT cat4 "$PROD_DIR/category4-no-match.txt"

log_info "Loaded ${#PROD_CAT[@]} prod entries"

# Build archive category lookup: repo|npub -> category (same rules as prod)
declare -A ARCHIVE_CAT=()
load_category ARCHIVE_CAT cat1 "$ARCHIVE_DIR/category1-complete-match.txt"
load_category ARCHIVE_CAT cat2 "$ARCHIVE_DIR/category2-empty-blank.txt"
load_category ARCHIVE_CAT cat3 "$ARCHIVE_DIR/category3-partial-match.txt"
load_category ARCHIVE_CAT cat4 "$ARCHIVE_DIR/category4-no-match.txt"

log_info "Loaded ${#ARCHIVE_CAT[@]} archive entries"
| 167 | |||
| 168 | # Build purgatory lookup: repo|npub -> 1 (if purgatory expired) | ||
#######################################
# Split one tab-separated line into the global array TSV_FIELDS.
# Tab is an IFS *whitespace* character, so `IFS=$'\t' read a b c` collapses
# consecutive tabs and silently shifts later columns left when a field is
# empty. Translating tabs to the non-whitespace unit separator (0x1f) first
# keeps empty fields in their position.
# Arguments: $1 - the line to split
# Outputs:   sets TSV_FIELDS (trailing empty fields may be absent, so read
#            elements with a default: "${TSV_FIELDS[n]:-}")
#######################################
split_tsv() {
  local tab=$'\t'
  local sep=$'\037'
  local translated="${1//$tab/$sep}"
  IFS="$sep" read -r -a TSV_FIELDS <<< "$translated" || true
}

# Build purgatory lookup: repo|npub -> 1 (if purgatory expired)
# Columns used here: repo<TAB>npub (any further columns are ignored)
declare -A PURGATORY
PURGATORY_COUNT=0
if [[ -f "$LOGS_DIR/purgatory-expired.txt" ]]; then
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Skip comments and empty lines
    [[ -z "$line" || "$line" == \#* ]] && continue
    split_tsv "$line"
    repo="${TSV_FIELDS[0]:-}"
    npub="${TSV_FIELDS[1]:-}"
    [[ -z "$repo" || -z "$npub" ]] && continue
    PURGATORY["$repo|$npub"]=1
    PURGATORY_COUNT=$((PURGATORY_COUNT + 1))
  done < "$LOGS_DIR/purgatory-expired.txt"
fi
log_info "Loaded $PURGATORY_COUNT purgatory entries"

# Build parse failure lookup: repo|npub -> 1 (if parse failure logged)
# Parse failures file format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
declare -A PARSE_FAIL
PARSE_FAIL_COUNT=0
if [[ -f "$LOGS_DIR/parse-failures.txt" ]]; then
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Skip comments and empty lines
    [[ -z "$line" || "$line" == \#* ]] && continue
    split_tsv "$line"
    repo="${TSV_FIELDS[3]:-}"
    npub="${TSV_FIELDS[4]:-}"
    [[ -z "$repo" || -z "$npub" ]] && continue
    PARSE_FAIL["$repo|$npub"]=1
    PARSE_FAIL_COUNT=$((PARSE_FAIL_COUNT + 1))
  done < "$LOGS_DIR/parse-failures.txt"
fi
log_info "Loaded $PARSE_FAIL_COUNT parse failure entries"
| 196 | |||
| 197 | # Build deletion lookup: repo|npub -> 1 (if kind 5 deletion event) | ||
| 198 | # Deletions are in NDJSON format with "a" tags like "30617:pubkey_hex:repo" | ||
| 199 | # We need to convert hex pubkeys to npub format using nak | ||
declare -A DELETED

#######################################
# Record kind 5 deletions from one NDJSON file into DELETED.
# Reads every "a" tag of the form "30617:<pubkey_hex>:<repo>", converts the
# hex pubkey to an npub with `nak encode npub`, and sets DELETED["repo|npub"].
# Globals:   DELETED (written)
# Arguments: $1 - path to the NDJSON deletions file
# Returns:   0 when the file is absent or was processed,
#            1 when jq could not extract the tags (fatal under `set -e`)
#######################################
process_deletions() {
  local file="$1"
  [[ -f "$file" ]] || return 0

  # Extract unique "pubkey_hex|repo" pairs.
  #  - `.tags[]?` tolerates events that carry no tags array.
  #  - only 30617 addresses are kept; other kinds are not repo announcements.
  #  - the repo identifier is everything after the second ':' so identifiers
  #    that themselves contain ':' are not truncated.
  # jq's stderr is left visible so a failure is diagnosable.
  local pairs
  if ! pairs=$(
    jq -r '
      .tags[]?
      | select(.[0] == "a")
      | .[1]
      | select(type == "string" and startswith("30617:"))
      | split(":")
      | select(length >= 3)
      | "\(.[1])|\(.[2:] | join(":"))"
    ' "$file" | sort -u
  ); then
    log_error "Failed to extract deletion tags from: $file"
    return 1
  fi

  # Convert each distinct hex pubkey once and cache the result (an empty
  # cached value means the conversion failed).
  local -A npub_for_hex=()
  local pubkey_hex repo npub
  local failed=0
  while IFS='|' read -r pubkey_hex repo; do
    [[ -n "$pubkey_hex" && -n "$repo" ]] || continue
    if [[ -z "${npub_for_hex[$pubkey_hex]+cached}" ]]; then
      npub=$(nak encode npub "$pubkey_hex" 2>/dev/null) || npub=""
      npub_for_hex["$pubkey_hex"]="$npub"
      if [[ -z "$npub" ]]; then
        failed=$((failed + 1))
      fi
    fi
    npub="${npub_for_hex[$pubkey_hex]}"
    [[ -n "$npub" ]] || continue
    DELETED["$repo|$npub"]=1
  done <<< "$pairs"

  # Deletions whose pubkey cannot be converted are skipped (best effort),
  # but say so: those repos will be classified as if they were not deleted.
  if (( failed > 0 )); then
    log_warn "Could not convert $failed pubkey(s) to npub in $file; their deletions were ignored"
  fi
  return 0
}

# Process prod and archive deletions
process_deletions "$PROD_DIR/raw/deletions.json"
process_deletions "$ARCHIVE_DIR/raw/deletions.json"
DELETED_COUNT=${#DELETED[@]}
log_info "Loaded $DELETED_COUNT deletion entries"
| 242 | |||
| 243 | # Build git ancestry lookup: repo|npub -> relationship (archive-ahead, prod-ahead, diverged, etc.) | ||
| 244 | # This data comes from 22-compare-git-data.sh which compares actual git commits | ||
declare -A GIT_ANCESTRY
GIT_ANCESTRY_COUNT=0
ancestry_file="$COMPARISON_DIR/git-ancestry.tsv"

if [[ ! -f "$ancestry_file" ]]; then
  # Without the comparison output every repo's relationship stays "unknown".
  log_warn "No git-ancestry.tsv found - will not check if archive is ahead of prod"
  log_warn "Run 22-compare-git-data.sh to enable archive-ahead detection"
else
  # Columns: repo<TAB>npub<TAB>relationship<TAB>details (details is unused).
  # NOTE(review): tab is IFS whitespace, so an empty middle column would
  # shift the following columns left - assumes no column is ever empty.
  while IFS=$'\t' read -r repo npub relationship details || [[ -n "$repo" ]]; do
    case "$repo" in
      repo | \#*) continue ;; # header row or comment line
    esac
    if [[ -n "$repo" && -n "$npub" ]]; then
      GIT_ANCESTRY["$repo|$npub"]="$relationship"
      GIT_ANCESTRY_COUNT=$((GIT_ANCESTRY_COUNT + 1))
    fi
  done < "$ancestry_file"
  log_info "Loaded $GIT_ANCESTRY_COUNT git ancestry entries"
fi
| 261 | |||
| 262 | # ============================================================================ | ||
| 263 | # Phase 2: Build unique repo list from all sources | ||
| 264 | # ============================================================================ | ||
| 265 | |||
log_info "Building unique repo list..."

# Union of every "repo|npub" key seen in prod, archive or purgatory; the
# associative array de-duplicates keys that appear in more than one source.
declare -A ALL_REPOS
for key in "${!PROD_CAT[@]}" "${!ARCHIVE_CAT[@]}" "${!PURGATORY[@]}"; do
  ALL_REPOS["$key"]=1
done

log_info "Total unique repos: ${#ALL_REPOS[@]}"
| 280 | |||
| 281 | # ============================================================================ | ||
| 282 | # Phase 3: Classify each repo according to revised decision tree | ||
| 283 | # ============================================================================ | ||
| 284 | |||
log_info "Classifying repos..."

# Per-reason counters for the summary, all starting at zero.
# Prefix = output tier: ready_* / resync_* / review_*.
declare -A COUNTS
for counter in \
  ready_complete_both ready_deleted ready_empty_prod \
  ready_archive_only ready_not_in_prod ready_archive_ahead \
  resync_missing_archive resync_incomplete_archive \
  review_partial_prod review_nomatch_prod review_parse_failure \
  review_conflicting review_diverged; do
  COUNTS[$counter]=0
done

# One array of formatted output rows per tier.
declare -a READY_LINES RESYNC_LINES REVIEW_LINES
| 307 | |||
| 308 | # Helper function to get context string | ||
#######################################
# Build the comma-separated context column for one repo.
# Globals:   PURGATORY, PARSE_FAIL (read)
# Arguments: $1 - lookup key "repo|npub"
#            $2 - prod status (human-readable)
#            $3 - archive status (human-readable)
# Outputs:   the applicable notes joined with ", " in the order
#            purgatory-expired, parse-failure, archive-has-data;
#            "none" when no note applies
#######################################
get_context() {
  local lookup_key="$1"
  local prod="$2"
  local archive="$3"
  local notes=""

  if [[ -n "${PURGATORY[$lookup_key]:-}" ]]; then
    notes="purgatory-expired"
  fi

  if [[ -n "${PARSE_FAIL[$lookup_key]:-}" ]]; then
    notes="${notes:+$notes, }parse-failure"
  fi

  # Unexpected state: prod is empty although the archive holds something
  if [[ "$prod" == "empty" ]] && ! [[ "$archive" == "missing" || "$archive" == "empty" ]]; then
    notes="${notes:+$notes, }archive-has-data"
  fi

  echo "${notes:-none}"
}
| 340 | |||
| 341 | # Helper to convert category to human-readable status | ||
# Translate a category code into its human-readable status.
# Arguments: $1 - category code (cat1..cat4, or any other value)
# Outputs:   complete / empty / partial / no-match for cat1..cat4;
#            any other value (including "missing") is echoed back unchanged
cat_to_status() {
  local category="$1"

  if [[ "$category" == "cat1" ]]; then
    echo "complete"
  elif [[ "$category" == "cat2" ]]; then
    echo "empty"
  elif [[ "$category" == "cat3" ]]; then
    echo "partial"
  elif [[ "$category" == "cat4" ]]; then
    echo "no-match"
  else
    echo "$category"
  fi
}
| 352 | |||
# Append one output row to a tier and bump its reason counter.
# Reads the loop's current repo, npub, prod_status, archive_status and context.
# Arguments: $1 - tier (ready|resync|review), $2 - COUNTS key, $3 - reason text
record_result() {
  local tier="$1"
  local counter="$2"
  local reason="$3"
  local row="$repo | $npub | $prod_status | $archive_status | $context | $reason"

  case "$tier" in
    ready) READY_LINES+=("$row") ;;
    resync) RESYNC_LINES+=("$row") ;;
    review) REVIEW_LINES+=("$row") ;;
  esac
  COUNTS[$counter]=$((${COUNTS[$counter]} + 1))
}

# Fold "git=<relationship>" into the loop's context, replacing the "none"
# placeholder instead of appending to it.
# Arguments: $1 - git relationship
add_git_context() {
  if [[ -z "$context" || "$context" == "none" ]]; then
    context="git=$1"
  else
    context="$context, git=$1"
  fi
}

LOOP_COUNT=0
for key in "${!ALL_REPOS[@]}"; do
  LOOP_COUNT=$((LOOP_COUNT + 1))
  if ((LOOP_COUNT % 100 == 0)); then
    log_info "Processed $LOOP_COUNT repos..."
  fi
  IFS='|' read -r repo npub <<< "$key"

  prod_cat="${PROD_CAT[$key]:-missing}"
  archive_cat="${ARCHIVE_CAT[$key]:-missing}"
  prod_status=$(cat_to_status "$prod_cat")
  archive_status=$(cat_to_status "$archive_cat")
  # Same inputs for every branch below, so compute the context once.
  context=$(get_context "$key" "$prod_status" "$archive_status")

  # Rule 1: a kind 5 deletion event wins over every other state.
  if [[ -n "${DELETED[$key]:-}" ]]; then
    record_result ready ready_deleted "deleted by user"
    continue
  fi

  # Rule 2: prod states that are decided without looking at git ancestry.
  case "$prod_cat" in
    missing)
      if [[ "$archive_cat" != "missing" ]]; then
        # In archive but not in prod -> no action
        record_result ready ready_archive_only "archive-only (not in prod)"
      elif [[ -n "${PURGATORY[$key]:-}" ]]; then
        # Known only from the purgatory log -> no action; context is fixed
        context="purgatory-expired"
        record_result ready ready_not_in_prod "purgatory-only (not in prod)"
      fi
      # Anything else has no data anywhere and is not reported.
      continue
      ;;
    cat2)
      # Empty in prod -> always no action
      record_result ready ready_empty_prod "empty in prod (user never pushed)"
      continue
      ;;
    cat3)
      # Partial in prod -> always manual investigation
      record_result review review_partial_prod "partial in prod (investigate git data)"
      continue
      ;;
    cat4)
      # No-match in prod -> always manual investigation
      record_result review review_nomatch_prod "no-match in prod (git corruption)"
      continue
      ;;
  esac

  # Only "complete in prod" is left; any unexpected category is not reported.
  [[ "$prod_cat" == "cat1" ]] || continue

  # Rule 3: complete on both sides -> no action.
  if [[ "$archive_cat" == "cat1" ]]; then
    record_result ready ready_complete_both "complete in both"
    continue
  fi

  # Rule 4: archive is missing/incomplete and a parse failure was logged.
  if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
    record_result review review_parse_failure "complete in prod with parse failure"
    continue
  fi

  # Rule 5: let the git ancestry comparison decide.
  git_relationship="${GIT_ANCESTRY[$key]:-unknown}"
  case "$git_relationship" in
    archive-ahead | in-sync)
      # Archive has newer/same git data, which a state event authorized
      # (GRASP enforced) -> use the archive as is.
      add_git_context "$git_relationship"
      record_result ready ready_archive_ahead "archive ahead (use archive data)"
      ;;
    diverged)
      add_git_context "diverged"
      record_result review review_diverged "git histories diverged (manual review)"
      ;;
    *)
      # prod-ahead, archive-only, prod-only, both-empty, or unknown -> resync.
      # "unknown" means no ancestry row exists, so it adds no git= note.
      if [[ "$git_relationship" != "unknown" ]]; then
        add_git_context "$git_relationship"
      fi
      if [[ "$archive_cat" == "missing" ]]; then
        record_result resync resync_missing_archive "trigger re-sync to archive"
      else
        record_result resync resync_incomplete_archive "trigger re-sync (archive incomplete)"
      fi
      ;;
  esac
done
| 476 | |||
| 477 | # ============================================================================ | ||
| 478 | # Phase 4: Write output files | ||
| 479 | # ============================================================================ | ||
| 480 | |||
log_info "Writing output files..."

# One UTC timestamp shared by all generated files.
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S+00:00")

# ready-for-migration.txt: comment header, then one row per repo
{
  cat <<EOF
# Ready for Migration - No action required
# Generated: $TIMESTAMP
# Format: repo | npub | prod_status | archive_status | context | reason
#
EOF
  for line in "${READY_LINES[@]}"; do
    printf '%s\n' "$line"
  done
} > "$READY_FILE"

# needs-resync.txt: header additionally explains the context column
{
  cat <<EOF
# Needs Re-sync - Action required
# Generated: $TIMESTAMP
# Format: repo | npub | prod_status | archive_status | context | action
#
# Context meanings:
# purgatory-expired = archive tried to sync but failed (30min timeout)
# none = archive never tried or announcement missing
#
EOF
  for line in "${RESYNC_LINES[@]}"; do
    printf '%s\n' "$line"
  done
} > "$RESYNC_FILE"

# manual-review.txt: comment header, then one row per repo
{
  cat <<EOF
# Manual Review Required - Investigation needed
# Generated: $TIMESTAMP
# Format: repo | npub | prod_status | archive_status | context | reason
#
EOF
  for line in "${REVIEW_LINES[@]}"; do
    printf '%s\n' "$line"
  done
} > "$REVIEW_FILE"
| 521 | |||
| 522 | # ============================================================================ | ||
| 523 | # Phase 5: Generate summary | ||
| 524 | # ============================================================================ | ||
| 525 | |||
log_info "Generating summary..."

# Tier totals are simply the number of rows collected for each tier.
TOTAL_READY="${#READY_LINES[@]}"
TOTAL_RESYNC="${#RESYNC_LINES[@]}"
TOTAL_REVIEW="${#REVIEW_LINES[@]}"
TOTAL=$((TOTAL_READY + TOTAL_RESYNC + TOTAL_REVIEW))

# Print PART as a percentage of WHOLE with one decimal place.
# Arguments: $1 - part, $2 - whole (must be non-zero)
percent_of() {
  awk -v part="$1" -v whole="$2" 'BEGIN { printf "%.1f", (part / whole) * 100 }'
}

# Default to 0.0 so an empty run never divides by zero.
PCT_READY="0.0"
PCT_RESYNC="0.0"
PCT_REVIEW="0.0"
if ((TOTAL > 0)); then
  PCT_READY=$(percent_of "$TOTAL_READY" "$TOTAL")
  PCT_RESYNC=$(percent_of "$TOTAL_RESYNC" "$TOTAL")
  PCT_REVIEW=$(percent_of "$TOTAL_REVIEW" "$TOTAL")
fi
| 543 | |||
# Count the lines of a log file that do not start with '#'.
# `grep -c` prints "0" AND exits 1 when nothing matches, so the common
# `$(grep -c ... || echo 0)` idiom yields "0<newline>0"; capture the count
# and ignore the exit status instead.
# Arguments: $1 - file path
# Outputs:   the count, or 0 when the file does not exist
count_data_lines() {
  local file="$1"
  local count=0
  if [[ -f "$file" ]]; then
    count=$(grep -c -v '^#' -- "$file") || true
  fi
  echo "${count:-0}"
}

# Markdown summary of the run: tier totals, per-reason counts, input sizes
# and next steps. Backticks are escaped so the here-document keeps them
# literal; $(...) and $VAR are expanded.
cat > "$SUMMARY_FILE" <<EOF
# Migration Classification Summary
Generated: $TIMESTAMP
Analysis Directory: $ANALYSIS_DIR

## Overview

| Category | Count | Percentage |
|----------|-------|------------|
| Ready for Migration | $TOTAL_READY | $PCT_READY% |
| Needs Re-sync | $TOTAL_RESYNC | $PCT_RESYNC% |
| Manual Review | $TOTAL_REVIEW | $PCT_REVIEW% |
| **Total** | **$TOTAL** | **100%** |

## Tier 1: Ready for Migration ($TOTAL_READY repos)

These repositories are ready for migration or don't need migration:

| Reason | Count |
|--------|-------|
| complete in both prod and archive | ${COUNTS[ready_complete_both]} |
| archive ahead (has newer git data) | ${COUNTS[ready_archive_ahead]} |
| deleted by user | ${COUNTS[ready_deleted]} |
| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |
| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |
| purgatory-only (not in prod) | ${COUNTS[ready_not_in_prod]} |

## Tier 2: Needs Re-sync ($TOTAL_RESYNC repos)

These repositories need re-sync to archive before migration:

| Reason | Count | Action |
|--------|-------|--------|
| complete in prod, missing from archive | ${COUNTS[resync_missing_archive]} | trigger re-sync |
| complete in prod, incomplete in archive | ${COUNTS[resync_incomplete_archive]} | trigger re-sync |

### Purgatory Context

Repos in needs-resync.txt include purgatory context:
- **purgatory-expired**: Archive tried to sync but failed (30min timeout)
- **none**: Archive never tried or announcement missing

## Tier 3: Manual Review ($TOTAL_REVIEW repos)

These repositories require human investigation:

| Reason | Count |
|--------|-------|
| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |
| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |
| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |
| git histories diverged | ${COUNTS[review_diverged]} |

## Input Data Summary

### Prod Categories
- Category 1 (complete): $(wc -l < "$PROD_DIR/category1-complete-match.txt")
- Category 2 (empty): $(wc -l < "$PROD_DIR/category2-empty-blank.txt")
- Category 3 (partial): $(wc -l < "$PROD_DIR/category3-partial-match.txt")
- Category 4 (no match): $(wc -l < "$PROD_DIR/category4-no-match.txt")

### Archive Categories
- Category 1 (complete): $(wc -l < "$ARCHIVE_DIR/category1-complete-match.txt")
- Category 2 (empty): $(wc -l < "$ARCHIVE_DIR/category2-empty-blank.txt")
- Category 3 (partial): $(wc -l < "$ARCHIVE_DIR/category3-partial-match.txt")
- Category 4 (no match): $(wc -l < "$ARCHIVE_DIR/category4-no-match.txt")

### Logs
- Parse failures: $(count_data_lines "$LOGS_DIR/parse-failures.txt")
- Purgatory expired: $(count_data_lines "$LOGS_DIR/purgatory-expired.txt")

## Output Files

- \`results/ready-for-migration.txt\` - $TOTAL_READY repos ready for migration
- \`results/needs-resync.txt\` - $TOTAL_RESYNC repos needing re-sync
- \`results/manual-review.txt\` - $TOTAL_REVIEW repos needing investigation
- \`results/summary.txt\` - This summary file

## Recommended Next Steps

1. **Review needs-resync.txt** - Trigger re-sync for these repos
2. **Review manual-review.txt** - Investigate unusual states
3. **Verify ready-for-migration.txt** - Spot-check a few repos
4. **Plan migration window** - Schedule cutover when action items resolved
EOF
| 629 | |||
| 630 | # ============================================================================ | ||
| 631 | # Phase 6: Print summary to console | ||
| 632 | # ============================================================================ | ||
| 633 | |||
# Console recap of the same totals that were written to the summary file.
echo ""
log_success "Classification complete!"
cat <<EOF

=== Summary ===
Ready for Migration: $TOTAL_READY ($PCT_READY%)
 - Complete in both: ${COUNTS[ready_complete_both]}
 - Archive ahead: ${COUNTS[ready_archive_ahead]}
 - Deleted by user: ${COUNTS[ready_deleted]}
 - Empty in prod: ${COUNTS[ready_empty_prod]}
 - Archive-only: ${COUNTS[ready_archive_only]}
 - Purgatory-only: ${COUNTS[ready_not_in_prod]}

Needs Re-sync: $TOTAL_RESYNC ($PCT_RESYNC%)
 - Missing from archive: ${COUNTS[resync_missing_archive]}
 - Incomplete in archive: ${COUNTS[resync_incomplete_archive]}

Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)
 - Partial in prod: ${COUNTS[review_partial_prod]}
 - No-match in prod: ${COUNTS[review_nomatch_prod]}
 - Parse failures: ${COUNTS[review_parse_failure]}
 - Git diverged: ${COUNTS[review_diverged]}

Total: $TOTAL repos

Output files:
 $READY_FILE
 $RESYNC_FILE
 $REVIEW_FILE
 $SUMMARY_FILE
EOF