diff options
Diffstat (limited to 'scripts')
| -rwxr-xr-x | scripts/40-classify-actions.sh | 588 |
1 files changed, 0 insertions, 588 deletions
diff --git a/scripts/40-classify-actions.sh b/scripts/40-classify-actions.sh deleted file mode 100755 index 021a2da..0000000 --- a/scripts/40-classify-actions.sh +++ /dev/null | |||
| @@ -1,588 +0,0 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 40-classify-actions.sh - Classify repos by migration action required | ||
| 4 | # | ||
| 5 | # Implements the redesigned classification system (Option B) with user feedback: | ||
| 6 | # | ||
| 7 | # Tier 1: No Action Required (ready-for-migration.txt) | ||
| 8 | # - Complete in both (prod=cat1, archive=cat1) | ||
| 9 | # - Deleted by user (kind 5 event) | ||
| 10 | # - Empty in prod (prod=cat2, any archive status) | ||
| 11 | # - Archive-only (archive=any, prod=missing) | ||
| 12 | # - Not in prod (purgatory-only, prod=missing) | ||
| 13 | # | ||
| 14 | # Tier 2: Action Required (needs-resync.txt) | ||
| 15 | # - Complete in prod, missing from archive (with purgatory context) | ||
| 16 | # - Complete in prod, incomplete in archive (with purgatory context) | ||
| 17 | # | ||
| 18 | # Tier 3: Manual Investigation (manual-review.txt) | ||
| 19 | # - Partial in prod (prod=cat3) | ||
| 20 | # - No-match in prod (prod=cat4) | ||
| 21 | # - Parse failures | ||
| 22 | # - Conflicting states (listed for completeness; no rule in this script currently assigns it) | ||
| 23 | # | ||
| 24 | # Usage: ./40-classify-actions.sh <analysis-dir> | ||
| 25 | # | ||
| 26 | # Output format: repo | npub | prod_status | archive_status | context | action | ||
| 27 | # | ||
| 28 | |||
set -euo pipefail

# ANSI color escape sequences for the log helpers below.
# They are stored unexpanded and interpreted by printf's %b.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Log helpers: print a colored level tag followed by all arguments joined
# by spaces. Only the color variables go through %b; the message itself is
# printed with %s so backslashes in paths or repo names are emitted
# verbatim instead of being interpreted as escape sequences.
# log_error writes to stderr, the others to stdout.
log_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$*"; }
log_success() { printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$*"; }
log_warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*"; }
log_error() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2; }
| 42 | |||
# Check arguments. This is an error path, so usage goes to stderr.
if [[ $# -lt 1 ]]; then
  echo "Usage: $0 <analysis-dir>" >&2
  echo "Example: $0 work/migration-analysis-20260123-200701" >&2
  exit 1
fi

ANALYSIS_DIR="$1"

# Validate analysis directory
if [[ ! -d "$ANALYSIS_DIR" ]]; then
  log_error "Analysis directory not found: $ANALYSIS_DIR"
  exit 1
fi

# Define paths
PROD_DIR="$ANALYSIS_DIR/prod"
ARCHIVE_DIR="$ANALYSIS_DIR/archive"
COMPARISON_DIR="$ANALYSIS_DIR/comparison"
LOGS_DIR="$ANALYSIS_DIR/logs"
RESULTS_DIR="$ANALYSIS_DIR/results"

# Validate required directories
for dir in "$PROD_DIR" "$ARCHIVE_DIR" "$COMPARISON_DIR" "$LOGS_DIR"; do
  if [[ ! -d "$dir" ]]; then
    log_error "Required directory not found: $dir"
    exit 1
  fi
done

# Validate the category files that Phase 1 reads unconditionally, so a
# missing input is reported clearly before any work starts.
for dir in "$PROD_DIR" "$ARCHIVE_DIR"; do
  for input in \
    category1-complete-match.txt \
    category2-empty-blank.txt \
    category3-partial-match.txt \
    category4-no-match.txt; do
    if [[ ! -r "$dir/$input" ]]; then
      log_error "Required input file not found or unreadable: $dir/$input"
      exit 1
    fi
  done
done

# Create results directory
mkdir -p "$RESULTS_DIR"

# Output files
READY_FILE="$RESULTS_DIR/ready-for-migration.txt"
RESYNC_FILE="$RESULTS_DIR/needs-resync.txt"
REVIEW_FILE="$RESULTS_DIR/manual-review.txt"
SUMMARY_FILE="$RESULTS_DIR/summary.txt"

# Temporary directory, removed on any exit path.
# NOTE(review): nothing later in the visible script writes to TMP_DIR.
TMP_DIR=$(mktemp -d)
trap 'rm -rf "$TMP_DIR"' EXIT

log_info "Starting classification with revised system (Option B)"
log_info "Analysis directory: $ANALYSIS_DIR"
| 88 | |||
# ============================================================================
# Phase 1: Build lookup tables from source data
# ============================================================================

log_info "Building lookup tables..."

# Category lookups: "repo|npub" -> cat1..cat4
declare -A PROD_CAT=()
declare -A ARCHIVE_CAT=()

#######################################
# Load one pipe-delimited category file into a lookup table.
# Each line is "repo | npub | ..."; only the first two fields are used.
# Lines with an empty repo or npub after trimming are skipped.
# Fields are trimmed with parameter expansion rather than `echo | xargs`,
# so names containing quotes or backslashes are kept verbatim and no
# processes are spawned per line. A final line without a trailing newline
# is still processed.
# Globals:   PROD_CAT or ARCHIVE_CAT (written)
# Arguments: $1 - target table: "prod" or "archive"
#            $2 - category value to store (e.g. cat1)
#            $3 - path of the category file
# Returns:   0 on success, 2 for an unknown table, non-zero if the file
#            cannot be opened
#######################################
load_category() {
  local table="$1" category="$2" file="$3"
  local repo npub _rest key

  while IFS='|' read -r repo npub _rest || [[ -n "${repo}${npub}" ]]; do
    # Strip leading, then trailing, spaces and tabs.
    repo="${repo#"${repo%%[![:blank:]]*}"}"
    repo="${repo%"${repo##*[![:blank:]]}"}"
    npub="${npub#"${npub%%[![:blank:]]*}"}"
    npub="${npub%"${npub##*[![:blank:]]}"}"
    if [[ -z "$repo" || -z "$npub" ]]; then
      continue
    fi

    key="$repo|$npub"
    case "$table" in
      prod) PROD_CAT["$key"]="$category" ;;
      archive) ARCHIVE_CAT["$key"]="$category" ;;
      *) return 2 ;;
    esac
  done < "$file"
}

# Files are loaded in category order; if a key appears in more than one
# file, the later category wins.
load_category prod cat1 "$PROD_DIR/category1-complete-match.txt"
load_category prod cat2 "$PROD_DIR/category2-empty-blank.txt"
load_category prod cat3 "$PROD_DIR/category3-partial-match.txt"
load_category prod cat4 "$PROD_DIR/category4-no-match.txt"

log_info "Loaded ${#PROD_CAT[@]} prod entries"

load_category archive cat1 "$ARCHIVE_DIR/category1-complete-match.txt"
load_category archive cat2 "$ARCHIVE_DIR/category2-empty-blank.txt"
load_category archive cat3 "$ARCHIVE_DIR/category3-partial-match.txt"
load_category archive cat4 "$ARCHIVE_DIR/category4-no-match.txt"

log_info "Loaded ${#ARCHIVE_CAT[@]} archive entries"
| 158 | |||
#######################################
# Print "repo|npub" for every data row of a tab-separated log file.
# Rows whose first field starts with '#' are treated as comments, and
# rows with an empty repo or npub column are skipped.
# awk splits on every single tab, so an empty field keeps its column
# position. `IFS=$'\t' read` would collapse consecutive tabs and shift
# the columns. A final line without a trailing newline is still read.
# Arguments: $1 - path of the TSV file
#            $2 - 1-based column holding the repo
#            $3 - 1-based column holding the npub
# Outputs:   one "repo|npub" key per line on stdout
#######################################
tsv_repo_keys() {
  local file="$1" repo_col="$2" npub_col="$3"
  awk -F'\t' -v r="$repo_col" -v n="$npub_col" \
    '$1 !~ /^#/ && $r != "" && $n != "" { print $r "|" $n }' "$file"
}

# Build purgatory lookup: repo|npub -> 1 (if purgatory expired)
# File columns as read here: repo<TAB>npub<TAB>timestamp<TAB>reason
declare -A PURGATORY=()
if [[ -f "$LOGS_DIR/purgatory-expired.txt" ]]; then
  purgatory_keys=$(tsv_repo_keys "$LOGS_DIR/purgatory-expired.txt" 1 2)
  while IFS= read -r key; do
    [[ -n "$key" ]] || continue
    PURGATORY["$key"]=1
  done <<< "$purgatory_keys"
fi
log_info "Loaded ${#PURGATORY[@]} purgatory entries"

# Build parse failure lookup: repo|npub -> 1 (if parse failure logged)
# Parse failures file format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
declare -A PARSE_FAIL=()
if [[ -f "$LOGS_DIR/parse-failures.txt" ]]; then
  parse_fail_keys=$(tsv_repo_keys "$LOGS_DIR/parse-failures.txt" 4 5)
  while IFS= read -r key; do
    [[ -n "$key" ]] || continue
    PARSE_FAIL["$key"]=1
  done <<< "$parse_fail_keys"
fi
log_info "Loaded ${#PARSE_FAIL[@]} parse failure entries"
| 183 | |||
# Build deletion lookup: repo|npub -> 1 (if kind 5 deletion event)
# Deletions are in NDJSON format with "a" tags like "30617:pubkey_hex:repo"
# We need to convert hex pubkeys to npub format using nak
declare -A DELETED=()

#######################################
# Record the repos referenced by deletion events in an NDJSON file.
# Reads the "a" tags of every event, keeps those of the form
# "30617:<pubkey_hex>:<repo>", converts each distinct pubkey to npub with
# `nak encode npub` (once per key), and marks DELETED["repo|npub"]=1.
# The repo is everything after the second colon, so identifiers that
# contain ':' are kept whole. Keys that nak cannot convert are skipped.
# Globals:   DELETED (written)
# Arguments: $1 - path of the deletions file; a missing file is a no-op
# Returns:   0 on success or when the file is absent / nak is unavailable;
#            1 when jq is missing or fails to parse the file
#######################################
process_deletions() {
  local file="$1"
  [[ -f "$file" ]] || return 0

  if ! command -v jq >/dev/null 2>&1; then
    log_error "jq is required to read deletion events: $file"
    return 1
  fi
  # Without nak every conversion would fail and all deletions would be
  # dropped silently; say so instead.
  if ! command -v nak >/dev/null 2>&1; then
    log_warn "nak not found; cannot convert pubkeys, ignoring deletions in $file"
    return 0
  fi

  # jq's stderr stays visible so a malformed file is diagnosable. Events
  # without a tags array are tolerated via `[]?`.
  local a_tags
  if ! a_tags=$(jq -r '.tags[]? | select(.[0] == "a") | .[1] // empty' "$file"); then
    log_error "Failed to parse deletion events: $file"
    return 1
  fi

  local -A hex_to_npub=()
  local tag rest hex repo npub
  while IFS= read -r tag; do
    # Only repo-announcement addresses are relevant.
    [[ "$tag" == 30617:*:* ]] || continue
    rest="${tag#30617:}"
    hex="${rest%%:*}"
    repo="${rest#*:}"
    [[ -n "$hex" && -n "$repo" ]] || continue

    # Convert each pubkey once; failed conversions are cached as empty.
    if [[ -z "${hex_to_npub["$hex"]+set}" ]]; then
      hex_to_npub["$hex"]=$(nak encode npub "$hex" 2>/dev/null || true)
    fi
    npub="${hex_to_npub["$hex"]}"
    [[ -n "$npub" ]] || continue

    DELETED["$repo|$npub"]=1
  done <<< "$a_tags"

  return 0
}

# Process prod and archive deletions
process_deletions "$PROD_DIR/raw/deletions.json"
process_deletions "$ARCHIVE_DIR/raw/deletions.json"
log_info "Loaded ${#DELETED[@]} deletion entries"
| 227 | |||
# ============================================================================
# Phase 2: Build unique repo list from all sources
# ============================================================================

log_info "Building unique repo list..."

# Union of every "repo|npub" key seen in prod, archive, or the purgatory
# log. Using the key as an associative-array index de-duplicates for free.
declare -A ALL_REPOS
for key in "${!PROD_CAT[@]}" "${!ARCHIVE_CAT[@]}" "${!PURGATORY[@]}"; do
  ALL_REPOS["$key"]=1
done

log_info "Total unique repos: ${#ALL_REPOS[@]}"
| 246 | |||
# ============================================================================
# Phase 3: Classify each repo according to revised decision tree
# ============================================================================

log_info "Classifying repos..."

# Per-reason counters for the summary, all starting at zero.
declare -A COUNTS=(
  [ready_complete_both]=0
  [ready_deleted]=0
  [ready_empty_prod]=0
  [ready_archive_only]=0
  [ready_not_in_prod]=0
  [resync_missing_archive]=0
  [resync_incomplete_archive]=0
  [review_partial_prod]=0
  [review_nomatch_prod]=0
  [review_parse_failure]=0
  [review_conflicting]=0
)

# One array of formatted output lines per result tier.
declare -a READY_LINES RESYNC_LINES REVIEW_LINES
| 271 | |||
# Build the comma-separated context column for one repo.
# Arguments: $1 - "repo|npub" key
#            $2 - human-readable prod status
#            $3 - human-readable archive status
# Globals:   PURGATORY, PARSE_FAIL (read)
# Outputs:   the applicable notes joined by ", " in the order
#            purgatory-expired, parse-failure, archive-has-data,
#            or "none" when no note applies
get_context() {
  local key="$1"
  local prod_status="$2"
  local archive_status="$3"
  local -a notes=()

  # Archive attempted a sync that expired in purgatory.
  [[ -z "${PURGATORY[$key]:-}" ]] || notes+=("purgatory-expired")

  # A parse failure was logged for this repo.
  [[ -z "${PARSE_FAIL[$key]:-}" ]] || notes+=("parse-failure")

  # Unexpected state: prod is empty although the archive holds something.
  if [[ "$prod_status" == "empty" ]]; then
    case "$archive_status" in
      missing | empty) ;;
      *) notes+=("archive-has-data") ;;
    esac
  fi

  if (( ${#notes[@]} == 0 )); then
    echo "none"
    return 0
  fi

  local joined
  printf -v joined '%s, ' "${notes[@]}"
  echo "${joined%, }"
}
| 304 | |||
# Translate a category code into its human-readable status.
# cat1..cat4 map to complete/empty/partial/no-match; any other value
# (including "missing") is echoed back unchanged.
cat_to_status() {
  local label="$1"
  case "$1" in
    cat1) label="complete" ;;
    cat2) label="empty" ;;
    cat3) label="partial" ;;
    cat4) label="no-match" ;;
  esac
  echo "$label"
}
| 316 | |||
# Walk every known repo once. For each one decide the result tier, the
# summary counter and the action text, then append a single formatted
# line to that tier's array.
LOOP_COUNT=0
for key in "${!ALL_REPOS[@]}"; do
  LOOP_COUNT=$((LOOP_COUNT + 1))
  if (( LOOP_COUNT % 100 == 0 )); then
    log_info "Processed $LOOP_COUNT repos..."
  fi
  IFS='|' read -r repo npub <<< "$key"

  prod_cat="${PROD_CAT[$key]:-missing}"
  archive_cat="${ARCHIVE_CAT[$key]:-missing}"
  prod_status=$(cat_to_status "$prod_cat")
  archive_status=$(cat_to_status "$archive_cat")

  # An empty tier after the decision tree means nothing is emitted.
  tier=""
  bucket=""
  action=""
  context=$(get_context "$key" "$prod_status" "$archive_status")

  if [[ -n "${DELETED[$key]:-}" ]]; then
    # 1. A kind 5 deletion event overrides every other state.
    tier="ready"
    bucket="ready_deleted"
    action="deleted by user"
  else
    # 2. Otherwise the prod status drives the decision.
    case "$prod_cat" in
      missing)
        if [[ "$archive_cat" != "missing" ]]; then
          # In archive but not in prod -> no action
          tier="ready"
          bucket="ready_archive_only"
          action="archive-only (not in prod)"
        elif [[ -n "${PURGATORY[$key]:-}" ]]; then
          # Known only from the purgatory log -> no action; the context
          # is the fixed purgatory marker for this case.
          context="purgatory-expired"
          tier="ready"
          bucket="ready_not_in_prod"
          action="purgatory-only (not in prod)"
        fi
        # Otherwise skip (no data anywhere)
        ;;

      cat2)
        # Empty in prod -> always no action required
        tier="ready"
        bucket="ready_empty_prod"
        action="empty in prod (user never pushed)"
        ;;

      cat1)
        if [[ "$archive_cat" == "cat1" ]]; then
          # Complete in both -> no action
          tier="ready"
          bucket="ready_complete_both"
          action="complete in both"
        elif [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
          # Archive is behind and a parse failure was logged -> review
          tier="review"
          bucket="review_parse_failure"
          action="complete in prod with parse failure"
        elif [[ "$archive_cat" == "missing" ]]; then
          tier="resync"
          bucket="resync_missing_archive"
          action="trigger re-sync to archive"
        else
          tier="resync"
          bucket="resync_incomplete_archive"
          action="trigger re-sync (archive incomplete)"
        fi
        ;;

      cat3)
        # Partial in prod -> always manual investigation
        tier="review"
        bucket="review_partial_prod"
        action="partial in prod (investigate git data)"
        ;;

      cat4)
        # No-match in prod -> always manual investigation
        tier="review"
        bucket="review_nomatch_prod"
        action="no-match in prod (git corruption)"
        ;;
    esac
  fi

  [[ -n "$tier" ]] || continue

  line="$repo | $npub | $prod_status | $archive_status | $context | $action"
  case "$tier" in
    ready) READY_LINES+=("$line") ;;
    resync) RESYNC_LINES+=("$line") ;;
    review) REVIEW_LINES+=("$line") ;;
  esac
  COUNTS[$bucket]=$(( ${COUNTS[$bucket]} + 1 ))
done
| 406 | |||
# ============================================================================
# Phase 4: Write output files
# ============================================================================

log_info "Writing output files..."

TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S+00:00")

# Print each argument on its own line; print nothing when called without
# arguments. Callers pass arrays as ${ARR[@]+"${ARR[@]}"} so that an empty
# or never-assigned array expands to zero words instead of tripping
# `set -u` on Bash releases older than 4.4.
emit_lines() {
  (( $# > 0 )) || return 0
  printf '%s\n' "$@"
}

# Write ready-for-migration.txt
{
  echo "# Ready for Migration - No action required"
  echo "# Generated: $TIMESTAMP"
  echo "# Format: repo | npub | prod_status | archive_status | context | reason"
  echo "#"
  emit_lines ${READY_LINES[@]+"${READY_LINES[@]}"}
} > "$READY_FILE"

# Write needs-resync.txt
{
  echo "# Needs Re-sync - Action required"
  echo "# Generated: $TIMESTAMP"
  echo "# Format: repo | npub | prod_status | archive_status | context | action"
  echo "#"
  echo "# Context meanings:"
  echo "# purgatory-expired = archive tried to sync but failed (30min timeout)"
  echo "# none = archive never tried or announcement missing"
  echo "#"
  emit_lines ${RESYNC_LINES[@]+"${RESYNC_LINES[@]}"}
} > "$RESYNC_FILE"

# Write manual-review.txt
{
  echo "# Manual Review Required - Investigation needed"
  echo "# Generated: $TIMESTAMP"
  echo "# Format: repo | npub | prod_status | archive_status | context | reason"
  echo "#"
  emit_lines ${REVIEW_LINES[@]+"${REVIEW_LINES[@]}"}
} > "$REVIEW_FILE"
| 451 | |||
# ============================================================================
# Phase 5: Generate summary
# ============================================================================

log_info "Generating summary..."

# Print the number of lines in a file that do not start with '#'.
# Prints 0 when the file is missing. `grep -c` already prints "0" when
# nothing matches but exits 1, so its status is ignored; falling back
# with `|| echo 0` would print a second "0".
# Arguments: $1 - path of the log file
count_data_lines() {
  local file="$1" count=""
  if [[ -f "$file" ]]; then
    count=$(grep -c -v '^#' "$file" 2>/dev/null) || true
  fi
  echo "${count:-0}"
}

# Print part/total as a percentage with one decimal; "0.0" when total is 0.
# Arguments: $1 - part, $2 - total (non-negative integers)
percent_of() {
  local part="$1" total="$2"
  if [[ "$total" -gt 0 ]]; then
    awk -v part="$part" -v total="$total" \
      'BEGIN { printf "%.1f", (part / total) * 100 }'
  else
    printf '0.0'
  fi
}

TOTAL_READY=${#READY_LINES[@]}
TOTAL_RESYNC=${#RESYNC_LINES[@]}
TOTAL_REVIEW=${#REVIEW_LINES[@]}
TOTAL=$((TOTAL_READY + TOTAL_RESYNC + TOTAL_REVIEW))

# Calculate percentages
PCT_READY=$(percent_of "$TOTAL_READY" "$TOTAL")
PCT_RESYNC=$(percent_of "$TOTAL_RESYNC" "$TOTAL")
PCT_REVIEW=$(percent_of "$TOTAL_REVIEW" "$TOTAL")

{
  echo "# Migration Classification Summary"
  echo "Generated: $TIMESTAMP"
  echo "Analysis Directory: $ANALYSIS_DIR"
  echo ""
  echo "## Overview"
  echo ""
  echo "| Category | Count | Percentage |"
  echo "|----------|-------|------------|"
  echo "| Ready for Migration | $TOTAL_READY | $PCT_READY% |"
  echo "| Needs Re-sync | $TOTAL_RESYNC | $PCT_RESYNC% |"
  echo "| Manual Review | $TOTAL_REVIEW | $PCT_REVIEW% |"
  echo "| **Total** | **$TOTAL** | **100%** |"
  echo ""
  echo "## Tier 1: Ready for Migration ($TOTAL_READY repos)"
  echo ""
  echo "These repositories are ready for migration or don't need migration:"
  echo ""
  echo "| Reason | Count |"
  echo "|--------|-------|"
  echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |"
  echo "| deleted by user | ${COUNTS[ready_deleted]} |"
  echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |"
  echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |"
  echo "| purgatory-only (not in prod) | ${COUNTS[ready_not_in_prod]} |"
  echo ""
  echo "## Tier 2: Needs Re-sync ($TOTAL_RESYNC repos)"
  echo ""
  echo "These repositories need re-sync to archive before migration:"
  echo ""
  echo "| Reason | Count | Action |"
  echo "|--------|-------|--------|"
  echo "| complete in prod, missing from archive | ${COUNTS[resync_missing_archive]} | trigger re-sync |"
  echo "| complete in prod, incomplete in archive | ${COUNTS[resync_incomplete_archive]} | trigger re-sync |"
  echo ""
  echo "### Purgatory Context"
  echo ""
  echo "Repos in needs-resync.txt include purgatory context:"
  echo "- **purgatory-expired**: Archive tried to sync but failed (30min timeout)"
  echo "- **none**: Archive never tried or announcement missing"
  echo ""
  echo "## Tier 3: Manual Review ($TOTAL_REVIEW repos)"
  echo ""
  echo "These repositories require human investigation:"
  echo ""
  echo "| Reason | Count |"
  echo "|--------|-------|"
  echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |"
  echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |"
  echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |"
  echo ""
  echo "## Input Data Summary"
  echo ""
  echo "### Prod Categories"
  echo "- Category 1 (complete): $(wc -l < "$PROD_DIR/category1-complete-match.txt")"
  echo "- Category 2 (empty): $(wc -l < "$PROD_DIR/category2-empty-blank.txt")"
  echo "- Category 3 (partial): $(wc -l < "$PROD_DIR/category3-partial-match.txt")"
  echo "- Category 4 (no match): $(wc -l < "$PROD_DIR/category4-no-match.txt")"
  echo ""
  echo "### Archive Categories"
  echo "- Category 1 (complete): $(wc -l < "$ARCHIVE_DIR/category1-complete-match.txt")"
  echo "- Category 2 (empty): $(wc -l < "$ARCHIVE_DIR/category2-empty-blank.txt")"
  echo "- Category 3 (partial): $(wc -l < "$ARCHIVE_DIR/category3-partial-match.txt")"
  echo "- Category 4 (no match): $(wc -l < "$ARCHIVE_DIR/category4-no-match.txt")"
  echo ""
  echo "### Logs"
  echo "- Parse failures: $(count_data_lines "$LOGS_DIR/parse-failures.txt")"
  echo "- Purgatory expired: $(count_data_lines "$LOGS_DIR/purgatory-expired.txt")"
  echo ""
  echo "## Output Files"
  echo ""
  echo "- \`results/ready-for-migration.txt\` - $TOTAL_READY repos ready for migration"
  echo "- \`results/needs-resync.txt\` - $TOTAL_RESYNC repos needing re-sync"
  echo "- \`results/manual-review.txt\` - $TOTAL_REVIEW repos needing investigation"
  echo "- \`results/summary.txt\` - This summary file"
  echo ""
  echo "## Recommended Next Steps"
  echo ""
  echo "1. **Review needs-resync.txt** - Trigger re-sync for these repos"
  echo "2. **Review manual-review.txt** - Investigate unusual states"
  echo "3. **Verify ready-for-migration.txt** - Spot-check a few repos"
  echo "4. **Plan migration window** - Schedule cutover when action items resolved"
} > "$SUMMARY_FILE"
| 557 | |||
# ============================================================================
# Phase 6: Print summary to console
# ============================================================================

# Console recap of the totals, per-reason counters and output file paths.
echo
log_success "Classification complete!"
cat <<EOF

=== Summary ===
Ready for Migration: $TOTAL_READY ($PCT_READY%)
 - Complete in both: ${COUNTS[ready_complete_both]}
 - Deleted by user: ${COUNTS[ready_deleted]}
 - Empty in prod: ${COUNTS[ready_empty_prod]}
 - Archive-only: ${COUNTS[ready_archive_only]}
 - Purgatory-only: ${COUNTS[ready_not_in_prod]}

Needs Re-sync: $TOTAL_RESYNC ($PCT_RESYNC%)
 - Missing from archive: ${COUNTS[resync_missing_archive]}
 - Incomplete in archive: ${COUNTS[resync_incomplete_archive]}

Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)
 - Partial in prod: ${COUNTS[review_partial_prod]}
 - No-match in prod: ${COUNTS[review_nomatch_prod]}
 - Parse failures: ${COUNTS[review_parse_failure]}

Total: $TOTAL repos

Output files:
 $READY_FILE
 $RESYNC_FILE
 $REVIEW_FILE
 $SUMMARY_FILE
EOF