upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/docs/how-to/migration-scripts/40-classify-actions.sh
blob: 81559aa1a7e29231cde896be96130a88e8283c7d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
#!/usr/bin/env bash
#
# 40-classify-actions.sh - Classify repos by migration action required
#
# Implements the redesigned classification system (Option B) with user feedback:
#
# Tier 1: No Action Required (ready-for-migration.txt)
#   - Complete in both (prod=cat1, archive=cat1)
#   - Deleted by user (kind 5 event)
#   - Empty in prod (prod=cat2, any archive status)
#   - Archive-only (archive=any, prod=missing)
#   - Not in prod (purgatory-only, prod=missing)
#
# Tier 2: Action Required (needs-resync.txt)
#   - Complete in prod, missing from archive (with purgatory context)
#   - Complete in prod, incomplete in archive (with purgatory context)
#
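# Tier 3: Manual Investigation (manual-review.txt)
#   - Partial in prod (prod=cat3)
#   - No-match in prod (prod=cat4)
#   - Parse failures (complete in prod, archive not complete, parse failure logged)
#   - Conflicting states (counter reserved; no rule currently emits this)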
#
# Usage: ./40-classify-actions.sh <analysis-dir>
#
# Output format: repo | npub | prod_status | archive_status | context | action
#

# Strict mode: abort on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail

# Colors for output.
# Stored as real escape bytes (ANSI-C quoting) so they can be printed with a
# plain '%s' and still work if passed to `echo -e`.
RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
BLUE=$'\033[0;34m'
NC=$'\033[0m' # No Color

# Logging helpers. Each prints "<colored tag> <all arguments joined by spaces>".
# The message is emitted verbatim: backslash sequences inside it (for example a
# path containing "\t" or "\n") are NOT interpreted, unlike with `echo -e`.
# log_error writes to stderr; the others write to stdout.
log_info() { printf '%s[INFO]%s %s\n' "$BLUE" "$NC" "$*"; }
log_success() { printf '%s[OK]%s %s\n' "$GREEN" "$NC" "$*"; }
log_warn() { printf '%s[WARN]%s %s\n' "$YELLOW" "$NC" "$*"; }
log_error() { printf '%s[ERROR]%s %s\n' "$RED" "$NC" "$*" >&2; }

# Check arguments: the analysis directory is the single required parameter.
if [[ $# -lt 1 ]]; then
    # This is the failure path (exit 1), so the usage text is a diagnostic and
    # belongs on stderr rather than stdout.
    {
        echo "Usage: $0 <analysis-dir>"
        echo "Example: $0 work/migration-analysis-20260123-200701"
    } >&2
    exit 1
fi

ANALYSIS_DIR="$1"

# Validate analysis directory
if [[ ! -d "$ANALYSIS_DIR" ]]; then
    log_error "Analysis directory not found: $ANALYSIS_DIR"
    exit 1
fi

# Define paths (all relative to the analysis directory)
PROD_DIR="$ANALYSIS_DIR/prod"
ARCHIVE_DIR="$ANALYSIS_DIR/archive"
COMPARISON_DIR="$ANALYSIS_DIR/comparison"
LOGS_DIR="$ANALYSIS_DIR/logs"
RESULTS_DIR="$ANALYSIS_DIR/results"

# Validate required directories
for dir in "$PROD_DIR" "$ARCHIVE_DIR" "$COMPARISON_DIR" "$LOGS_DIR"; do
    if [[ ! -d "$dir" ]]; then
        log_error "Required directory not found: $dir"
        exit 1
    fi
done

# Validate required category listings up front. Each of these files is read
# unconditionally further down; checking here turns a bare shell redirection
# error in the middle of the run into a clear message before any output is
# created.
for side_dir in "$PROD_DIR" "$ARCHIVE_DIR"; do
    for listing in \
        category1-complete-match.txt \
        category2-empty-blank.txt \
        category3-partial-match.txt \
        category4-no-match.txt; do
        if [[ ! -r "$side_dir/$listing" ]]; then
            log_error "Required input file not found or unreadable: $side_dir/$listing"
            exit 1
        fi
    done
done

# Create results directory
mkdir -p "$RESULTS_DIR"

# Output files
READY_FILE="$RESULTS_DIR/ready-for-migration.txt"
RESYNC_FILE="$RESULTS_DIR/needs-resync.txt"
REVIEW_FILE="$RESULTS_DIR/manual-review.txt"
SUMMARY_FILE="$RESULTS_DIR/summary.txt"

# Temporary files for processing; the directory is removed on any exit path.
TMP_DIR=$(mktemp -d)
trap 'rm -rf "$TMP_DIR"' EXIT

log_info "Starting classification with revised system (Option B)"
log_info "Analysis directory: $ANALYSIS_DIR"

# ============================================================================
# Phase 1: Build lookup tables from source data
# ============================================================================

log_info "Building lookup tables..."

# Build prod category lookup: repo|npub -> category
# The listings are read in the order cat1..cat4, so when a key shows up in
# more than one file the category of the later file is the one that sticks.
declare -A PROD_CAT
for spec in \
    "cat1:category1-complete-match.txt" \
    "cat2:category2-empty-blank.txt" \
    "cat3:category3-partial-match.txt" \
    "cat4:category4-no-match.txt"; do
    label="${spec%%:*}"
    listing="${spec#*:}"
    # Fields are '|' separated; only the first two are used. The trailing
    # `|| [[ -n "$repo" ]]` keeps a final record that has no newline.
    while IFS='|' read -r repo npub _ || [[ -n "$repo" ]]; do
        repo="${repo// /}"  # Remove all spaces
        npub="${npub// /}"  # Remove all spaces
        if [[ -n "$repo" && -n "$npub" ]]; then
            PROD_CAT["$repo|$npub"]="$label"
        fi
    done < "$PROD_DIR/$listing"
done

log_info "Loaded ${#PROD_CAT[@]} prod entries"

# Build archive category lookup: repo|npub -> category
# The listings are read in the order cat1..cat4, so when a key shows up in
# more than one file the category of the later file is the one that sticks.
declare -A ARCHIVE_CAT
for spec in \
    "cat1:category1-complete-match.txt" \
    "cat2:category2-empty-blank.txt" \
    "cat3:category3-partial-match.txt" \
    "cat4:category4-no-match.txt"; do
    label="${spec%%:*}"
    listing="${spec#*:}"
    # `read` returns non-zero on a final line that lacks a trailing newline
    # even though it filled the variables; the `|| [[ -n "$repo" ]]` guard
    # keeps that last record instead of silently dropping it (same handling
    # as the prod listings).
    while IFS='|' read -r repo npub _ || [[ -n "$repo" ]]; do
        repo="${repo// /}"  # Remove all spaces
        npub="${npub// /}"  # Remove all spaces
        [[ -z "$repo" || -z "$npub" ]] && continue
        ARCHIVE_CAT["$repo|$npub"]="$label"
    done < "$ARCHIVE_DIR/$listing"
done

log_info "Loaded ${#ARCHIVE_CAT[@]} archive entries"

# Build purgatory lookup: repo|npub -> 1 (if purgatory expired)
# The log file is optional; rows are tab-separated: repo, npub, timestamp, reason.
# PURGATORY_COUNT counts accepted rows (duplicates of the same key count twice).
declare -A PURGATORY
PURGATORY_COUNT=0
if [[ -f "$LOGS_DIR/purgatory-expired.txt" ]]; then
    while IFS=$'\t' read -r repo npub timestamp reason || [[ -n "$repo" ]]; do
        # Skip comments and empty lines
        [[ "$repo" =~ ^# ]] && continue
        [[ -z "$repo" || -z "$npub" ]] && continue
        PURGATORY["$repo|$npub"]=1
        # Not ((PURGATORY_COUNT++)): that expression evaluates to the old value,
        # so it returns status 1 when the counter is 0 and `set -e` would abort
        # the script on the very first entry.
        PURGATORY_COUNT=$((PURGATORY_COUNT + 1))
    done < "$LOGS_DIR/purgatory-expired.txt"
fi
log_info "Loaded $PURGATORY_COUNT purgatory entries"

# Build parse failure lookup: repo|npub -> 1 (if parse failure logged)
# Parse failures file format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
# The log file is optional. Rows without both repo and npub are ignored.
# NOTE(review): tab is an IFS whitespace character, so `read` collapses
# adjacent tabs; a row with an empty middle field would shift the columns.
declare -A PARSE_FAIL
PARSE_FAIL_COUNT=0
if [[ -f "$LOGS_DIR/parse-failures.txt" ]]; then
    while IFS=$'\t' read -r event_id kind reason repo npub || [[ -n "$event_id" ]]; do
        # Skip comments and empty lines
        [[ "$event_id" =~ ^# ]] && continue
        [[ -z "$repo" || -z "$npub" ]] && continue
        PARSE_FAIL["$repo|$npub"]=1
        # Not ((PARSE_FAIL_COUNT++)): that expression evaluates to the old value,
        # so it returns status 1 when the counter is 0 and `set -e` would abort
        # the script on the very first entry.
        PARSE_FAIL_COUNT=$((PARSE_FAIL_COUNT + 1))
    done < "$LOGS_DIR/parse-failures.txt"
fi
log_info "Loaded $PARSE_FAIL_COUNT parse failure entries"

# Build deletion lookup: repo|npub -> 1 (if kind 5 deletion event)
# Deletions are in NDJSON format with "a" tags like "30617:pubkey_hex:repo"
# We need to convert hex pubkeys to npub format using nak
declare -A DELETED

#######################################
# Record every repo targeted by a deletion event in the given NDJSON file.
# Globals:   DELETED (written): DELETED["repo|npub"]=1 per deleted repo
# Arguments: $1 - path to an NDJSON file of deletion events; a missing file
#                 is not an error and leaves DELETED untouched
# Returns:   0 on success (including missing file), 1 if the file could not
#            be parsed
# Notes:     Only "a" tags of the form 30617:<pubkey_hex>:<repo> are used;
#            "a" tags pointing at other kinds are ignored. Everything after
#            the second colon is the repo identifier, so identifiers that
#            themselves contain ':' are kept whole. Pubkeys that `nak` cannot
#            encode are skipped.
#######################################
process_deletions() {
    local file="$1"
    [[ -f "$file" ]] || return 0

    # Extract unique pubkey_hex|repo pairs from the "a" tags of each event.
    # The failure is reported explicitly so the caller does not die silently.
    local pairs
    if ! pairs=$(jq -r '.tags[] | select(.[0] == "a") | .[1]' "$file" \
            | sed -n 's/^30617:\([^:]*\):\(.*\)$/\1|\2/p' \
            | sort -u); then
        log_error "Failed to extract deletion targets from: $file"
        return 1
    fi

    # Unique hex pubkeys, so each one is converted only once
    local hex_keys
    hex_keys=$(cut -d'|' -f1 <<< "$pairs" | sort -u)

    # Build hex->npub lookup (one nak call per distinct pubkey)
    local -A hex_to_npub=()
    local hex npub
    while read -r hex; do
        [[ -z "$hex" ]] && continue
        # Conversion failure yields an empty string and the key is left out
        npub=$(nak encode npub "$hex" 2>/dev/null || echo "")
        if [[ -n "$npub" ]]; then
            hex_to_npub["$hex"]="$npub"
        fi
    done <<< "$hex_keys"

    # Now process pairs with cached npub values
    local pubkey_hex repo
    while IFS='|' read -r pubkey_hex repo; do
        [[ -z "$repo" || -z "$pubkey_hex" ]] && continue
        npub="${hex_to_npub[$pubkey_hex]:-}"
        [[ -z "$npub" ]] && continue
        DELETED["$repo|$npub"]=1
    done <<< "$pairs"

    return 0
}

# Feed the prod and the archive deletion dumps (in that order) into DELETED
for side in "$PROD_DIR" "$ARCHIVE_DIR"; do
    process_deletions "$side/raw/deletions.json"
done
# Number of distinct repo|npub keys that have a deletion recorded
DELETED_COUNT=${#DELETED[@]}
log_info "Loaded $DELETED_COUNT deletion entries"

# ============================================================================
# Phase 2: Build unique repo list from all sources
# ============================================================================

log_info "Building unique repo list..."

# Union of every repo|npub key known to prod, the archive, or the purgatory
# log. Only these three sources contribute keys.
declare -A ALL_REPOS
for key in "${!PROD_CAT[@]}" "${!ARCHIVE_CAT[@]}" "${!PURGATORY[@]}"; do
    ALL_REPOS["$key"]=1
done

log_info "Total unique repos: ${#ALL_REPOS[@]}"

# ============================================================================
# Phase 3: Classify each repo according to revised decision tree
# ============================================================================

log_info "Classifying repos..."

# Per-reason tallies for the summary, all starting at zero.
# Prefix = destination tier: ready_* / resync_* / review_*.
declare -A COUNTS=(
    [ready_complete_both]=0
    [ready_deleted]=0
    [ready_empty_prod]=0
    [ready_archive_only]=0
    [ready_not_in_prod]=0
    [resync_missing_archive]=0
    [resync_incomplete_archive]=0
    [review_partial_prod]=0
    [review_nomatch_prod]=0
    [review_parse_failure]=0
    [review_conflicting]=0
)

# One array of formatted report rows per output file
declare -a READY_LINES
declare -a RESYNC_LINES
declare -a REVIEW_LINES

#######################################
# Build the "context" column for a report row.
# Globals:   PURGATORY, PARSE_FAIL (read)
# Arguments: $1 - lookup key ("repo|npub")
#            $2 - human-readable prod status
#            $3 - human-readable archive status
# Outputs:   the applicable notes joined by ", " in the fixed order
#            purgatory-expired, parse-failure, archive-has-data;
#            "none" when no note applies
#######################################
get_context() {
    local key="$1"
    local prod_status="$2"
    local archive_status="$3"
    local -a notes=()

    # Archive gave up syncing this repo
    [[ -z "${PURGATORY[$key]:-}" ]] || notes+=("purgatory-expired")

    # An event for this repo failed to parse
    [[ -z "${PARSE_FAIL[$key]:-}" ]] || notes+=("parse-failure")

    # Unexpected state: prod is empty although the archive holds something
    if [[ "$prod_status" == "empty" ]]; then
        case "$archive_status" in
            missing | empty) ;;
            *) notes+=("archive-has-data") ;;
        esac
    fi

    if (( ${#notes[@]} == 0 )); then
        echo "none"
        return
    fi

    # Join with ", " and drop the separator left after the last note
    local joined
    printf -v joined '%s, ' "${notes[@]}"
    echo "${joined%, }"
}

# Translate a category code into the status word used in the reports.
# Arguments: $1 - category code (cat1..cat4, "missing", or anything else)
# Outputs:   complete / empty / partial / no-match for cat1..cat4; any other
#            value (including "missing") is echoed back unchanged
cat_to_status() {
    local category="$1"
    local status="$category"
    case "$category" in
        cat1) status="complete" ;;
        cat2) status="empty" ;;
        cat3) status="partial" ;;
        cat4) status="no-match" ;;
    esac
    echo "$status"
}

# Walk every known repo once and append one row to exactly one tier
# (or to none, when the repo has no data anywhere).
LOOP_COUNT=0
for key in "${!ALL_REPOS[@]}"; do
    # Progress heartbeat every 100 repos
    LOOP_COUNT=$((LOOP_COUNT + 1))
    if (( LOOP_COUNT % 100 == 0 )); then
        log_info "Processed $LOOP_COUNT repos..."
    fi

    # Keys have the layout "repo|npub"
    IFS='|' read -r repo npub <<< "$key"

    prod_cat="${PROD_CAT[$key]:-missing}"
    archive_cat="${ARCHIVE_CAT[$key]:-missing}"
    prod_status=$(cat_to_status "$prod_cat")
    archive_status=$(cat_to_status "$archive_cat")

    # get_context only reads the lookup tables, so it is evaluated once here
    # and reused by whichever rule fires below.
    context=$(get_context "$key" "$prod_status" "$archive_status")
    row="$repo | $npub | $prod_status | $archive_status"

    # Rule 1: a kind 5 deletion event wins over everything else
    if [[ -n "${DELETED[$key]:-}" ]]; then
        READY_LINES+=("$row | $context | deleted by user")
        COUNTS[ready_deleted]=$((COUNTS[ready_deleted] + 1))
        continue
    fi

    # Rule 2: otherwise the prod category decides
    case "$prod_cat" in
        cat1)
            # Complete in prod: outcome depends on the archive side
            if [[ "$archive_cat" == "cat1" ]]; then
                # Complete in both -> no action
                READY_LINES+=("$row | $context | complete in both")
                COUNTS[ready_complete_both]=$((COUNTS[ready_complete_both] + 1))
            elif [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
                # Archive missing/incomplete AND a parse failure -> human look
                REVIEW_LINES+=("$row | $context | complete in prod with parse failure")
                COUNTS[review_parse_failure]=$((COUNTS[review_parse_failure] + 1))
            elif [[ "$archive_cat" == "missing" ]]; then
                # Archive never received it -> re-sync
                RESYNC_LINES+=("$row | $context | trigger re-sync to archive")
                COUNTS[resync_missing_archive]=$((COUNTS[resync_missing_archive] + 1))
            else
                # Archive has it, but not completely -> re-sync
                RESYNC_LINES+=("$row | $context | trigger re-sync (archive incomplete)")
                COUNTS[resync_incomplete_archive]=$((COUNTS[resync_incomplete_archive] + 1))
            fi
            ;;

        cat2)
            # Empty in prod -> ALWAYS no action required
            READY_LINES+=("$row | $context | empty in prod (user never pushed)")
            COUNTS[ready_empty_prod]=$((COUNTS[ready_empty_prod] + 1))
            ;;

        cat3)
            # Partial in prod -> ALWAYS manual investigation
            REVIEW_LINES+=("$row | $context | partial in prod (investigate git data)")
            COUNTS[review_partial_prod]=$((COUNTS[review_partial_prod] + 1))
            ;;

        cat4)
            # No-match in prod -> ALWAYS manual investigation
            REVIEW_LINES+=("$row | $context | no-match in prod (git corruption)")
            COUNTS[review_nomatch_prod]=$((COUNTS[review_nomatch_prod] + 1))
            ;;

        missing)
            # Not in prod at all
            if [[ "$archive_cat" != "missing" ]]; then
                # In archive but not in prod -> no action (archive-only)
                READY_LINES+=("$row | $context | archive-only (not in prod)")
                COUNTS[ready_archive_only]=$((COUNTS[ready_archive_only] + 1))
            elif [[ -n "${PURGATORY[$key]:-}" ]]; then
                # Known only from the purgatory log -> no action; the context
                # column is fixed to "purgatory-expired" for these rows
                READY_LINES+=("$row | purgatory-expired | purgatory-only (not in prod)")
                COUNTS[ready_not_in_prod]=$((COUNTS[ready_not_in_prod] + 1))
            fi
            # Otherwise skip (not a real repo - no data anywhere)
            ;;
    esac
done

# ============================================================================
# Phase 4: Write output files
# ============================================================================

log_info "Writing output files..."

# Single UTC timestamp shared by all generated files
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S+00:00")

# ready-for-migration.txt: comment header, then one row per Tier 1 repo
{
    printf '%s\n' \
        "# Ready for Migration - No action required" \
        "# Generated: $TIMESTAMP" \
        "# Format: repo | npub | prod_status | archive_status | context | reason" \
        "#"
    for line in "${READY_LINES[@]}"; do
        printf '%s\n' "$line"
    done
} > "$READY_FILE"

# needs-resync.txt: header with a legend for the context column, then Tier 2 rows
{
    printf '%s\n' \
        "# Needs Re-sync - Action required" \
        "# Generated: $TIMESTAMP" \
        "# Format: repo | npub | prod_status | archive_status | context | action" \
        "#" \
        "# Context meanings:" \
        "#   purgatory-expired = archive tried to sync but failed (30min timeout)" \
        "#   none = archive never tried or announcement missing" \
        "#"
    for line in "${RESYNC_LINES[@]}"; do
        printf '%s\n' "$line"
    done
} > "$RESYNC_FILE"

# manual-review.txt: comment header, then one row per Tier 3 repo
{
    printf '%s\n' \
        "# Manual Review Required - Investigation needed" \
        "# Generated: $TIMESTAMP" \
        "# Format: repo | npub | prod_status | archive_status | context | reason" \
        "#"
    for line in "${REVIEW_LINES[@]}"; do
        printf '%s\n' "$line"
    done
} > "$REVIEW_FILE"

# ============================================================================
# Phase 5: Generate summary
# ============================================================================

log_info "Generating summary..."

# Row counts per tier and overall
TOTAL_READY=${#READY_LINES[@]}
TOTAL_RESYNC=${#RESYNC_LINES[@]}
TOTAL_REVIEW=${#REVIEW_LINES[@]}
TOTAL=$((TOTAL_READY + TOTAL_RESYNC + TOTAL_REVIEW))

# Share of each tier as a percentage with one decimal; stays at 0.0 when
# nothing was classified (avoids dividing by zero).
PCT_READY="0.0"
PCT_RESYNC="0.0"
PCT_REVIEW="0.0"
if (( TOTAL > 0 )); then
    PCT_READY=$(awk -v part="$TOTAL_READY" -v whole="$TOTAL" 'BEGIN { printf "%.1f", (part / whole) * 100 }')
    PCT_RESYNC=$(awk -v part="$TOTAL_RESYNC" -v whole="$TOTAL" 'BEGIN { printf "%.1f", (part / whole) * 100 }')
    PCT_REVIEW=$(awk -v part="$TOTAL_REVIEW" -v whole="$TOTAL" 'BEGIN { printf "%.1f", (part / whole) * 100 }')
fi

# Count the lines of a log file that do not start with '#'.
# Arguments: $1 - path to the log file (may be absent)
# Outputs:   a single integer; 0 when the file is missing or unreadable
count_log_entries() {
    local file="$1"
    local n=0
    if [[ -f "$file" ]]; then
        # grep -c prints the count itself and exits 1 when that count is 0.
        # Appending `|| echo 0` inside the substitution would therefore emit
        # "0" twice; swallow the status instead and default an empty result.
        n=$(grep -c -v '^#' -- "$file" 2>/dev/null) || true
    fi
    echo "${n:-0}"
}

# Write the Markdown summary: overview table, per-tier breakdown, raw input
# sizes, output file list and suggested next steps.
{
    echo "# Migration Classification Summary"
    echo "Generated: $TIMESTAMP"
    echo "Analysis Directory: $ANALYSIS_DIR"
    echo ""
    echo "## Overview"
    echo ""
    echo "| Category | Count | Percentage |"
    echo "|----------|-------|------------|"
    echo "| Ready for Migration | $TOTAL_READY | $PCT_READY% |"
    echo "| Needs Re-sync | $TOTAL_RESYNC | $PCT_RESYNC% |"
    echo "| Manual Review | $TOTAL_REVIEW | $PCT_REVIEW% |"
    echo "| **Total** | **$TOTAL** | **100%** |"
    echo ""
    echo "## Tier 1: Ready for Migration ($TOTAL_READY repos)"
    echo ""
    echo "These repositories are ready for migration or don't need migration:"
    echo ""
    echo "| Reason | Count |"
    echo "|--------|-------|"
    echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |"
    echo "| deleted by user | ${COUNTS[ready_deleted]} |"
    echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |"
    echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |"
    echo "| purgatory-only (not in prod) | ${COUNTS[ready_not_in_prod]} |"
    echo ""
    echo "## Tier 2: Needs Re-sync ($TOTAL_RESYNC repos)"
    echo ""
    echo "These repositories need re-sync to archive before migration:"
    echo ""
    echo "| Reason | Count | Action |"
    echo "|--------|-------|--------|"
    echo "| complete in prod, missing from archive | ${COUNTS[resync_missing_archive]} | trigger re-sync |"
    echo "| complete in prod, incomplete in archive | ${COUNTS[resync_incomplete_archive]} | trigger re-sync |"
    echo ""
    echo "### Purgatory Context"
    echo ""
    echo "Repos in needs-resync.txt include purgatory context:"
    echo "- **purgatory-expired**: Archive tried to sync but failed (30min timeout)"
    echo "- **none**: Archive never tried or announcement missing"
    echo ""
    echo "## Tier 3: Manual Review ($TOTAL_REVIEW repos)"
    echo ""
    echo "These repositories require human investigation:"
    echo ""
    echo "| Reason | Count |"
    echo "|--------|-------|"
    echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |"
    echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |"
    echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |"
    echo ""
    echo "## Input Data Summary"
    echo ""
    # Raw line counts of the category listings (not de-duplicated)
    echo "### Prod Categories"
    echo "- Category 1 (complete): $(wc -l < "$PROD_DIR/category1-complete-match.txt")"
    echo "- Category 2 (empty): $(wc -l < "$PROD_DIR/category2-empty-blank.txt")"
    echo "- Category 3 (partial): $(wc -l < "$PROD_DIR/category3-partial-match.txt")"
    echo "- Category 4 (no match): $(wc -l < "$PROD_DIR/category4-no-match.txt")"
    echo ""
    echo "### Archive Categories"
    echo "- Category 1 (complete): $(wc -l < "$ARCHIVE_DIR/category1-complete-match.txt")"
    echo "- Category 2 (empty): $(wc -l < "$ARCHIVE_DIR/category2-empty-blank.txt")"
    echo "- Category 3 (partial): $(wc -l < "$ARCHIVE_DIR/category3-partial-match.txt")"
    echo "- Category 4 (no match): $(wc -l < "$ARCHIVE_DIR/category4-no-match.txt")"
    echo ""
    echo "### Logs"
    echo "- Parse failures: $(count_log_entries "$LOGS_DIR/parse-failures.txt")"
    echo "- Purgatory expired: $(count_log_entries "$LOGS_DIR/purgatory-expired.txt")"
    echo ""
    echo "## Output Files"
    echo ""
    echo "- \`results/ready-for-migration.txt\` - $TOTAL_READY repos ready for migration"
    echo "- \`results/needs-resync.txt\` - $TOTAL_RESYNC repos needing re-sync"
    echo "- \`results/manual-review.txt\` - $TOTAL_REVIEW repos needing investigation"
    echo "- \`results/summary.txt\` - This summary file"
    echo ""
    echo "## Recommended Next Steps"
    echo ""
    echo "1. **Review needs-resync.txt** - Trigger re-sync for these repos"
    echo "2. **Review manual-review.txt** - Investigate unusual states"
    echo "3. **Verify ready-for-migration.txt** - Spot-check a few repos"
    echo "4. **Plan migration window** - Schedule cutover when action items resolved"
} > "$SUMMARY_FILE"

# ============================================================================
# Phase 6: Print summary to console
# ============================================================================

echo ""
log_success "Classification complete!"

# Condensed version of the summary for the terminal, followed by the paths of
# the four generated files.
cat <<EOF

=== Summary ===
Ready for Migration: $TOTAL_READY ($PCT_READY%)
  - Complete in both: ${COUNTS[ready_complete_both]}
  - Deleted by user: ${COUNTS[ready_deleted]}
  - Empty in prod: ${COUNTS[ready_empty_prod]}
  - Archive-only: ${COUNTS[ready_archive_only]}
  - Purgatory-only: ${COUNTS[ready_not_in_prod]}

Needs Re-sync: $TOTAL_RESYNC ($PCT_RESYNC%)
  - Missing from archive: ${COUNTS[resync_missing_archive]}
  - Incomplete in archive: ${COUNTS[resync_incomplete_archive]}

Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)
  - Partial in prod: ${COUNTS[review_partial_prod]}
  - No-match in prod: ${COUNTS[review_nomatch_prod]}
  - Parse failures: ${COUNTS[review_parse_failure]}

Total: $TOTAL repos

Output files:
  $READY_FILE
  $RESYNC_FILE
  $REVIEW_FILE
  $SUMMARY_FILE
EOF