upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/docs/how-to/migration-scripts
diff options
context:
space:
mode:
authorDanConwayDev <DanConwayDev@protonmail.com>2026-01-23 11:41:12 +0000
committerDanConwayDev <DanConwayDev@protonmail.com>2026-01-27 20:37:58 +0000
commit323336c84613b74921ebc75bf46ccd2d8f9cd6cc (patch)
tree81381119ffa818160b6ec3faa45a9e97d2fa8d04 /docs/how-to/migration-scripts
parent73a366cbd7be4edf9c74194cd0891c80a15236a5 (diff)
Add Phase 5 migration script for final classification
- Combines all data sources from Phases 1-4
- Produces three actionable outputs: no-action, action-required, manual-investigation
- Generates comprehensive summary with recommendations
- Handles missing Phase 4 logs gracefully
- Classification logic for migration decision-making
Diffstat (limited to 'docs/how-to/migration-scripts')
-rwxr-xr-xdocs/how-to/migration-scripts/40-classify-actions.sh770
1 files changed, 770 insertions, 0 deletions
diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh
new file mode 100755
index 0000000..9fc718f
--- /dev/null
+++ b/docs/how-to/migration-scripts/40-classify-actions.sh
@@ -0,0 +1,770 @@
1#!/usr/bin/env bash
2#
3# 40-classify-actions.sh - Final classification of repos for migration action
4#
5# PHASE 5 of the ngit-relay to ngit-grasp migration analysis pipeline.
6# Combines all data sources from previous phases to produce actionable results.
7#
8# USAGE:
9# ./40-classify-actions.sh <analysis-dir>
10#
11# EXAMPLES:
12# ./40-classify-actions.sh work/migration-analysis-20260122-1430
13#
14# INPUT DIRECTORY STRUCTURE:
15# <analysis-dir>/
16# ├── prod/
17# │ ├── raw/
18# │ │ └── deletions.json # Phase 1: kind 5 deletion events
19# │ ├── category1-complete-match.txt # Phase 3: complete git sync
20# │ ├── category2-empty-blank.txt # Phase 3: no git data
21# │ ├── category3-partial-match.txt # Phase 3: partial git sync
22# │ └── category4-no-match.txt # Phase 3: git exists, refs don't match
23# ├── archive/
24# │ ├── raw/
25# │ │ └── deletions.json
26# │ ├── category1-complete-match.txt
27# │ ├── category2-empty-blank.txt
28# │ ├── category3-partial-match.txt
29# │ └── category4-no-match.txt
30# ├── comparison/
31# │ ├── complete-in-both.txt # Phase 3: no action needed
32# │ ├── complete-prod-missing-archive.txt # Phase 3: needs investigation
33# │ ├── complete-prod-incomplete-archive.txt # Phase 3: sync in progress?
34# │ ├── incomplete-in-both.txt # Phase 3: git incomplete
35# │ └── in-archive-not-prod.txt # Phase 3: deleted or new
36# └── logs/
37# ├── parse-failures.txt # Phase 4: events that failed to parse
38# └── purgatory-expired.txt # Phase 4: repos that expired from purgatory
39#
40# OUTPUT:
41# <analysis-dir>/results/
42# ├── no-action-required.txt # Repos that are fine as-is
43# ├── action-required.txt # Repos needing intervention
44# ├── manual-investigation.txt # Repos needing human review
45# └── summary.txt # Human-readable summary
46#
47# OUTPUT FORMATS:
48# no-action-required.txt:
49# repo | npub | reason
50#
51# action-required.txt:
52# repo | npub | reason | suggested_action
53#
54# manual-investigation.txt:
55# repo | npub | reason | context
56#
57# CLASSIFICATION LOGIC:
58#
59# NO ACTION REQUIRED:
60# - Complete in both prod and archive (successfully migrated)
61# - Empty/blank in both (user never pushed any data)
62# - Deleted by user (kind 5 deletion event exists)
63# - In purgatory expiry logs (system already handled it)
64#
65# ACTION REQUIRED:
66# - Complete in prod, missing from archive → Re-sync needed
67# - Complete in prod, incomplete in archive → Wait for sync or re-trigger
68# - Partial match in prod → Investigate why refs don't match
69# - No match (category 4) → Investigate git data corruption
70# - Parse failures → Fix event format or re-announce
71#
72# MANUAL INVESTIGATION:
73# - Conflicting states (e.g., complete in prod but parse failure logged)
74# - In archive but not prod (deleted? or new announcement?)
75# - Multiple issues for same repo
76# - Unexpected state combinations
77#
78# PREREQUISITES:
79# - jq (for parsing JSON)
80# - awk, sort, comm (standard Unix tools)
81#
82# RUNTIME: < 5 seconds (local processing only)
83#
84# SEE ALSO:
85# docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide
86# 01-fetch-events.sh - Phase 1 (fetch events)
87# 10-check-git-sync.sh - Phase 2 (git sync check)
88# 20-categorize.sh, 21-compare-relays.sh - Phase 3 (categorize and compare)
89# 30-extract-parse-failures.sh, 31-extract-purgatory-expiry.sh - Phase 4 (logs)
90#
91
set -euo pipefail

# ANSI colour codes used by the log_* helpers.
# They default to empty strings so that redirected or piped output stays free
# of escape sequences; the real codes are only filled in when stdout is a
# terminal.
RED=''
GREEN=''
YELLOW=''
BLUE=''
BOLD=''
NC=''
if [[ -t 1 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  BOLD='\033[1m'
  NC='\033[0m'
fi
110
# Print an informational message to stderr, prefixed with a "[INFO]" tag.
# The tag is wrapped in $BLUE/$NC, which are empty when colours are disabled.
# Arguments: $* - message text (backslash escapes are interpreted by echo -e)
log_info() {
  local tag="${BLUE}[INFO]${NC}"
  echo -e "${tag} $*" >&2
}
114
# Print a success message to stderr, prefixed with an "[OK]" tag.
# The tag is wrapped in $GREEN/$NC, which are empty when colours are disabled.
# Arguments: $* - message text (backslash escapes are interpreted by echo -e)
log_success() {
  local tag="${GREEN}[OK]${NC}"
  echo -e "${tag} $*" >&2
}
118
# Print a warning message to stderr, prefixed with a "[WARN]" tag.
# The tag is wrapped in $YELLOW/$NC, which are empty when colours are disabled.
# Arguments: $* - message text (backslash escapes are interpreted by echo -e)
log_warn() {
  local tag="${YELLOW}[WARN]${NC}"
  echo -e "${tag} $*" >&2
}
122
# Print an error message to stderr, prefixed with an "[ERROR]" tag.
# The tag is wrapped in $RED/$NC, which are empty when colours are disabled.
# This only logs; it does not exit.
# Arguments: $* - message text (backslash escapes are interpreted by echo -e)
log_error() {
  local tag="${RED}[ERROR]${NC}"
  echo -e "${tag} $*" >&2
}
126
# Print the command-line help text to stdout and terminate the script.
# Outputs: usage text on stdout
# Exits:   always, with status 1
usage() {
  cat <<EOF
Usage: $0 <analysis-dir>

Arguments:
 analysis-dir Directory containing Phase 1-4 output

Examples:
 $0 work/migration-analysis-20260122-1430

Required input structure:
 <analysis-dir>/prod/category*.txt
 <analysis-dir>/archive/category*.txt
 <analysis-dir>/comparison/*.txt
 <analysis-dir>/logs/*.txt (optional)
 <analysis-dir>/prod/raw/deletions.json

Output:
 <analysis-dir>/results/no-action-required.txt
 <analysis-dir>/results/action-required.txt
 <analysis-dir>/results/manual-investigation.txt
 <analysis-dir>/results/summary.txt
EOF
  exit 1
}
150
# Turn category lines read from stdin into lookup keys.
#   stdin:  "repo | npub | state_refs=N | ..."   (fields separated by " | ")
#   stdout: "repo|npub"                          (one key per input line)
extract_key() {
  awk 'BEGIN { FS = " \\| "; OFS = "|" } { print $1, $2 }'
}
157
# Print the repo name (first " | "-separated field) of each line on stdin.
#   stdin:  "repo | npub | ..."
#   stdout: "repo"
extract_repo() {
  awk 'BEGIN { FS = " \\| " } { print $1 }'
}
164
# Print the npub (second " | "-separated field) of each line on stdin.
#   stdin:  "repo | npub | ..."
#   stdout: "npub"  (empty when the line has no second field)
extract_npub() {
  awk 'BEGIN { FS = " \\| " } { print $2 }'
}
171
# Check whether a file exists and holds at least one data line, i.e. a line
# that is neither empty nor a comment starting with "#".
#
# Arguments: $1 - path to the file to inspect
# Returns:   0 if a data line exists, non-zero otherwise (missing file,
#            only comments/blank lines, or unreadable file)
#
# A single grep is used on purpose: the earlier "grep -v | grep -q" pipeline
# could fail with SIGPIPE (status 141) under "set -o pipefail" once the second
# grep exited on its first match, making large files look empty.
file_has_content() {
  local file="$1"
  [[ -f "$file" ]] || return 1
  # -v with two patterns selects lines that are neither comments nor empty;
  # -q stops at the first such line.
  grep -qv -e '^#' -e '^$' -- "$file" 2>/dev/null
}
181
# Print how many data lines a file has: lines that are non-empty and do not
# start with "#".
#
# Arguments: $1 - path to the file
# Outputs:   an integer on stdout; "0" when the file does not exist
count_lines() {
  local path="$1"
  local total
  if [[ -f "$path" ]]; then
    # grep -c exits non-zero when it counts nothing; fall back to 0 then.
    total=$(grep -v '^#' "$path" 2>/dev/null | grep -c '.' 2>/dev/null) || total=0
    echo "${total:-0}"
  else
    echo "0"
  fi
}
194
# Extract the repositories targeted by kind 5 (deletion request) events.
#
# Only "a" tags that address a kind 30617 repo announcement are considered;
# their value has the form "30617:<pubkey>:<repo-identifier>". "e" tags (plain
# event ids) are ignored because they cannot be mapped to a repo without
# cross-referencing the announcements.
#
# Arguments:
#   $1 - deletions file, read by jq as a stream of JSON event objects
#   $2 - output file; receives sorted, unique "repo-identifier|pubkey" lines
#        (pubkey is the hex key exactly as it appears in the event)
# Returns: 0; jq failures are tolerated and leave a possibly empty output file
#
# Details worth knowing:
#   - The identifier is everything after the second ":", so identifiers that
#     themselves contain ":" are kept whole.
#   - A tag is only honoured when its pubkey equals the deletion author's
#     pubkey; a deletion request cannot remove someone else's announcement.
#   - Events without a "tags" array and tags without a string value are skipped
#     instead of aborting jq.
parse_deletions() {
  local deletions_file="$1"
  local output_file="$2"

  if [[ ! -f "$deletions_file" ]]; then
    touch "$output_file"
    return
  fi

  jq -r '
    select(.kind == 5)
    | .pubkey as $pubkey
    | (.tags // [])[]
    | select(.[0] == "a" and (.[1] | type) == "string")
    | .[1]
    | split(":")
    | select(length >= 3 and .[0] == "30617" and .[1] == $pubkey)
    | "\(.[2:] | join(":"))|\($pubkey)"
  ' "$deletions_file" 2>/dev/null | sort -u > "$output_file" || touch "$output_file"
}
224
# Produce the set of "repo|npub" keys contained in a category file.
#
# Arguments: $1 - category file with "repo | npub | ..." lines; lines starting
#                 with "#" are ignored
# Outputs:   sorted, de-duplicated keys on stdout, one per line; nothing when
#            the file does not exist
# Returns:   0 when the file is missing
build_key_set() {
  local src="$1"
  [[ -f "$src" ]] || return 0
  # grep exits 1 when every line is a comment; "|| true" keeps that from
  # failing the pipeline under pipefail.
  { grep -v '^#' "$src" 2>/dev/null || true; } \
    | extract_key \
    | sort -u
}
235
# ---------------------------------------------------------------------------
# Private helpers used only by main()
# ---------------------------------------------------------------------------

# _key_in_file KEY FILE
# Succeeds only when FILE contains a line that is exactly KEY. Whole-line,
# fixed-string matching keeps "git|npub1x" from matching "ngit|npub1x", and
# "--" protects keys that start with a dash.
_key_in_file() {
  [[ -f "$2" ]] && grep -qxF -- "$1" "$2" 2>/dev/null
}

# _field_value NAME LINE
# Prints the value of the first "NAME=value" token found in LINE (the value
# runs up to the next space), or "unknown" when there is no such token.
_field_value() {
  local pattern="$1=([^ ]+)"
  if [[ "$2" =~ $pattern ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    printf '%s\n' "unknown"
  fi
}

# _line_count FILE
# Prints the number of lines in FILE, or 0 when FILE cannot be read.
_line_count() {
  local n=0
  if [[ -r "$1" ]]; then
    n=$(wc -l < "$1" | tr -d ' ') || n=0
  fi
  printf '%s\n' "${n:-0}"
}

# _percent PART WHOLE
# Prints PART as a percentage of WHOLE with one decimal; 0.0 when WHOLE is 0.
_percent() {
  awk -v part="$1" -v whole="$2" \
    'BEGIN { printf "%.1f", (whole > 0) ? (part / whole) * 100 : 0 }'
}

# Main classification logic.
#
# Reads the Phase 1-4 outputs below <analysis-dir> and sorts every repo into
# exactly one of three result files under <analysis-dir>/results/, then writes
# summary.txt and prints a short report.
#
# Arguments: $1 - analysis directory (must contain prod/, archive/ and
#                 comparison/; logs/ is optional)
# Outputs:   result files on disk; progress on stderr; file list on stdout
# Exits:     1 on wrong usage or missing input
#
# Rules are applied in order and the first rule that claims a repo wins; the
# claimed keys are tracked in a "processed" file:
#   1. complete in both                          -> no action
#   2. deletion request exists                   -> no action
#   3. empty/blank in both                       -> no action
#   4. purgatory expired                         -> no action
#   5. complete in prod, missing from archive    -> action (manual if a parse
#                                                   failure was also logged)
#   6. complete in prod, incomplete in archive   -> action
#   7. incomplete in both                        -> action
#   8. in archive but not in prod                -> manual
#   9. remaining parse failures                  -> action
#  10. remaining prod category 3/4 repos         -> manual
#
# NOTE: deletion keys carry the hex pubkey from the event while category files
# carry whatever the earlier phases wrote in the npub column; no conversion is
# done here, so deletion matches rely on both using the same representation.
main() {
  if [[ $# -ne 1 ]]; then
    usage
  fi

  local analysis_dir="$1"

  # Validate input directory
  if [[ ! -d "$analysis_dir" ]]; then
    log_error "Analysis directory not found: $analysis_dir"
    exit 1
  fi

  # Check for required subdirectories
  local prod_dir="$analysis_dir/prod"
  local archive_dir="$analysis_dir/archive"
  local comparison_dir="$analysis_dir/comparison"
  local logs_dir="$analysis_dir/logs"
  local results_dir="$analysis_dir/results"

  local dir
  for dir in "$prod_dir" "$archive_dir" "$comparison_dir"; do
    if [[ ! -d "$dir" ]]; then
      log_error "Required directory not found: $dir"
      log_error "Run Phases 1-3 first to generate input data."
      exit 1
    fi
  done

  # Check for required category files
  if [[ ! -f "$prod_dir/category1-complete-match.txt" ]]; then
    log_error "Missing category files in $prod_dir"
    log_error "Run Phase 3 (20-categorize.sh) first."
    exit 1
  fi

  log_info "Starting final classification"
  log_info "Analysis directory: $analysis_dir"

  # Create output directory
  mkdir -p "$results_dir"

  # Create temp directory for intermediate files. The path is expanded now
  # because tmp_dir is local and no longer in scope when the EXIT trap fires.
  local tmp_dir
  tmp_dir=$(mktemp -d)
  # shellcheck disable=SC2064
  trap "rm -rf '$tmp_dir'" EXIT

  # Initialize output files
  local no_action="$results_dir/no-action-required.txt"
  local action_req="$results_dir/action-required.txt"
  local manual_inv="$results_dir/manual-investigation.txt"
  local summary="$results_dir/summary.txt"

  local generated_at
  generated_at=$(date -Iseconds)

  # Write headers
  {
    echo "# No Action Required - Repos that are fine as-is"
    echo "# Generated: $generated_at"
    echo "# Format: repo | npub | reason"
    echo "#"
  } > "$no_action"

  {
    echo "# Action Required - Repos needing intervention"
    echo "# Generated: $generated_at"
    echo "# Format: repo | npub | reason | suggested_action"
    echo "#"
  } > "$action_req"

  {
    echo "# Manual Investigation Required - Repos needing human review"
    echo "# Generated: $generated_at"
    echo "# Format: repo | npub | reason | context"
    echo "#"
  } > "$manual_inv"

  # =========================================================================
  # STEP 1: Parse deletion events
  # =========================================================================
  log_info "Parsing deletion events..."

  parse_deletions "$prod_dir/raw/deletions.json" "$tmp_dir/prod_deletions.txt"
  parse_deletions "$archive_dir/raw/deletions.json" "$tmp_dir/archive_deletions.txt"

  # Combine deletions (union of both)
  local deletions="$tmp_dir/all_deletions.txt"
  cat "$tmp_dir/prod_deletions.txt" "$tmp_dir/archive_deletions.txt" 2>/dev/null \
    | sort -u > "$deletions"

  log_info "Found $(_line_count "$deletions") deletion requests"

  # =========================================================================
  # STEP 2: Parse log-based categories (Phase 4)
  # =========================================================================
  log_info "Parsing log-based categories..."

  # Parse failures: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason
  local parse_failures="$tmp_dir/parse_failures.txt"
  if file_has_content "$logs_dir/parse-failures.txt"; then
    # Rows with an empty repo column (e.g. blank lines) are dropped so they
    # neither create a bogus "|" key nor inflate the reported count.
    grep -v '^#' "$logs_dir/parse-failures.txt" \
      | awk -F'\t' '$1 != "" { print $1 "|" $2 }' \
      | sort -u > "$parse_failures"
    log_info "Found $(_line_count "$parse_failures") parse failure entries"
  else
    touch "$parse_failures"
    log_info "No parse failures found (logs may be empty or not yet generated)"
  fi

  # Purgatory expired: repo<TAB>npub<TAB>timestamp<TAB>reason
  local purgatory="$tmp_dir/purgatory_expired.txt"
  if file_has_content "$logs_dir/purgatory-expired.txt"; then
    grep -v '^#' "$logs_dir/purgatory-expired.txt" \
      | awk -F'\t' '$1 != "" { print $1 "|" $2 }' \
      | sort -u > "$purgatory"
    log_info "Found $(_line_count "$purgatory") purgatory expiry entries"
  else
    touch "$purgatory"
    log_info "No purgatory expiry entries found (logs may be empty or not yet generated)"
  fi

  # =========================================================================
  # STEP 3: Build lookup tables from category files
  # =========================================================================
  log_info "Building lookup tables..."

  # Build key sets for each category (prod)
  build_key_set "$prod_dir/category1-complete-match.txt" > "$tmp_dir/prod_cat1.txt"
  build_key_set "$prod_dir/category2-empty-blank.txt" > "$tmp_dir/prod_cat2.txt"
  build_key_set "$prod_dir/category3-partial-match.txt" > "$tmp_dir/prod_cat3.txt"
  build_key_set "$prod_dir/category4-no-match.txt" > "$tmp_dir/prod_cat4.txt"

  # Build key sets for each category (archive)
  build_key_set "$archive_dir/category1-complete-match.txt" > "$tmp_dir/archive_cat1.txt"
  build_key_set "$archive_dir/category2-empty-blank.txt" > "$tmp_dir/archive_cat2.txt"
  build_key_set "$archive_dir/category3-partial-match.txt" > "$tmp_dir/archive_cat3.txt"
  build_key_set "$archive_dir/category4-no-match.txt" > "$tmp_dir/archive_cat4.txt"

  # All repos in prod
  cat "$tmp_dir"/prod_cat*.txt 2>/dev/null | sort -u > "$tmp_dir/all_prod.txt" || true

  # All repos in archive
  cat "$tmp_dir"/archive_cat*.txt 2>/dev/null | sort -u > "$tmp_dir/all_archive.txt" || true

  # =========================================================================
  # STEP 4: Process comparison files and apply classification
  # =========================================================================
  log_info "Applying classification logic..."

  # Keys that have already been written to one of the result files.
  local processed="$tmp_dir/processed.txt"
  : > "$processed"

  local line repo npub key prod_status archive_status cat_name cat_file

  # --- NO ACTION: Complete in both ---
  if [[ -f "$comparison_dir/complete-in-both.txt" ]]; then
    while IFS= read -r line || [[ -n "$line" ]]; do
      [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue

      repo=$(printf '%s\n' "$line" | extract_repo)
      npub=$(printf '%s\n' "$line" | extract_npub)
      key="${repo}|${npub}"

      # Check if deleted (still no action, but different reason)
      if _key_in_file "$key" "$deletions"; then
        echo "$repo | $npub | deleted by user (also complete in both)" >> "$no_action"
      else
        echo "$repo | $npub | complete in both prod and archive" >> "$no_action"
      fi
      echo "$key" >> "$processed"
    done < "$comparison_dir/complete-in-both.txt"
  fi

  # --- NO ACTION: Deleted by user (not already processed) ---
  while IFS='|' read -r repo npub; do
    [[ -z "$repo" ]] && continue
    key="${repo}|${npub}"

    if _key_in_file "$key" "$processed"; then
      continue
    fi

    # The pubkey is used as-is (see NOTE in the function header).
    echo "$repo | $npub | deleted by user" >> "$no_action"
    echo "$key" >> "$processed"
  done < "$deletions"

  # --- NO ACTION: Empty/blank in both ---
  # Repos that are category 2 in both prod and archive. The intersection goes
  # to a file so the loop runs in this shell rather than a pipeline subshell.
  comm -12 "$tmp_dir/prod_cat2.txt" "$tmp_dir/archive_cat2.txt" \
    > "$tmp_dir/empty_in_both.txt" 2>/dev/null
  while IFS='|' read -r repo npub; do
    [[ -z "$repo" ]] && continue
    key="${repo}|${npub}"

    if _key_in_file "$key" "$processed"; then
      continue
    fi

    echo "$repo | $npub | empty/blank in both (user never pushed)" >> "$no_action"
    echo "$key" >> "$processed"
  done < "$tmp_dir/empty_in_both.txt"

  # --- NO ACTION: Purgatory expired (system handled it) ---
  while IFS='|' read -r repo npub; do
    [[ -z "$repo" ]] && continue
    key="${repo}|${npub}"

    if _key_in_file "$key" "$processed"; then
      continue
    fi

    echo "$repo | $npub | purgatory expired (system already handled)" >> "$no_action"
    echo "$key" >> "$processed"
  done < "$purgatory"

  # --- ACTION REQUIRED: Complete in prod, missing from archive ---
  if [[ -f "$comparison_dir/complete-prod-missing-archive.txt" ]]; then
    while IFS= read -r line || [[ -n "$line" ]]; do
      [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue

      repo=$(printf '%s\n' "$line" | extract_repo)
      npub=$(printf '%s\n' "$line" | extract_npub)
      key="${repo}|${npub}"

      if _key_in_file "$key" "$processed"; then
        continue
      fi

      # A logged parse failure makes the state contradictory -> manual review
      if _key_in_file "$key" "$parse_failures"; then
        echo "$repo | $npub | complete in prod, missing from archive, parse failure logged | investigate parse failure, may need re-announcement" >> "$manual_inv"
      else
        echo "$repo | $npub | complete in prod, missing from archive | trigger re-sync or investigate why not archived" >> "$action_req"
      fi
      echo "$key" >> "$processed"
    done < "$comparison_dir/complete-prod-missing-archive.txt"
  fi

  # --- ACTION REQUIRED: Complete in prod, incomplete in archive ---
  if [[ -f "$comparison_dir/complete-prod-incomplete-archive.txt" ]]; then
    while IFS= read -r line || [[ -n "$line" ]]; do
      [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue

      repo=$(printf '%s\n' "$line" | extract_repo)
      npub=$(printf '%s\n' "$line" | extract_npub)
      key="${repo}|${npub}"

      if _key_in_file "$key" "$processed"; then
        continue
      fi

      # Extract archive status from line
      archive_status=$(_field_value archive "$line")

      echo "$repo | $npub | complete in prod, $archive_status in archive | wait for sync to complete or trigger re-sync" >> "$action_req"
      echo "$key" >> "$processed"
    done < "$comparison_dir/complete-prod-incomplete-archive.txt"
  fi

  # --- ACTION REQUIRED: Incomplete in both ---
  if [[ -f "$comparison_dir/incomplete-in-both.txt" ]]; then
    while IFS= read -r line || [[ -n "$line" ]]; do
      [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue

      repo=$(printf '%s\n' "$line" | extract_repo)
      npub=$(printf '%s\n' "$line" | extract_npub)
      key="${repo}|${npub}"

      if _key_in_file "$key" "$processed"; then
        continue
      fi

      # Extract statuses; any "|" glued to the prod value is stripped
      prod_status=$(_field_value prod "$line")
      prod_status=${prod_status//|/}
      archive_status=$(_field_value archive "$line")

      echo "$repo | $npub | incomplete in both (prod=$prod_status, archive=$archive_status) | investigate git data source, may need user to re-push" >> "$action_req"
      echo "$key" >> "$processed"
    done < "$comparison_dir/incomplete-in-both.txt"
  fi

  # --- MANUAL INVESTIGATION: In archive but not prod ---
  if [[ -f "$comparison_dir/in-archive-not-prod.txt" ]]; then
    while IFS= read -r line || [[ -n "$line" ]]; do
      [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue

      repo=$(printf '%s\n' "$line" | extract_repo)
      npub=$(printf '%s\n' "$line" | extract_npub)
      key="${repo}|${npub}"

      if _key_in_file "$key" "$processed"; then
        continue
      fi

      archive_status=$(_field_value archive "$line")

      # Check if it was deleted
      if _key_in_file "$key" "$deletions"; then
        echo "$repo | $npub | in archive not prod, deletion exists | verify deletion was intentional" >> "$manual_inv"
      else
        echo "$repo | $npub | in archive ($archive_status) but not in prod | may be new announcement or deleted from prod" >> "$manual_inv"
      fi
      echo "$key" >> "$processed"
    done < "$comparison_dir/in-archive-not-prod.txt"
  fi

  # --- ACTION REQUIRED: Parse failures not yet processed ---
  while IFS='|' read -r repo npub; do
    [[ -z "$repo" ]] && continue
    key="${repo}|${npub}"

    if _key_in_file "$key" "$processed"; then
      continue
    fi

    echo "$repo | $npub | parse failure logged | fix event format or request user to re-announce" >> "$action_req"
    echo "$key" >> "$processed"
  done < "$parse_failures"

  # --- MANUAL INVESTIGATION: Prod category 3/4 not yet processed ---
  for cat_name in cat3 cat4; do
    cat_file="$tmp_dir/prod_${cat_name}.txt"
    [[ ! -f "$cat_file" ]] && continue
    while IFS='|' read -r repo npub; do
      [[ -z "$repo" ]] && continue
      key="${repo}|${npub}"

      if _key_in_file "$key" "$processed"; then
        continue
      fi

      if [[ "$cat_name" == "cat3" ]]; then
        echo "$repo | $npub | partial match in prod, not in comparison results | investigate git ref mismatch" >> "$manual_inv"
      else
        echo "$repo | $npub | no match in prod (git exists but refs don't match) | investigate git data corruption" >> "$manual_inv"
      fi
      echo "$key" >> "$processed"
    done < "$cat_file"
  done

  # =========================================================================
  # STEP 5: Count final results
  # =========================================================================
  # Counts come from the result files themselves so they always agree with
  # what was written.
  local count_no_action count_action count_manual
  count_no_action=$(count_lines "$no_action")
  count_action=$(count_lines "$action_req")
  count_manual=$(count_lines "$manual_inv")

  # Ensure counts are valid integers
  count_no_action=${count_no_action:-0}
  count_action=${count_action:-0}
  count_manual=${count_manual:-0}

  local total=$((count_no_action + count_action + count_manual))

  # Percentages are guarded against a zero total inside _percent, so the real
  # total (possibly 0) is what gets reported.
  if [[ $total -eq 0 ]]; then
    log_warn "No repos were classified. Check input files."
  fi

  # =========================================================================
  # STEP 6: Generate summary
  # =========================================================================
  log_info "Generating summary..."

  cat > "$summary" << EOF
# Migration Classification Summary
Generated: $(date -Iseconds)
Analysis Directory: $analysis_dir

## Overview

| Category | Count | Percentage |
|----------|-------|------------|
| No Action Required | $count_no_action | $(_percent "$count_no_action" "$total")% |
| Action Required | $count_action | $(_percent "$count_action" "$total")% |
| Manual Investigation | $count_manual | $(_percent "$count_manual" "$total")% |
| **Total** | **$total** | **100%** |

## No Action Required ($count_no_action repos)

These repositories are ready for migration or don't need migration:

EOF

  local cnt reason reason_action action

  # Breakdown of no-action reasons
  echo "| Reason | Count |" >> "$summary"
  echo "|--------|-------|" >> "$summary"
  grep -v '^#' "$no_action" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn > "$tmp_dir/no_action_breakdown.txt" || true
  while read -r cnt reason; do
    echo "| $reason | $cnt |" >> "$summary"
  done < "$tmp_dir/no_action_breakdown.txt"

  cat >> "$summary" << EOF

## Action Required ($count_action repos)

These repositories need intervention before migration:

EOF

  # Breakdown of action reasons
  echo "| Reason | Count | Suggested Action |" >> "$summary"
  echo "|--------|-------|------------------|" >> "$summary"
  grep -v '^#' "$action_req" 2>/dev/null | awk -F' \\| ' '{print $3 "|" $4}' | sort | uniq -c | sort -rn > "$tmp_dir/action_breakdown.txt" || true
  while read -r cnt reason_action; do
    reason=$(printf '%s\n' "$reason_action" | cut -d'|' -f1)
    action=$(printf '%s\n' "$reason_action" | cut -d'|' -f2)
    echo "| $reason | $cnt | $action |" >> "$summary"
  done < "$tmp_dir/action_breakdown.txt"

  cat >> "$summary" << EOF

## Manual Investigation ($count_manual repos)

These repositories have conflicting or unexpected states requiring human review:

EOF

  # Breakdown of manual investigation reasons
  echo "| Reason | Count |" >> "$summary"
  echo "|--------|-------|" >> "$summary"
  grep -v '^#' "$manual_inv" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn > "$tmp_dir/manual_breakdown.txt" || true
  while read -r cnt reason; do
    echo "| $reason | $cnt |" >> "$summary"
  done < "$tmp_dir/manual_breakdown.txt"

  # Input sizes for the summary
  local prod_del_count archive_del_count
  local prod_cat1_count prod_cat2_count prod_cat3_count prod_cat4_count
  local archive_cat1_count archive_cat2_count archive_cat3_count archive_cat4_count
  local parse_fail_count purgatory_count

  prod_del_count=$(_line_count "$tmp_dir/prod_deletions.txt")
  archive_del_count=$(_line_count "$tmp_dir/archive_deletions.txt")
  prod_cat1_count=$(_line_count "$tmp_dir/prod_cat1.txt")
  prod_cat2_count=$(_line_count "$tmp_dir/prod_cat2.txt")
  prod_cat3_count=$(_line_count "$tmp_dir/prod_cat3.txt")
  prod_cat4_count=$(_line_count "$tmp_dir/prod_cat4.txt")
  archive_cat1_count=$(_line_count "$tmp_dir/archive_cat1.txt")
  archive_cat2_count=$(_line_count "$tmp_dir/archive_cat2.txt")
  archive_cat3_count=$(_line_count "$tmp_dir/archive_cat3.txt")
  archive_cat4_count=$(_line_count "$tmp_dir/archive_cat4.txt")
  parse_fail_count=$(_line_count "$parse_failures")
  purgatory_count=$(_line_count "$purgatory")

  cat >> "$summary" << EOF

## Input Data Summary

### Phase 1 (Events)
- Prod deletions: $prod_del_count
- Archive deletions: $archive_del_count

### Phase 3 (Categories)
**Prod:**
- Category 1 (complete): $prod_cat1_count
- Category 2 (empty): $prod_cat2_count
- Category 3 (partial): $prod_cat3_count
- Category 4 (no match): $prod_cat4_count

**Archive:**
- Category 1 (complete): $archive_cat1_count
- Category 2 (empty): $archive_cat2_count
- Category 3 (partial): $archive_cat3_count
- Category 4 (no match): $archive_cat4_count

### Phase 4 (Logs)
- Parse failures: $parse_fail_count
- Purgatory expired: $purgatory_count

## Recommended Next Steps

1. **Review action-required.txt** - Address these repos before migration
2. **Review manual-investigation.txt** - Investigate unusual states
3. **Verify no-action-required.txt** - Spot-check a few repos to confirm
4. **Plan migration window** - Schedule cutover when action items are resolved

## Output Files

- \`results/no-action-required.txt\` - $count_no_action repos ready for migration
- \`results/action-required.txt\` - $count_action repos needing intervention
- \`results/manual-investigation.txt\` - $count_manual repos needing human review
- \`results/summary.txt\` - This summary file
EOF

  # =========================================================================
  # STEP 7: Display results
  # =========================================================================
  echo ""
  log_info "=== Classification Complete ==="
  echo ""
  log_success "No Action Required: $count_no_action repos"
  log_warn "Action Required: $count_action repos"
  log_error "Manual Investigation: $count_manual repos"
  echo ""
  log_info "Total: $total repos classified"
  echo ""
  log_info "Output files:"
  echo " $no_action"
  echo " $action_req"
  echo " $manual_inv"
  echo " $summary"
  echo ""

  # Show top action items. The ranking is written to a file first so that
  # "head" never closes a pipe on a still-writing "sort" under pipefail.
  if [[ $count_action -gt 0 ]]; then
    log_info "Top action items:"
    grep -v '^#' "$action_req" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn > "$tmp_dir/top_action.txt" || true
    head -n 5 "$tmp_dir/top_action.txt" | while read -r cnt reason; do
      echo " - $reason: $cnt repos"
    done
    echo ""
  fi

  # Show top investigation items (manual_breakdown.txt already holds them)
  if [[ $count_manual -gt 0 ]]; then
    log_info "Top investigation items:"
    head -n 5 "$tmp_dir/manual_breakdown.txt" | while read -r cnt reason; do
      echo " - $reason: $cnt repos"
    done
    echo ""
  fi

  log_info "See $summary for full details and recommended next steps."
}
769
770main "$@"