upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summary | refs | log | tree | commit | diff
path: root/docs/how-to/migration-scripts
diff options
context:
space:
mode:
Diffstat (limited to 'docs/how-to/migration-scripts')
-rwxr-xr-x docs/how-to/migration-scripts/40-classify-actions.sh 1260
1 file changed, 541 insertions, 719 deletions
diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh
index 53c0f9d..81559aa 100755
--- a/docs/how-to/migration-scripts/40-classify-actions.sh
+++ b/docs/how-to/migration-scripts/40-classify-actions.sh
@@ -1,772 +1,594 @@
1#!/usr/bin/env bash 1#!/usr/bin/env bash
2# 2#
3# 40-classify-actions.sh - Final classification of repos for migration action 3# 40-classify-actions.sh - Classify repos by migration action required
4# 4#
5# PHASE 5 of the GRASP relay to ngit-grasp migration analysis pipeline. 5# Implements the redesigned classification system (Option B) with user feedback:
6# Combines all data sources from previous phases to produce actionable results.
7# 6#
8# USAGE: 7# Tier 1: No Action Required (ready-for-migration.txt)
9# ./40-classify-actions.sh <analysis-dir> 8# - Complete in both (prod=cat1, archive=cat1)
9# - Deleted by user (kind 5 event)
10# - Empty in prod (prod=cat2, any archive status)
11# - Archive-only (archive=any, prod=missing)
12# - Not in prod (purgatory-only, prod=missing)
10# 13#
11# EXAMPLES: 14# Tier 2: Action Required (needs-resync.txt)
12# ./40-classify-actions.sh work/migration-analysis-20260122-1430 15# - Complete in prod, missing from archive (with purgatory context)
16# - Complete in prod, incomplete in archive (with purgatory context)
13# 17#
14# INPUT DIRECTORY STRUCTURE: 18# Tier 3: Manual Investigation (manual-review.txt)
15# <analysis-dir>/ 19# - Partial in prod (prod=cat3)
16# ├── prod/ 20# - No-match in prod (prod=cat4)
17# │ ├── raw/ 21# - Parse failures
18# │ │ └── deletions.json # Phase 1: kind 5 deletion events 22# - Conflicting states
19# │ ├── category1-complete-match.txt # Phase 3: complete git sync
20# │ ├── category2-empty-blank.txt # Phase 3: no git data
21# │ ├── category3-partial-match.txt # Phase 3: partial git sync
22# │ └── category4-no-match.txt # Phase 3: git exists, refs don't match
23# ├── archive/
24# │ ├── raw/
25# │ │ └── deletions.json
26# │ ├── category1-complete-match.txt
27# │ ├── category2-empty-blank.txt
28# │ ├── category3-partial-match.txt
29# │ └── category4-no-match.txt
30# ├── comparison/
31# │ ├── complete-in-both.txt # Phase 3: no action needed
32# │ ├── complete-prod-missing-archive.txt # Phase 3: needs investigation
33# │ ├── complete-prod-incomplete-archive.txt # Phase 3: sync in progress?
34# │ ├── incomplete-in-both.txt # Phase 3: git incomplete
35# │ └── in-archive-not-prod.txt # Phase 3: deleted or new
36# └── logs/
37# ├── parse-failures.txt # Phase 4: events that failed to parse
38# └── purgatory-expired.txt # Phase 4: repos that expired from purgatory
39# 23#
40# OUTPUT: 24# Usage: ./40-classify-actions.sh <analysis-dir>
41# <analysis-dir>/results/
42# ├── no-action-required.txt # Repos that are fine as-is
43# ├── action-required.txt # Repos needing intervention
44# ├── manual-investigation.txt # Repos needing human review
45# └── summary.txt # Human-readable summary
46# 25#
47# OUTPUT FORMATS: 26# Output format: repo | npub | prod_status | archive_status | context | action
48# no-action-required.txt:
49# repo | npub | reason
50#
51# action-required.txt:
52# repo | npub | reason | suggested_action
53#
54# manual-investigation.txt:
55# repo | npub | reason | context
56#
57# CLASSIFICATION LOGIC:
58#
59# NO ACTION REQUIRED:
60# - Complete in both prod and archive (successfully migrated)
61# - Empty/blank in both (user never pushed any data)
62# - Deleted by user (kind 5 deletion event exists)
63# - In purgatory expiry logs (system already handled it)
64#
65# ACTION REQUIRED:
66# - Complete in prod, missing from archive → Re-sync needed
67# - Complete in prod, incomplete in archive → Wait for sync or re-trigger
68# - Partial match in prod → Investigate why refs don't match
69# - No match (category 4) → Investigate git data corruption
70# - Parse failures → Fix event format or re-announce
71#
72# MANUAL INVESTIGATION:
73# - Conflicting states (e.g., complete in prod but parse failure logged)
74# - In archive but not prod (deleted? or new announcement?)
75# - Multiple issues for same repo
76# - Unexpected state combinations
77#
78# PREREQUISITES:
79# - jq (for parsing JSON)
80# - awk, sort, comm (standard Unix tools)
81#
82# RUNTIME: < 5 seconds (local processing only)
83#
84# SEE ALSO:
85# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide
86# 01-fetch-events.sh - Phase 1 (fetch events)
87# 10-check-git-sync.sh - Phase 2 (git sync check)
88# 20-categorize.sh, 21-compare-relays.sh - Phase 3 (categorize and compare)
89# 30-extract-parse-failures.sh, 31-extract-purgatory-expiry.sh - Phase 4 (logs)
90# 27#
91 28
92set -euo pipefail 29set -euo pipefail
93 30
94# Colors for output (disabled if not a terminal) 31# Colors for output
95if [[ -t 1 ]]; then 32RED='\033[0;31m'
96 RED='\033[0;31m' 33GREEN='\033[0;32m'
97 GREEN='\033[0;32m' 34YELLOW='\033[1;33m'
98 YELLOW='\033[0;33m' 35BLUE='\033[0;34m'
99 BLUE='\033[0;34m' 36NC='\033[0m' # No Color
100 BOLD='\033[1m'
101 NC='\033[0m'
102else
103 RED=''
104 GREEN=''
105 YELLOW=''
106 BLUE=''
107 BOLD=''
108 NC=''
109fi
110 37
111log_info() { 38log_info() { echo -e "${BLUE}[INFO]${NC} $*"; }
112 echo -e "${BLUE}[INFO]${NC} $*" >&2 39log_success() { echo -e "${GREEN}[OK]${NC} $*"; }
113} 40log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
41log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
114 42
115log_success() { 43# Check arguments
116 echo -e "${GREEN}[OK]${NC} $*" >&2 44if [[ $# -lt 1 ]]; then
117}
118
119log_warn() {
120 echo -e "${YELLOW}[WARN]${NC} $*" >&2
121}
122
123log_error() {
124 echo -e "${RED}[ERROR]${NC} $*" >&2
125}
126
127usage() {
128 echo "Usage: $0 <analysis-dir>" 45 echo "Usage: $0 <analysis-dir>"
129 echo "" 46 echo "Example: $0 work/migration-analysis-20260123-200701"
130 echo "Arguments:"
131 echo " analysis-dir Directory containing Phase 1-4 output"
132 echo ""
133 echo "Examples:"
134 echo " $0 work/migration-analysis-20260122-1430"
135 echo ""
136 echo "Required input structure:"
137 echo " <analysis-dir>/prod/category*.txt"
138 echo " <analysis-dir>/archive/category*.txt"
139 echo " <analysis-dir>/comparison/*.txt"
140 echo " <analysis-dir>/logs/*.txt (optional)"
141 echo " <analysis-dir>/prod/raw/deletions.json"
142 echo ""
143 echo "Output:"
144 echo " <analysis-dir>/results/no-action-required.txt"
145 echo " <analysis-dir>/results/action-required.txt"
146 echo " <analysis-dir>/results/manual-investigation.txt"
147 echo " <analysis-dir>/results/summary.txt"
148 exit 1 47 exit 1
149} 48fi
150
151# Extract repo|npub key from category line
152# Input: "repo | npub | state_refs=N | ..."
153# Output: "repo|npub"
154extract_key() {
155 awk -F' \\| ' '{print $1 "|" $2}'
156}
157
158# Extract repo from category line
159# Input: "repo | npub | ..."
160# Output: "repo"
161extract_repo() {
162 awk -F' \\| ' '{print $1}'
163}
164 49
165# Extract npub from category line 50ANALYSIS_DIR="$1"
166# Input: "repo | npub | ..."
167# Output: "npub"
168extract_npub() {
169 awk -F' \\| ' '{print $2}'
170}
171 51
172# Check if a file exists and has content (ignoring comment lines) 52# Validate analysis directory
173file_has_content() { 53if [[ ! -d "$ANALYSIS_DIR" ]]; then
174 local file="$1" 54 log_error "Analysis directory not found: $ANALYSIS_DIR"
175 if [[ ! -f "$file" ]]; then 55 exit 1
176 return 1 56fi
177 fi
178 # Check for non-comment, non-empty lines
179 grep -v '^#' "$file" 2>/dev/null | grep -q '.' 2>/dev/null
180}
181 57
182# Count non-comment lines in a file 58# Define paths
183count_lines() { 59PROD_DIR="$ANALYSIS_DIR/prod"
184 local file="$1" 60ARCHIVE_DIR="$ANALYSIS_DIR/archive"
185 if [[ ! -f "$file" ]]; then 61COMPARISON_DIR="$ANALYSIS_DIR/comparison"
186 echo "0" 62LOGS_DIR="$ANALYSIS_DIR/logs"
187 return 63RESULTS_DIR="$ANALYSIS_DIR/results"
64
65# Validate required directories
66for dir in "$PROD_DIR" "$ARCHIVE_DIR" "$COMPARISON_DIR" "$LOGS_DIR"; do
67 if [[ ! -d "$dir" ]]; then
68 log_error "Required directory not found: $dir"
69 exit 1
188 fi 70 fi
189 local count 71done
190 count=$(grep -v '^#' "$file" 2>/dev/null | grep -c '.' 2>/dev/null) || count=0 72
191 # Ensure we return a clean integer 73# Create results directory
192 echo "${count:-0}" 74mkdir -p "$RESULTS_DIR"
193} 75
76# Output files
77READY_FILE="$RESULTS_DIR/ready-for-migration.txt"
78RESYNC_FILE="$RESULTS_DIR/needs-resync.txt"
79REVIEW_FILE="$RESULTS_DIR/manual-review.txt"
80SUMMARY_FILE="$RESULTS_DIR/summary.txt"
81
82# Temporary files for processing
83TMP_DIR=$(mktemp -d)
84trap 'rm -rf "$TMP_DIR"' EXIT
85
86log_info "Starting classification with revised system (Option B)"
87log_info "Analysis directory: $ANALYSIS_DIR"
88
89# ============================================================================
90# Phase 1: Build lookup tables from source data
91# ============================================================================
92
93log_info "Building lookup tables..."
94
95# Build prod category lookup: repo|npub -> category
96declare -A PROD_CAT
97while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do
98 repo="${repo// /}" # Remove all spaces
99 npub="${npub// /}" # Remove all spaces
100 [[ -z "$repo" || -z "$npub" ]] && continue
101 PROD_CAT["$repo|$npub"]="cat1"
102done < "$PROD_DIR/category1-complete-match.txt"
103
104while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do
105 repo="${repo// /}"
106 npub="${npub// /}"
107 [[ -z "$repo" || -z "$npub" ]] && continue
108 PROD_CAT["$repo|$npub"]="cat2"
109done < "$PROD_DIR/category2-empty-blank.txt"
110
111while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do
112 repo="${repo// /}"
113 npub="${npub// /}"
114 [[ -z "$repo" || -z "$npub" ]] && continue
115 PROD_CAT["$repo|$npub"]="cat3"
116done < "$PROD_DIR/category3-partial-match.txt"
117
118while IFS='|' read -r repo npub rest || [[ -n "$repo" ]]; do
119 repo="${repo// /}"
120 npub="${npub// /}"
121 [[ -z "$repo" || -z "$npub" ]] && continue
122 PROD_CAT["$repo|$npub"]="cat4"
123done < "$PROD_DIR/category4-no-match.txt"
124
125log_info "Loaded ${#PROD_CAT[@]} prod entries"
126
127# Build archive category lookup: repo|npub -> category
128declare -A ARCHIVE_CAT
129while IFS='|' read -r repo npub rest; do
130 repo="${repo// /}"
131 npub="${npub// /}"
132 [[ -z "$repo" || -z "$npub" ]] && continue
133 ARCHIVE_CAT["$repo|$npub"]="cat1"
134done < "$ARCHIVE_DIR/category1-complete-match.txt"
135
136while IFS='|' read -r repo npub rest; do
137 repo="${repo// /}"
138 npub="${npub// /}"
139 [[ -z "$repo" || -z "$npub" ]] && continue
140 ARCHIVE_CAT["$repo|$npub"]="cat2"
141done < "$ARCHIVE_DIR/category2-empty-blank.txt"
142
143while IFS='|' read -r repo npub rest; do
144 repo="${repo// /}"
145 npub="${npub// /}"
146 [[ -z "$repo" || -z "$npub" ]] && continue
147 ARCHIVE_CAT["$repo|$npub"]="cat3"
148done < "$ARCHIVE_DIR/category3-partial-match.txt"
149
150while IFS='|' read -r repo npub rest; do
151 repo="${repo// /}"
152 npub="${npub// /}"
153 [[ -z "$repo" || -z "$npub" ]] && continue
154 ARCHIVE_CAT["$repo|$npub"]="cat4"
155done < "$ARCHIVE_DIR/category4-no-match.txt"
156
157log_info "Loaded ${#ARCHIVE_CAT[@]} archive entries"
158
159# Build purgatory lookup: repo|npub -> 1 (if purgatory expired)
160declare -A PURGATORY
161PURGATORY_COUNT=0
162if [[ -f "$LOGS_DIR/purgatory-expired.txt" ]]; then
163 while IFS=$'\t' read -r repo npub timestamp reason || [[ -n "$repo" ]]; do
164 # Skip comments and empty lines
165 [[ "$repo" =~ ^# ]] && continue
166 [[ -z "$repo" || -z "$npub" ]] && continue
167 PURGATORY["$repo|$npub"]=1
168 ((PURGATORY_COUNT++))
169 done < "$LOGS_DIR/purgatory-expired.txt"
170fi
171log_info "Loaded $PURGATORY_COUNT purgatory entries"
172
173# Build parse failure lookup: repo|npub -> 1 (if parse failure logged)
174# Parse failures file format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
175declare -A PARSE_FAIL
176PARSE_FAIL_COUNT=0
177if [[ -f "$LOGS_DIR/parse-failures.txt" ]]; then
178 while IFS=$'\t' read -r event_id kind reason repo npub || [[ -n "$event_id" ]]; do
179 # Skip comments and empty lines
180 [[ "$event_id" =~ ^# ]] && continue
181 [[ -z "$repo" || -z "$npub" ]] && continue
182 PARSE_FAIL["$repo|$npub"]=1
183 ((PARSE_FAIL_COUNT++))
184 done < "$LOGS_DIR/parse-failures.txt"
185fi
186log_info "Loaded $PARSE_FAIL_COUNT parse failure entries"
194 187
195# Parse deletions.json to extract deleted repo identifiers 188# Build deletion lookup: repo|npub -> 1 (if kind 5 deletion event)
196# Kind 5 events have "e" tags pointing to the deleted event 189# Deletions are in NDJSON format with "a" tags like "30617:pubkey_hex:repo"
197# We need to cross-reference with announcements to get repo/npub 190# We need to convert hex pubkeys to npub format using nak
198# For now, we extract the pubkey and any "a" tags (addressable event references) 191declare -A DELETED
199parse_deletions() {
200 local deletions_file="$1"
201 local output_file="$2"
202
203 if [[ ! -f "$deletions_file" ]]; then
204 touch "$output_file"
205 return
206 fi
207
208 # Extract deletion targets from kind 5 events
209 # Kind 5 events can reference:
210 # - "e" tag: specific event ID
211 # - "a" tag: addressable event (kind:pubkey:identifier)
212 # For 30617 announcements, "a" tag format is: 30617:<pubkey>:<repo-identifier>
213 jq -r '
214 select(.kind == 5) |
215 .pubkey as $pubkey |
216 .tags[] |
217 select(.[0] == "a") |
218 .[1] |
219 split(":") |
220 select(.[0] == "30617") |
221 "\(.[2])|\($pubkey)"
222 ' "$deletions_file" 2>/dev/null | sort -u > "$output_file" || touch "$output_file"
223}
224 192
225# Build a lookup set from a file (repo|npub format) 193# Helper function to process deletion file (NDJSON format)
226# Returns keys one per line 194# Extracts unique pubkey_hex:repo pairs and converts to npub
227build_key_set() { 195process_deletions() {
228 local file="$1" 196 local file="$1"
229 if [[ ! -f "$file" ]]; then 197 [[ ! -f "$file" ]] && return
230 return 0 198
231 fi 199 # Extract unique pubkey_hex|repo pairs from NDJSON
232 # Use || true to prevent pipefail from exiting on empty grep 200 # Each line is a JSON object, extract "a" tags
233 { grep -v '^#' "$file" 2>/dev/null || true; } | extract_key | sort -u 201 local pairs
202 pairs=$(jq -r '.tags[] | select(.[0] == "a") | .[1]' "$file" 2>/dev/null | \
203 sed 's/^30617://' | awk -F: '{print $1 "|" $2}' | sort -u)
204
205 # Get unique hex pubkeys for batch conversion
206 local hex_keys
207 hex_keys=$(echo "$pairs" | cut -d'|' -f1 | sort -u)
208
209 # Build hex->npub lookup via batch nak call
210 declare -A HEX_TO_NPUB
211 while read -r hex; do
212 [[ -z "$hex" ]] && continue
213 local npub
214 npub=$(nak encode npub "$hex" 2>/dev/null || echo "")
215 [[ -n "$npub" ]] && HEX_TO_NPUB["$hex"]="$npub"
216 done <<< "$hex_keys"
217
218 # Now process pairs with cached npub values
219 while IFS='|' read -r pubkey_hex repo; do
220 [[ -z "$repo" || -z "$pubkey_hex" ]] && continue
221 local npub="${HEX_TO_NPUB[$pubkey_hex]:-}"
222 [[ -z "$npub" ]] && continue
223 DELETED["$repo|$npub"]=1
224 done <<< "$pairs"
234} 225}
235 226
236# Main classification logic 227# Process prod and archive deletions
237main() { 228process_deletions "$PROD_DIR/raw/deletions.json"
238 if [[ $# -ne 1 ]]; then 229process_deletions "$ARCHIVE_DIR/raw/deletions.json"
239 usage 230DELETED_COUNT=0
231[[ ${#DELETED[@]} -gt 0 ]] && DELETED_COUNT=${#DELETED[@]}
232log_info "Loaded $DELETED_COUNT deletion entries"
233
234# ============================================================================
235# Phase 2: Build unique repo list from all sources
236# ============================================================================
237
238log_info "Building unique repo list..."
239
240declare -A ALL_REPOS
241for key in "${!PROD_CAT[@]}"; do
242 ALL_REPOS["$key"]=1
243done
244for key in "${!ARCHIVE_CAT[@]}"; do
245 ALL_REPOS["$key"]=1
246done
247for key in "${!PURGATORY[@]}"; do
248 ALL_REPOS["$key"]=1
249done
250
251log_info "Total unique repos: ${#ALL_REPOS[@]}"
252
253# ============================================================================
254# Phase 3: Classify each repo according to revised decision tree
255# ============================================================================
256
257log_info "Classifying repos..."
258
259# Counters for summary
260declare -A COUNTS
261COUNTS[ready_complete_both]=0
262COUNTS[ready_deleted]=0
263COUNTS[ready_empty_prod]=0
264COUNTS[ready_archive_only]=0
265COUNTS[ready_not_in_prod]=0
266COUNTS[resync_missing_archive]=0
267COUNTS[resync_incomplete_archive]=0
268COUNTS[review_partial_prod]=0
269COUNTS[review_nomatch_prod]=0
270COUNTS[review_parse_failure]=0
271COUNTS[review_conflicting]=0
272
273# Output arrays
274declare -a READY_LINES
275declare -a RESYNC_LINES
276declare -a REVIEW_LINES
277
278# Helper function to get context string
279get_context() {
280 local key="$1"
281 local prod_status="$2"
282 local archive_status="$3"
283 local context=""
284
285 # Check purgatory
286 if [[ -n "${PURGATORY[$key]:-}" ]]; then
287 context="purgatory-expired"
240 fi 288 fi
241 289
242 local analysis_dir="$1" 290 # Check parse failure
243 291 if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
244 # Validate input directory 292 if [[ -n "$context" ]]; then
245 if [[ ! -d "$analysis_dir" ]]; then 293 context="$context, parse-failure"
246 log_error "Analysis directory not found: $analysis_dir" 294 else
247 exit 1 295 context="parse-failure"
248 fi
249
250 # Check for required subdirectories
251 local prod_dir="$analysis_dir/prod"
252 local archive_dir="$analysis_dir/archive"
253 local comparison_dir="$analysis_dir/comparison"
254 local logs_dir="$analysis_dir/logs"
255 local results_dir="$analysis_dir/results"
256
257 for dir in "$prod_dir" "$archive_dir" "$comparison_dir"; do
258 if [[ ! -d "$dir" ]]; then
259 log_error "Required directory not found: $dir"
260 log_error "Run Phases 1-3 first to generate input data."
261 exit 1
262 fi 296 fi
263 done
264
265 # Check for required category files
266 if [[ ! -f "$prod_dir/category1-complete-match.txt" ]]; then
267 log_error "Missing category files in $prod_dir"
268 log_error "Run Phase 3 (20-categorize.sh) first."
269 exit 1
270 fi
271
272 log_info "Starting final classification"
273 log_info "Analysis directory: $analysis_dir"
274
275 # Create output directory
276 mkdir -p "$results_dir"
277
278 # Create temp directory for intermediate files
279 local tmp_dir
280 tmp_dir=$(mktemp -d)
281 # shellcheck disable=SC2064
282 trap "rm -rf '$tmp_dir'" EXIT
283
284 # Initialize output files
285 local no_action="$results_dir/no-action-required.txt"
286 local action_req="$results_dir/action-required.txt"
287 local manual_inv="$results_dir/manual-investigation.txt"
288 local summary="$results_dir/summary.txt"
289
290 # Write headers
291 {
292 echo "# No Action Required - Repos that are fine as-is"
293 echo "# Generated: $(date -Iseconds)"
294 echo "# Format: repo | npub | reason"
295 echo "#"
296 } > "$no_action"
297
298 {
299 echo "# Action Required - Repos needing intervention"
300 echo "# Generated: $(date -Iseconds)"
301 echo "# Format: repo | npub | reason | suggested_action"
302 echo "#"
303 } > "$action_req"
304
305 {
306 echo "# Manual Investigation Required - Repos needing human review"
307 echo "# Generated: $(date -Iseconds)"
308 echo "# Format: repo | npub | reason | context"
309 echo "#"
310 } > "$manual_inv"
311
312 # =========================================================================
313 # STEP 1: Parse deletion events
314 # =========================================================================
315 log_info "Parsing deletion events..."
316
317 parse_deletions "$prod_dir/raw/deletions.json" "$tmp_dir/prod_deletions.txt"
318 parse_deletions "$archive_dir/raw/deletions.json" "$tmp_dir/archive_deletions.txt"
319
320 # Combine deletions (union of both)
321 cat "$tmp_dir/prod_deletions.txt" "$tmp_dir/archive_deletions.txt" 2>/dev/null | sort -u > "$tmp_dir/all_deletions.txt"
322
323 local deletion_count
324 deletion_count=$(wc -l < "$tmp_dir/all_deletions.txt" | tr -d ' ')
325 log_info "Found $deletion_count deletion requests"
326
327 # =========================================================================
328 # STEP 2: Parse log-based categories (Phase 4)
329 # =========================================================================
330 log_info "Parsing log-based categories..."
331
332 # Parse failures: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
333 # Note: repo and npub are in columns 4 and 5 (enriched by Phase 4 from announcements.json)
334 # Some entries may have empty repo/npub if the event_id wasn't found in announcements
335 if [[ -f "$logs_dir/parse-failures.txt" ]] && file_has_content "$logs_dir/parse-failures.txt"; then
336 grep -v '^#' "$logs_dir/parse-failures.txt" | awk -F'\t' '{print $4 "|" $5}' | sort -u > "$tmp_dir/parse_failures.txt"
337 log_info "Found $(wc -l < "$tmp_dir/parse_failures.txt" | tr -d ' ') parse failure entries"
338 else
339 touch "$tmp_dir/parse_failures.txt"
340 log_info "No parse failures found (logs may be empty or not yet generated)"
341 fi
342
343 # Purgatory expired: repo<TAB>npub<TAB>timestamp<TAB>reason
344 if [[ -f "$logs_dir/purgatory-expired.txt" ]] && file_has_content "$logs_dir/purgatory-expired.txt"; then
345 grep -v '^#' "$logs_dir/purgatory-expired.txt" | awk -F'\t' '{print $1 "|" $2}' | sort -u > "$tmp_dir/purgatory_expired.txt"
346 log_info "Found $(wc -l < "$tmp_dir/purgatory_expired.txt" | tr -d ' ') purgatory expiry entries"
347 else
348 touch "$tmp_dir/purgatory_expired.txt"
349 log_info "No purgatory expiry entries found (logs may be empty or not yet generated)"
350 fi
351
352 # =========================================================================
353 # STEP 3: Build lookup tables from category files
354 # =========================================================================
355 log_info "Building lookup tables..."
356
357 # Build key sets for each category (prod)
358 build_key_set "$prod_dir/category1-complete-match.txt" > "$tmp_dir/prod_cat1.txt"
359 build_key_set "$prod_dir/category2-empty-blank.txt" > "$tmp_dir/prod_cat2.txt"
360 build_key_set "$prod_dir/category3-partial-match.txt" > "$tmp_dir/prod_cat3.txt"
361 build_key_set "$prod_dir/category4-no-match.txt" > "$tmp_dir/prod_cat4.txt"
362
363 # Build key sets for each category (archive)
364 build_key_set "$archive_dir/category1-complete-match.txt" > "$tmp_dir/archive_cat1.txt"
365 build_key_set "$archive_dir/category2-empty-blank.txt" > "$tmp_dir/archive_cat2.txt"
366 build_key_set "$archive_dir/category3-partial-match.txt" > "$tmp_dir/archive_cat3.txt"
367 build_key_set "$archive_dir/category4-no-match.txt" > "$tmp_dir/archive_cat4.txt"
368
369 # All repos in prod
370 cat "$tmp_dir"/prod_cat*.txt 2>/dev/null | sort -u > "$tmp_dir/all_prod.txt" || true
371
372 # All repos in archive
373 cat "$tmp_dir"/archive_cat*.txt 2>/dev/null | sort -u > "$tmp_dir/all_archive.txt" || true
374
375 # =========================================================================
376 # STEP 4: Process comparison files and apply classification
377 # =========================================================================
378 log_info "Applying classification logic..."
379
380 # Track processed repos to detect duplicates/conflicts
381 > "$tmp_dir/processed.txt"
382
383 # Counters
384 local count_no_action=0
385 local count_action=0
386 local count_manual=0
387
388 # --- NO ACTION: Complete in both ---
389 if [[ -f "$comparison_dir/complete-in-both.txt" ]]; then
390 while IFS= read -r line; do
391 [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue
392
393 repo=$(echo "$line" | extract_repo)
394 npub=$(echo "$line" | extract_npub)
395 key="${repo}|${npub}"
396
397 # Check if deleted (still no action, but different reason)
398 if grep -qF "$key" "$tmp_dir/all_deletions.txt" 2>/dev/null; then
399 echo "$repo | $npub | deleted by user (also complete in both)" >> "$no_action"
400 else
401 echo "$repo | $npub | complete in both prod and archive" >> "$no_action"
402 fi
403 echo "$key" >> "$tmp_dir/processed.txt"
404 ((count_no_action++)) || true
405 done < "$comparison_dir/complete-in-both.txt"
406 fi 297 fi
407 298
408 # --- NO ACTION: Deleted by user (not already processed) --- 299 # Add archive context for unexpected states
409 while IFS='|' read -r repo npub; do 300 if [[ "$prod_status" == "empty" && "$archive_status" != "missing" && "$archive_status" != "empty" ]]; then
410 [[ -z "$repo" ]] && continue 301 if [[ -n "$context" ]]; then
411 key="${repo}|${npub}" 302 context="$context, archive-has-data"
412 303 else
413 # Skip if already processed 304 context="archive-has-data"
414 if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then
415 continue
416 fi
417
418 # Convert pubkey to npub if needed (deletions use hex pubkey)
419 # For now, just use the pubkey as-is since we're matching by repo
420 echo "$repo | $npub | deleted by user" >> "$no_action"
421 echo "$key" >> "$tmp_dir/processed.txt"
422 ((count_no_action++)) || true
423 done < "$tmp_dir/all_deletions.txt"
424
425 # --- NO ACTION: Empty/blank in both ---
426 # Find repos that are category 2 in both prod and archive
427 comm -12 "$tmp_dir/prod_cat2.txt" "$tmp_dir/archive_cat2.txt" 2>/dev/null | while IFS='|' read -r repo npub; do
428 [[ -z "$repo" ]] && continue
429 key="${repo}|${npub}"
430
431 if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then
432 continue
433 fi 305 fi
434
435 echo "$repo | $npub | empty/blank in both (user never pushed)" >> "$no_action"
436 echo "$key" >> "$tmp_dir/processed.txt"
437 done
438
439 # --- NO ACTION: Purgatory expired (system handled it) ---
440 while IFS='|' read -r repo npub; do
441 [[ -z "$repo" ]] && continue
442 key="${repo}|${npub}"
443
444 if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then
445 continue
446 fi
447
448 echo "$repo | $npub | purgatory expired (system already handled)" >> "$no_action"
449 echo "$key" >> "$tmp_dir/processed.txt"
450 ((count_no_action++)) || true
451 done < "$tmp_dir/purgatory_expired.txt"
452
453 # --- ACTION REQUIRED: Complete in prod, missing from archive ---
454 if [[ -f "$comparison_dir/complete-prod-missing-archive.txt" ]]; then
455 while IFS= read -r line; do
456 [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue
457
458 repo=$(echo "$line" | extract_repo)
459 npub=$(echo "$line" | extract_npub)
460 key="${repo}|${npub}"
461
462 if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then
463 continue
464 fi
465
466 # Check for parse failure
467 if grep -qF "$key" "$tmp_dir/parse_failures.txt" 2>/dev/null; then
468 echo "$repo | $npub | complete in prod, missing from archive, parse failure logged | investigate parse failure, may need re-announcement" >> "$manual_inv"
469 echo "$key" >> "$tmp_dir/processed.txt"
470 ((count_manual++)) || true
471 else
472 echo "$repo | $npub | complete in prod, missing from archive | trigger re-sync or investigate why not archived" >> "$action_req"
473 echo "$key" >> "$tmp_dir/processed.txt"
474 ((count_action++)) || true
475 fi
476 done < "$comparison_dir/complete-prod-missing-archive.txt"
477 fi
478
479 # --- ACTION REQUIRED: Complete in prod, incomplete in archive ---
480 if [[ -f "$comparison_dir/complete-prod-incomplete-archive.txt" ]]; then
481 while IFS= read -r line; do
482 [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue
483
484 repo=$(echo "$line" | extract_repo)
485 npub=$(echo "$line" | extract_npub)
486 key="${repo}|${npub}"
487
488 if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then
489 continue
490 fi
491
492 # Extract archive status from line
493 archive_status=$(echo "$line" | grep -oP 'archive=\K[^ ]+' || echo "unknown")
494
495 echo "$repo | $npub | complete in prod, $archive_status in archive | wait for sync to complete or trigger re-sync" >> "$action_req"
496 echo "$key" >> "$tmp_dir/processed.txt"
497 ((count_action++)) || true
498 done < "$comparison_dir/complete-prod-incomplete-archive.txt"
499 fi 306 fi
500 307
501 # --- ACTION REQUIRED: Incomplete in both --- 308 echo "${context:-none}"
502 if [[ -f "$comparison_dir/incomplete-in-both.txt" ]]; then 309}
503 while IFS= read -r line; do 310
504 [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue 311# Helper to convert category to human-readable status
505 312cat_to_status() {
506 repo=$(echo "$line" | extract_repo) 313 case "$1" in
507 npub=$(echo "$line" | extract_npub) 314 cat1) echo "complete" ;;
508 key="${repo}|${npub}" 315 cat2) echo "empty" ;;
509 316 cat3) echo "partial" ;;
510 if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then 317 cat4) echo "no-match" ;;
511 continue 318 missing) echo "missing" ;;
512 fi 319 *) echo "$1" ;;
513 320 esac
514 # Extract statuses 321}
515 prod_status=$(echo "$line" | grep -oP 'prod=\K[^ ]+' | tr -d '|' || echo "unknown") 322
516 archive_status=$(echo "$line" | grep -oP 'archive=\K[^ ]+' || echo "unknown") 323LOOP_COUNT=0
517 324for key in "${!ALL_REPOS[@]}"; do
518 echo "$repo | $npub | incomplete in both (prod=$prod_status, archive=$archive_status) | investigate git data source, may need user to re-push" >> "$action_req" 325 LOOP_COUNT=$((LOOP_COUNT + 1))
519 echo "$key" >> "$tmp_dir/processed.txt" 326 [[ $((LOOP_COUNT % 100)) -eq 0 ]] && log_info "Processed $LOOP_COUNT repos..."
520 ((count_action++)) || true 327 IFS='|' read -r repo npub <<< "$key"
521 done < "$comparison_dir/incomplete-in-both.txt" 328
329 prod_cat="${PROD_CAT[$key]:-missing}"
330 archive_cat="${ARCHIVE_CAT[$key]:-missing}"
331 prod_status=$(cat_to_status "$prod_cat")
332 archive_status=$(cat_to_status "$archive_cat")
333
334 # Decision tree implementation
335
336 # 1. Is there a kind 5 deletion event?
337 if [[ -n "${DELETED[$key]:-}" ]]; then
338 context=$(get_context "$key" "$prod_status" "$archive_status")
339 READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | deleted by user")
340 COUNTS[ready_deleted]=$((COUNTS[ready_deleted] + 1))
341 continue
522 fi 342 fi
523 343
524 # --- MANUAL INVESTIGATION: In archive but not prod --- 344 # 2. What is the prod status?
525 if [[ -f "$comparison_dir/in-archive-not-prod.txt" ]]; then 345 case "$prod_cat" in
526 while IFS= read -r line; do 346 missing)
527 [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue 347 # Not in prod
528 348 if [[ "$archive_cat" != "missing" ]]; then
529 repo=$(echo "$line" | extract_repo) 349 # In archive but not in prod -> no action (archive-only)
530 npub=$(echo "$line" | extract_npub) 350 context=$(get_context "$key" "$prod_status" "$archive_status")
531 key="${repo}|${npub}" 351 READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | archive-only (not in prod)")
532 352 COUNTS[ready_archive_only]=$((COUNTS[ready_archive_only] + 1))
533 if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then 353 elif [[ -n "${PURGATORY[$key]:-}" ]]; then
534 continue 354 # Purgatory only, not in prod -> no action
355 context="purgatory-expired"
356 READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | purgatory-only (not in prod)")
357 COUNTS[ready_not_in_prod]=$((COUNTS[ready_not_in_prod] + 1))
535 fi 358 fi
359 # Otherwise skip (not a real repo - no data anywhere)
360 ;;
536 361
537 archive_status=$(echo "$line" | grep -oP 'archive=\K[^ ]+' || echo "unknown") 362 cat2)
363 # Empty in prod -> ALWAYS no action required
364 context=$(get_context "$key" "$prod_status" "$archive_status")
365 READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | empty in prod (user never pushed)")
366 COUNTS[ready_empty_prod]=$((COUNTS[ready_empty_prod] + 1))
367 ;;
538 368
539 # Check if it was deleted 369 cat1)
540 if grep -qF "$key" "$tmp_dir/all_deletions.txt" 2>/dev/null; then 370 # Complete in prod
541 echo "$repo | $npub | in archive not prod, deletion exists | verify deletion was intentional" >> "$manual_inv" 371 if [[ "$archive_cat" == "cat1" ]]; then
372 # Complete in both -> no action
373 context=$(get_context "$key" "$prod_status" "$archive_status")
374 READY_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete in both")
375 COUNTS[ready_complete_both]=$((COUNTS[ready_complete_both] + 1))
542 else 376 else
543 echo "$repo | $npub | in archive ($archive_status) but not in prod | may be new announcement or deleted from prod" >> "$manual_inv" 377 # Complete in prod, missing/incomplete in archive
378 # Check for parse failure - if so, needs manual review
379 if [[ -n "${PARSE_FAIL[$key]:-}" ]]; then
380 context=$(get_context "$key" "$prod_status" "$archive_status")
381 REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | complete in prod with parse failure")
382 COUNTS[review_parse_failure]=$((COUNTS[review_parse_failure] + 1))
383 else
384 # Needs resync - include purgatory context
385 context=$(get_context "$key" "$prod_status" "$archive_status")
386 if [[ "$archive_cat" == "missing" ]]; then
387 RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync to archive")
388 COUNTS[resync_missing_archive]=$((COUNTS[resync_missing_archive] + 1))
389 else
390 RESYNC_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | trigger re-sync (archive incomplete)")
391 COUNTS[resync_incomplete_archive]=$((COUNTS[resync_incomplete_archive] + 1))
392 fi
393 fi
544 fi 394 fi
545 echo "$key" >> "$tmp_dir/processed.txt" 395 ;;
546 ((count_manual++)) || true
547 done < "$comparison_dir/in-archive-not-prod.txt"
548 fi
549
550 # --- ACTION REQUIRED: Parse failures not yet processed ---
551 while IFS='|' read -r repo npub; do
552 [[ -z "$repo" ]] && continue
553 key="${repo}|${npub}"
554
555 if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then
556 continue
557 fi
558
559 echo "$repo | $npub | parse failure logged | fix event format or request user to re-announce" >> "$action_req"
560 echo "$key" >> "$tmp_dir/processed.txt"
561 ((count_action++)) || true
562 done < "$tmp_dir/parse_failures.txt"
563
564 # --- MANUAL INVESTIGATION: Prod category 3/4 not yet processed ---
565 for cat_file in "$tmp_dir/prod_cat3.txt" "$tmp_dir/prod_cat4.txt"; do
566 [[ ! -f "$cat_file" ]] && continue
567 cat_name=$(basename "$cat_file" .txt | sed 's/prod_//')
568 while IFS='|' read -r repo npub; do
569 [[ -z "$repo" ]] && continue
570 key="${repo}|${npub}"
571 396
572 if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then 397 cat3)
573 continue 398 # Partial in prod -> ALWAYS manual investigation
574 fi 399 context=$(get_context "$key" "$prod_status" "$archive_status")
400 REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | partial in prod (investigate git data)")
401 COUNTS[review_partial_prod]=$((COUNTS[review_partial_prod] + 1))
402 ;;
575 403
576 if [[ "$cat_name" == "cat3" ]]; then 404 cat4)
577 echo "$repo | $npub | partial match in prod, not in comparison results | investigate git ref mismatch" >> "$manual_inv" 405 # No-match in prod -> ALWAYS manual investigation
578 else 406 context=$(get_context "$key" "$prod_status" "$archive_status")
579 echo "$repo | $npub | no match in prod (git exists but refs don't match) | investigate git data corruption" >> "$manual_inv" 407 REVIEW_LINES+=("$repo | $npub | $prod_status | $archive_status | $context | no-match in prod (git corruption)")
580 fi 408 COUNTS[review_nomatch_prod]=$((COUNTS[review_nomatch_prod] + 1))
581 echo "$key" >> "$tmp_dir/processed.txt" 409 ;;
582 ((count_manual++)) || true 410 esac
583 done < "$cat_file" 411done
412
413# ============================================================================
414# Phase 4: Write output files
415# ============================================================================
416
417log_info "Writing output files..."
418
419TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S+00:00")
420
421# Write ready-for-migration.txt
422{
423 echo "# Ready for Migration - No action required"
424 echo "# Generated: $TIMESTAMP"
425 echo "# Format: repo | npub | prod_status | archive_status | context | reason"
426 echo "#"
427 for line in "${READY_LINES[@]}"; do
428 echo "$line"
584 done 429 done
585 430} > "$READY_FILE"
586 # ========================================================================= 431
587 # STEP 5: Count final results 432# Write needs-resync.txt
588 # ========================================================================= 433{
589 count_no_action=$(count_lines "$no_action") 434 echo "# Needs Re-sync - Action required"
590 count_action=$(count_lines "$action_req") 435 echo "# Generated: $TIMESTAMP"
591 count_manual=$(count_lines "$manual_inv") 436 echo "# Format: repo | npub | prod_status | archive_status | context | action"
592 437 echo "#"
593 # Ensure counts are valid integers 438 echo "# Context meanings:"
594 count_no_action=${count_no_action:-0} 439 echo "# purgatory-expired = archive tried to sync but failed (30min timeout)"
595 count_action=${count_action:-0} 440 echo "# none = archive never tried or announcement missing"
596 count_manual=${count_manual:-0} 441 echo "#"
597 442 for line in "${RESYNC_LINES[@]}"; do
598 local total=$((count_no_action + count_action + count_manual)) 443 echo "$line"
599 444 done
600 # Handle division by zero 445} > "$RESYNC_FILE"
601 if [[ $total -eq 0 ]]; then 446
602 total=1 # Avoid division by zero in percentage calculations 447# Write manual-review.txt
603 log_warn "No repos were classified. Check input files." 448{
604 fi 449 echo "# Manual Review Required - Investigation needed"
605 450 echo "# Generated: $TIMESTAMP"
606 # ========================================================================= 451 echo "# Format: repo | npub | prod_status | archive_status | context | reason"
607 # STEP 6: Generate summary 452 echo "#"
608 # ========================================================================= 453 for line in "${REVIEW_LINES[@]}"; do
609 log_info "Generating summary..." 454 echo "$line"
610 455 done
611 cat > "$summary" << EOF 456} > "$REVIEW_FILE"
612# Migration Classification Summary
613Generated: $(date -Iseconds)
614Analysis Directory: $analysis_dir
615
616## Overview
617
618| Category | Count | Percentage |
619|----------|-------|------------|
620| No Action Required | $count_no_action | $(awk "BEGIN {printf \"%.1f\", ($count_no_action/$total)*100}")% |
621| Action Required | $count_action | $(awk "BEGIN {printf \"%.1f\", ($count_action/$total)*100}")% |
622| Manual Investigation | $count_manual | $(awk "BEGIN {printf \"%.1f\", ($count_manual/$total)*100}")% |
623| **Total** | **$total** | **100%** |
624
625## No Action Required ($count_no_action repos)
626
627These repositories are ready for migration or don't need migration:
628
629EOF
630
631 # Breakdown of no-action reasons
632 echo "| Reason | Count |" >> "$summary"
633 echo "|--------|-------|" >> "$summary"
634 grep -v '^#' "$no_action" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn > "$tmp_dir/no_action_breakdown.txt" || true
635 while read -r cnt reason; do
636 echo "| $reason | $cnt |" >> "$summary"
637 done < "$tmp_dir/no_action_breakdown.txt"
638
639 cat >> "$summary" << EOF
640
641## Action Required ($count_action repos)
642
643These repositories need intervention before migration:
644
645EOF
646
647 # Breakdown of action reasons
648 echo "| Reason | Count | Suggested Action |" >> "$summary"
649 echo "|--------|-------|------------------|" >> "$summary"
650 grep -v '^#' "$action_req" 2>/dev/null | awk -F' \\| ' '{print $3 "|" $4}' | sort | uniq -c | sort -rn > "$tmp_dir/action_breakdown.txt" || true
651 while read -r cnt reason_action; do
652 reason=$(echo "$reason_action" | cut -d'|' -f1)
653 action=$(echo "$reason_action" | cut -d'|' -f2)
654 echo "| $reason | $cnt | $action |" >> "$summary"
655 done < "$tmp_dir/action_breakdown.txt"
656
657 cat >> "$summary" << EOF
658
659## Manual Investigation ($count_manual repos)
660
661These repositories have conflicting or unexpected states requiring human review:
662
663EOF
664
665 # Breakdown of manual investigation reasons
666 echo "| Reason | Count |" >> "$summary"
667 echo "|--------|-------|" >> "$summary"
668 grep -v '^#' "$manual_inv" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn > "$tmp_dir/manual_breakdown.txt" || true
669 while read -r cnt reason; do
670 echo "| $reason | $cnt |" >> "$summary"
671 done < "$tmp_dir/manual_breakdown.txt"
672
673 # Pre-compute counts from temp files before they might be cleaned up
674 local prod_del_count archive_del_count
675 local prod_cat1_count prod_cat2_count prod_cat3_count prod_cat4_count
676 local archive_cat1_count archive_cat2_count archive_cat3_count archive_cat4_count
677 local parse_fail_count purgatory_count
678
679 prod_del_count=$(wc -l < "$tmp_dir/prod_deletions.txt" 2>/dev/null | tr -d ' ') || prod_del_count=0
680 archive_del_count=$(wc -l < "$tmp_dir/archive_deletions.txt" 2>/dev/null | tr -d ' ') || archive_del_count=0
681 prod_cat1_count=$(wc -l < "$tmp_dir/prod_cat1.txt" 2>/dev/null | tr -d ' ') || prod_cat1_count=0
682 prod_cat2_count=$(wc -l < "$tmp_dir/prod_cat2.txt" 2>/dev/null | tr -d ' ') || prod_cat2_count=0
683 prod_cat3_count=$(wc -l < "$tmp_dir/prod_cat3.txt" 2>/dev/null | tr -d ' ') || prod_cat3_count=0
684 prod_cat4_count=$(wc -l < "$tmp_dir/prod_cat4.txt" 2>/dev/null | tr -d ' ') || prod_cat4_count=0
685 archive_cat1_count=$(wc -l < "$tmp_dir/archive_cat1.txt" 2>/dev/null | tr -d ' ') || archive_cat1_count=0
686 archive_cat2_count=$(wc -l < "$tmp_dir/archive_cat2.txt" 2>/dev/null | tr -d ' ') || archive_cat2_count=0
687 archive_cat3_count=$(wc -l < "$tmp_dir/archive_cat3.txt" 2>/dev/null | tr -d ' ') || archive_cat3_count=0
688 archive_cat4_count=$(wc -l < "$tmp_dir/archive_cat4.txt" 2>/dev/null | tr -d ' ') || archive_cat4_count=0
689 parse_fail_count=$(wc -l < "$tmp_dir/parse_failures.txt" 2>/dev/null | tr -d ' ') || parse_fail_count=0
690 purgatory_count=$(wc -l < "$tmp_dir/purgatory_expired.txt" 2>/dev/null | tr -d ' ') || purgatory_count=0
691
692 cat >> "$summary" << EOF
693
694## Input Data Summary
695
696### Phase 1 (Events)
697- Prod deletions: $prod_del_count
698- Archive deletions: $archive_del_count
699
700### Phase 3 (Categories)
701**Prod:**
702- Category 1 (complete): $prod_cat1_count
703- Category 2 (empty): $prod_cat2_count
704- Category 3 (partial): $prod_cat3_count
705- Category 4 (no match): $prod_cat4_count
706
707**Archive:**
708- Category 1 (complete): $archive_cat1_count
709- Category 2 (empty): $archive_cat2_count
710- Category 3 (partial): $archive_cat3_count
711- Category 4 (no match): $archive_cat4_count
712
713### Phase 4 (Logs)
714- Parse failures: $parse_fail_count
715- Purgatory expired: $purgatory_count
716 457
717## Recommended Next Steps 458# ============================================================================
459# Phase 5: Generate summary
460# ============================================================================
718 461
7191. **Review action-required.txt** - Address these repos before migration 462log_info "Generating summary..."
7202. **Review manual-investigation.txt** - Investigate unusual states
7213. **Verify no-action-required.txt** - Spot-check a few repos to confirm
7224. **Plan migration window** - Schedule cutover when action items are resolved
723 463
724## Output Files 464TOTAL_READY="${#READY_LINES[@]}"
465TOTAL_RESYNC="${#RESYNC_LINES[@]}"
466TOTAL_REVIEW="${#REVIEW_LINES[@]}"
467TOTAL=$((TOTAL_READY + TOTAL_RESYNC + TOTAL_REVIEW))
725 468
726- \`results/no-action-required.txt\` - $count_no_action repos ready for migration 469# Calculate percentages
727- \`results/action-required.txt\` - $count_action repos needing intervention 470if [[ $TOTAL -gt 0 ]]; then
728- \`results/manual-investigation.txt\` - $count_manual repos needing human review 471 PCT_READY=$(awk "BEGIN {printf \"%.1f\", ($TOTAL_READY / $TOTAL) * 100}")
729- \`results/summary.txt\` - This summary file 472 PCT_RESYNC=$(awk "BEGIN {printf \"%.1f\", ($TOTAL_RESYNC / $TOTAL) * 100}")
730EOF 473 PCT_REVIEW=$(awk "BEGIN {printf \"%.1f\", ($TOTAL_REVIEW / $TOTAL) * 100}")
474else
475 PCT_READY="0.0"
476 PCT_RESYNC="0.0"
477 PCT_REVIEW="0.0"
478fi
731 479
732 # ========================================================================= 480{
733 # STEP 7: Display results 481 echo "# Migration Classification Summary"
734 # ========================================================================= 482 echo "Generated: $TIMESTAMP"
483 echo "Analysis Directory: $ANALYSIS_DIR"
735 echo "" 484 echo ""
736 log_info "=== Classification Complete ===" 485 echo "## Overview"
737 echo "" 486 echo ""
738 log_success "No Action Required: $count_no_action repos" 487 echo "| Category | Count | Percentage |"
739 log_warn "Action Required: $count_action repos" 488 echo "|----------|-------|------------|"
740 log_error "Manual Investigation: $count_manual repos" 489 echo "| Ready for Migration | $TOTAL_READY | $PCT_READY% |"
490 echo "| Needs Re-sync | $TOTAL_RESYNC | $PCT_RESYNC% |"
491 echo "| Manual Review | $TOTAL_REVIEW | $PCT_REVIEW% |"
492 echo "| **Total** | **$TOTAL** | **100%** |"
741 echo "" 493 echo ""
742 log_info "Total: $total repos classified" 494 echo "## Tier 1: Ready for Migration ($TOTAL_READY repos)"
743 echo "" 495 echo ""
744 log_info "Output files:" 496 echo "These repositories are ready for migration or don't need migration:"
745 echo " $no_action"
746 echo " $action_req"
747 echo " $manual_inv"
748 echo " $summary"
749 echo "" 497 echo ""
750 498 echo "| Reason | Count |"
751 # Show top action items 499 echo "|--------|-------|"
752 if [[ $count_action -gt 0 ]]; then 500 echo "| complete in both prod and archive | ${COUNTS[ready_complete_both]} |"
753 log_info "Top action items:" 501 echo "| deleted by user | ${COUNTS[ready_deleted]} |"
754 grep -v '^#' "$action_req" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn | head -5 | while read -r cnt reason; do 502 echo "| empty in prod (user never pushed) | ${COUNTS[ready_empty_prod]} |"
755 echo " - $reason: $cnt repos" 503 echo "| archive-only (not in prod) | ${COUNTS[ready_archive_only]} |"
756 done 504 echo "| purgatory-only (not in prod) | ${COUNTS[ready_not_in_prod]} |"
757 echo "" 505 echo ""
758 fi 506 echo "## Tier 2: Needs Re-sync ($TOTAL_RESYNC repos)"
759 507 echo ""
760 # Show top investigation items 508 echo "These repositories need re-sync to archive before migration:"
761 if [[ $count_manual -gt 0 ]]; then 509 echo ""
762 log_info "Top investigation items:" 510 echo "| Reason | Count | Action |"
763 grep -v '^#' "$manual_inv" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn | head -5 | while read -r cnt reason; do 511 echo "|--------|-------|--------|"
764 echo " - $reason: $cnt repos" 512 echo "| complete in prod, missing from archive | ${COUNTS[resync_missing_archive]} | trigger re-sync |"
765 done 513 echo "| complete in prod, incomplete in archive | ${COUNTS[resync_incomplete_archive]} | trigger re-sync |"
766 echo "" 514 echo ""
767 fi 515 echo "### Purgatory Context"
768 516 echo ""
769 log_info "See $summary for full details and recommended next steps." 517 echo "Repos in needs-resync.txt include purgatory context:"
770} 518 echo "- **purgatory-expired**: Archive tried to sync but failed (30min timeout)"
771 519 echo "- **none**: Archive never tried or announcement missing"
772main "$@" 520 echo ""
521 echo "## Tier 3: Manual Review ($TOTAL_REVIEW repos)"
522 echo ""
523 echo "These repositories require human investigation:"
524 echo ""
525 echo "| Reason | Count |"
526 echo "|--------|-------|"
527 echo "| partial in prod (cat3) | ${COUNTS[review_partial_prod]} |"
528 echo "| no-match in prod (cat4) | ${COUNTS[review_nomatch_prod]} |"
529 echo "| complete in prod with parse failure | ${COUNTS[review_parse_failure]} |"
530 echo ""
531 echo "## Input Data Summary"
532 echo ""
533 echo "### Prod Categories"
534 echo "- Category 1 (complete): $(wc -l < "$PROD_DIR/category1-complete-match.txt")"
535 echo "- Category 2 (empty): $(wc -l < "$PROD_DIR/category2-empty-blank.txt")"
536 echo "- Category 3 (partial): $(wc -l < "$PROD_DIR/category3-partial-match.txt")"
537 echo "- Category 4 (no match): $(wc -l < "$PROD_DIR/category4-no-match.txt")"
538 echo ""
539 echo "### Archive Categories"
540 echo "- Category 1 (complete): $(wc -l < "$ARCHIVE_DIR/category1-complete-match.txt")"
541 echo "- Category 2 (empty): $(wc -l < "$ARCHIVE_DIR/category2-empty-blank.txt")"
542 echo "- Category 3 (partial): $(wc -l < "$ARCHIVE_DIR/category3-partial-match.txt")"
543 echo "- Category 4 (no match): $(wc -l < "$ARCHIVE_DIR/category4-no-match.txt")"
544 echo ""
545 echo "### Logs"
546 echo "- Parse failures: $(grep -c -v '^#' "$LOGS_DIR/parse-failures.txt" 2>/dev/null || echo 0)"
547 echo "- Purgatory expired: $(grep -c -v '^#' "$LOGS_DIR/purgatory-expired.txt" 2>/dev/null || echo 0)"
548 echo ""
549 echo "## Output Files"
550 echo ""
551 echo "- \`results/ready-for-migration.txt\` - $TOTAL_READY repos ready for migration"
552 echo "- \`results/needs-resync.txt\` - $TOTAL_RESYNC repos needing re-sync"
553 echo "- \`results/manual-review.txt\` - $TOTAL_REVIEW repos needing investigation"
554 echo "- \`results/summary.txt\` - This summary file"
555 echo ""
556 echo "## Recommended Next Steps"
557 echo ""
558 echo "1. **Review needs-resync.txt** - Trigger re-sync for these repos"
559 echo "2. **Review manual-review.txt** - Investigate unusual states"
560 echo "3. **Verify ready-for-migration.txt** - Spot-check a few repos"
561 echo "4. **Plan migration window** - Schedule cutover when action items resolved"
562} > "$SUMMARY_FILE"
563
564# ============================================================================
565# Phase 6: Print summary to console
566# ============================================================================
567
568echo ""
569log_success "Classification complete!"
570echo ""
571echo "=== Summary ==="
572echo "Ready for Migration: $TOTAL_READY ($PCT_READY%)"
573echo " - Complete in both: ${COUNTS[ready_complete_both]}"
574echo " - Deleted by user: ${COUNTS[ready_deleted]}"
575echo " - Empty in prod: ${COUNTS[ready_empty_prod]}"
576echo " - Archive-only: ${COUNTS[ready_archive_only]}"
577echo " - Purgatory-only: ${COUNTS[ready_not_in_prod]}"
578echo ""
579echo "Needs Re-sync: $TOTAL_RESYNC ($PCT_RESYNC%)"
580echo " - Missing from archive: ${COUNTS[resync_missing_archive]}"
581echo " - Incomplete in archive: ${COUNTS[resync_incomplete_archive]}"
582echo ""
583echo "Manual Review: $TOTAL_REVIEW ($PCT_REVIEW%)"
584echo " - Partial in prod: ${COUNTS[review_partial_prod]}"
585echo " - No-match in prod: ${COUNTS[review_nomatch_prod]}"
586echo " - Parse failures: ${COUNTS[review_parse_failure]}"
587echo ""
588echo "Total: $TOTAL repos"
589echo ""
590echo "Output files:"
591echo " $READY_FILE"
592echo " $RESYNC_FILE"
593echo " $REVIEW_FILE"
594echo " $SUMMARY_FILE"