diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-23 11:41:12 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-27 20:37:58 +0000 |
| commit | 323336c84613b74921ebc75bf46ccd2d8f9cd6cc (patch) | |
| tree | 81381119ffa818160b6ec3faa45a9e97d2fa8d04 /docs/how-to/migration-scripts | |
| parent | 73a366cbd7be4edf9c74194cd0891c80a15236a5 (diff) | |
Add Phase 5 migration script for final classification
- Combines all data sources from Phases 1-4
- Produces three actionable outputs: no-action, action-required, manual-investigation
- Generates comprehensive summary with recommendations
- Handles missing Phase 4 logs gracefully
- Classification logic for migration decision-making
Diffstat (limited to 'docs/how-to/migration-scripts')
| -rwxr-xr-x | docs/how-to/migration-scripts/40-classify-actions.sh | 770 |
1 files changed, 770 insertions, 0 deletions
diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh new file mode 100755 index 0000000..9fc718f --- /dev/null +++ b/docs/how-to/migration-scripts/40-classify-actions.sh | |||
| @@ -0,0 +1,770 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 40-classify-actions.sh - Final classification of repos for migration action | ||
| 4 | # | ||
| 5 | # PHASE 5 of the ngit-relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Combines all data sources from previous phases to produce actionable results. | ||
| 7 | # | ||
| 8 | # USAGE: | ||
| 9 | # ./40-classify-actions.sh <analysis-dir> | ||
| 10 | # | ||
| 11 | # EXAMPLES: | ||
| 12 | # ./40-classify-actions.sh work/migration-analysis-20260122-1430 | ||
| 13 | # | ||
| 14 | # INPUT DIRECTORY STRUCTURE: | ||
| 15 | # <analysis-dir>/ | ||
| 16 | # ├── prod/ | ||
| 17 | # │ ├── raw/ | ||
| 18 | # │ │ └── deletions.json # Phase 1: kind 5 deletion events | ||
| 19 | # │ ├── category1-complete-match.txt # Phase 3: complete git sync | ||
| 20 | # │ ├── category2-empty-blank.txt # Phase 3: no git data | ||
| 21 | # │ ├── category3-partial-match.txt # Phase 3: partial git sync | ||
| 22 | # │ └── category4-no-match.txt # Phase 3: git exists, refs don't match | ||
| 23 | # ├── archive/ | ||
| 24 | # │ ├── raw/ | ||
| 25 | # │ │ └── deletions.json | ||
| 26 | # │ ├── category1-complete-match.txt | ||
| 27 | # │ ├── category2-empty-blank.txt | ||
| 28 | # │ ├── category3-partial-match.txt | ||
| 29 | # │ └── category4-no-match.txt | ||
| 30 | # ├── comparison/ | ||
| 31 | # │ ├── complete-in-both.txt # Phase 3: no action needed | ||
| 32 | # │ ├── complete-prod-missing-archive.txt # Phase 3: needs investigation | ||
| 33 | # │ ├── complete-prod-incomplete-archive.txt # Phase 3: sync in progress? | ||
| 34 | # │ ├── incomplete-in-both.txt # Phase 3: git incomplete | ||
| 35 | # │ └── in-archive-not-prod.txt # Phase 3: deleted or new | ||
| 36 | # └── logs/ | ||
| 37 | # ├── parse-failures.txt # Phase 4: events that failed to parse | ||
| 38 | # └── purgatory-expired.txt # Phase 4: repos that expired from purgatory | ||
| 39 | # | ||
| 40 | # OUTPUT: | ||
| 41 | # <analysis-dir>/results/ | ||
| 42 | # ├── no-action-required.txt # Repos that are fine as-is | ||
| 43 | # ├── action-required.txt # Repos needing intervention | ||
| 44 | # ├── manual-investigation.txt # Repos needing human review | ||
| 45 | # └── summary.txt # Human-readable summary | ||
| 46 | # | ||
| 47 | # OUTPUT FORMATS: | ||
| 48 | # no-action-required.txt: | ||
| 49 | # repo | npub | reason | ||
| 50 | # | ||
| 51 | # action-required.txt: | ||
| 52 | # repo | npub | reason | suggested_action | ||
| 53 | # | ||
| 54 | # manual-investigation.txt: | ||
| 55 | # repo | npub | reason | context | ||
| 56 | # | ||
| 57 | # CLASSIFICATION LOGIC: | ||
| 58 | # | ||
| 59 | # NO ACTION REQUIRED: | ||
| 60 | # - Complete in both prod and archive (successfully migrated) | ||
| 61 | # - Empty/blank in both (user never pushed any data) | ||
| 62 | # - Deleted by user (kind 5 deletion event exists) | ||
| 63 | # - In purgatory expiry logs (system already handled it) | ||
| 64 | # | ||
| 65 | # ACTION REQUIRED: | |
| 66 | # - Complete in prod, missing from archive → Re-sync needed | |
| 67 | # - Complete in prod, incomplete in archive → Wait for sync or re-trigger | |
| 68 | # - Incomplete in both prod and archive → Investigate git data source, may need re-push | |
| 69 | # - Parse failures (not otherwise classified) → Fix event format or re-announce | |
| 70 | # | |
| 71 | # MANUAL INVESTIGATION: | |
| 72 | # - Conflicting states (e.g., complete in prod but parse failure logged) | |
| 73 | # - In archive but not prod (deleted? or new announcement?) | |
| 74 | # - Partial match in prod (category 3) absent from comparison results → Investigate ref mismatch | |
| 75 | # - No match in prod (category 4) absent from comparison results → Investigate git data corruption | |
| 76 | # - Multiple issues or unexpected state combinations for the same repo | |
| 77 | # | ||
| 78 | # PREREQUISITES: | ||
| 79 | # - jq (for parsing JSON) | ||
| 80 | # - awk, sort, comm (standard Unix tools) | ||
| 81 | # | ||
| 82 | # RUNTIME: < 5 seconds (local processing only) | ||
| 83 | # | ||
| 84 | # SEE ALSO: | ||
| 85 | # docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide | ||
| 86 | # 01-fetch-events.sh - Phase 1 (fetch events) | ||
| 87 | # 10-check-git-sync.sh - Phase 2 (git sync check) | ||
| 88 | # 20-categorize.sh, 21-compare-relays.sh - Phase 3 (categorize and compare) | ||
| 89 | # 30-extract-parse-failures.sh, 31-extract-purgatory-expiry.sh - Phase 4 (logs) | ||
| 90 | # | ||
| 91 | |||
set -euo pipefail

# ANSI color codes used for console output.
# Colors are enabled only when BOTH stdout and stderr are terminals. The
# log_* helpers write to stderr, so checking stdout alone would leak raw
# escape sequences into a redirected stderr (e.g. `script.sh dir 2> run.log`).
# The values are stored as backslash escapes and must be expanded by the
# consumer (echo -e or printf %b).
if [[ -t 1 && -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    BOLD='\033[1m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    BOLD=''
    NC=''
fi
| 110 | |||
# Print an informational message to stderr with a blue "[INFO]" tag.
# Arguments: the message words, joined with spaces via "$*".
# The color variables hold backslash escapes and are expanded with %b; the
# message is printed verbatim with %s so that backslashes in logged paths or
# repo names are never interpreted as escape sequences.
log_info() {
    printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$*" >&2
}
| 114 | |||
# Print a success message to stderr with a green "[OK]" tag.
# Arguments: the message words, joined with spaces via "$*".
# Color codes are expanded with %b; the message is printed verbatim with %s
# so that backslashes in the text are not treated as escape sequences.
log_success() {
    printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$*" >&2
}
| 118 | |||
# Print a warning message to stderr with a yellow "[WARN]" tag.
# Arguments: the message words, joined with spaces via "$*".
# Color codes are expanded with %b; the message is printed verbatim with %s
# so that backslashes in the text are not treated as escape sequences.
log_warn() {
    printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*" >&2
}
| 122 | |||
# Print an error message to stderr with a red "[ERROR]" tag.
# Arguments: the message words, joined with spaces via "$*".
# Color codes are expanded with %b; the message is printed verbatim with %s
# so that backslashes in the text (e.g. a user-supplied directory path) are
# not treated as escape sequences.
log_error() {
    printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*" >&2
}
| 126 | |||
# Print the command-line help text to stdout, then terminate the script with
# exit status 1. "$0" is interpolated so the text shows the invoked path.
usage() {
    cat << EOF
Usage: $0 <analysis-dir>

Arguments:
 analysis-dir Directory containing Phase 1-4 output

Examples:
 $0 work/migration-analysis-20260122-1430

Required input structure:
 <analysis-dir>/prod/category*.txt
 <analysis-dir>/archive/category*.txt
 <analysis-dir>/comparison/*.txt
 <analysis-dir>/logs/*.txt (optional)
 <analysis-dir>/prod/raw/deletions.json

Output:
 <analysis-dir>/results/no-action-required.txt
 <analysis-dir>/results/action-required.txt
 <analysis-dir>/results/manual-investigation.txt
 <analysis-dir>/results/summary.txt
EOF
    exit 1
}
| 150 | |||
# Reduce each category line read from stdin to its lookup key.
#   stdin:  "repo | npub | state_refs=N | ..."
#   stdout: "repo|npub"
# Fields are separated by the three-character sequence " | ".
extract_key() {
    awk 'BEGIN { FS = " [|] "; OFS = "|" } { print $1, $2 }'
}
| 157 | |||
# Print the repo field (everything before the first " | ") of each category
# line read from stdin.
#   stdin:  "repo | npub | ..."
#   stdout: "repo"
# A line without any " | " separator is printed unchanged.
extract_repo() {
    awk '{
        sep_at = index($0, " | ")
        print (sep_at ? substr($0, 1, sep_at - 1) : $0)
    }'
}
| 164 | |||
# Print the npub field (the second " | "-separated column) of each category
# line read from stdin.
#   stdin:  "repo | npub | ..."
#   stdout: "npub"
# A line with fewer than two columns yields an empty output line.
extract_npub() {
    awk '{
        ncols = split($0, cols, / [|] /)
        print (ncols >= 2 ? cols[2] : "")
    }'
}
| 171 | |||
# Report whether a file exists and contains at least one data line, i.e. a
# non-empty line that does not start with '#'.
#
# Arguments:
#   $1 - path of the file to inspect
# Returns:
#   0 if a data line exists; non-zero if the file is missing, unreadable, or
#   holds only comments/blank lines.
#
# A single grep is used on purpose: with `set -o pipefail`, a two-stage
# `grep -v ... | grep -q .` pipeline can return 141 (SIGPIPE) when grep -q
# exits on the first match while the first grep is still writing, which
# would misreport a large, populated file as empty.
file_has_content() {
    local file="$1"
    [[ -f "$file" ]] || return 1
    # '^[^#]' matches exactly the lines that are non-empty and not comments.
    grep -q '^[^#]' "$file" 2>/dev/null
}
| 181 | |||
# Print the number of data lines in a file: lines that are non-empty and do
# not start with '#'. Prints 0 when the file does not exist.
#
# Arguments:
#   $1 - path of the file to count
count_lines() {
    local path="$1"
    local total=0
    if [[ -f "$path" ]]; then
        # grep -c exits non-zero when nothing matches (it still prints 0) or
        # when the file cannot be read; both cases collapse to a count of 0.
        total=$(grep -c '^[^#]' "$path" 2>/dev/null) || total=0
    fi
    # Always emit a clean integer, even if grep produced no output.
    echo "${total:-0}"
}
| 194 | |||
# Extract deleted repository keys from a file of kind 5 (deletion) events.
#
# Arguments:
#   $1 - path to deletions.json (parsed with jq; non-kind-5 values are ignored)
#   $2 - path of the output file to (over)write
#
# Output file: one "identifier|pubkey" line per distinct deletion target,
# sorted and de-duplicated. Only "a" tags of the form
#   30617:<pubkey>:<repo-identifier>
# are used. "e" tags reference an event ID and would need to be
# cross-referenced with the announcements to recover repo/npub, so they are
# skipped here. The pubkey written is the deletion event's own .pubkey.
# NOTE(review): the caller's comments say deletions use a hex pubkey, whereas
# category files are keyed by npub - confirm the keys are comparable.
#
# The repo identifier is everything after the second ':' so identifiers that
# themselves contain ':' are kept intact, and "a" tags with fewer than three
# parts are skipped instead of producing a "null|<pubkey>" key.
#
# A missing input file yields an empty output file. A jq failure (malformed
# JSON, jq not installed) is not fatal: whatever was parsed is kept and a
# warning is logged so the problem is not silently hidden.
parse_deletions() {
    local deletions_file="$1"
    local output_file="$2"

    if [[ ! -f "$deletions_file" ]]; then
        touch "$output_file"
        return
    fi

    local jq_filter='
        select(.kind == 5)
        | .pubkey as $pubkey
        | .tags[]
        | select(.[0] == "a")
        | .[1]
        | split(":")
        | select(.[0] == "30617" and length >= 3)
        | (.[2:] | join(":")) as $repo
        | "\($repo)|\($pubkey)"
    '

    # The redirection creates the output file even when jq emits nothing.
    if ! jq -r "$jq_filter" "$deletions_file" 2>/dev/null | sort -u > "$output_file"; then
        log_warn "Could not fully parse $deletions_file; deletion list may be incomplete"
    fi
}
| 224 | |||
# Build a lookup set of "repo|npub" keys from a category file.
#
# Arguments:
#   $1 - path of a category file ("repo | npub | ..." lines, '#' comments)
# Output (stdout): the distinct keys, one per line, sorted.
# Prints nothing and returns 0 when the file does not exist.
#
# Comment lines AND blank lines are dropped before key extraction. A blank
# line would otherwise become a bogus "|" key and inflate any count taken
# from the resulting set; count_lines and file_has_content already ignore
# blank lines, so this keeps the helpers consistent.
build_key_set() {
    local file="$1"
    if [[ ! -f "$file" ]]; then
        return 0
    fi
    # grep exits 1 when every line is filtered out; `|| true` keeps that from
    # failing the pipeline under `set -o pipefail`.
    { grep -v -e '^#' -e '^$' "$file" 2>/dev/null || true; } | extract_key | sort -u
}
| 235 | |||
| 236 | # Main classification logic | ||
| 237 | main() { | ||
| 238 | if [[ $# -ne 1 ]]; then | ||
| 239 | usage | ||
| 240 | fi | ||
| 241 | |||
| 242 | local analysis_dir="$1" | ||
| 243 | |||
| 244 | # Validate input directory | ||
| 245 | if [[ ! -d "$analysis_dir" ]]; then | ||
| 246 | log_error "Analysis directory not found: $analysis_dir" | ||
| 247 | exit 1 | ||
| 248 | fi | ||
| 249 | |||
| 250 | # Check for required subdirectories | ||
| 251 | local prod_dir="$analysis_dir/prod" | ||
| 252 | local archive_dir="$analysis_dir/archive" | ||
| 253 | local comparison_dir="$analysis_dir/comparison" | ||
| 254 | local logs_dir="$analysis_dir/logs" | ||
| 255 | local results_dir="$analysis_dir/results" | ||
| 256 | |||
| 257 | for dir in "$prod_dir" "$archive_dir" "$comparison_dir"; do | ||
| 258 | if [[ ! -d "$dir" ]]; then | ||
| 259 | log_error "Required directory not found: $dir" | ||
| 260 | log_error "Run Phases 1-3 first to generate input data." | ||
| 261 | exit 1 | ||
| 262 | fi | ||
| 263 | done | ||
| 264 | |||
| 265 | # Check for required category files | ||
| 266 | if [[ ! -f "$prod_dir/category1-complete-match.txt" ]]; then | ||
| 267 | log_error "Missing category files in $prod_dir" | ||
| 268 | log_error "Run Phase 3 (20-categorize.sh) first." | ||
| 269 | exit 1 | ||
| 270 | fi | ||
| 271 | |||
| 272 | log_info "Starting final classification" | ||
| 273 | log_info "Analysis directory: $analysis_dir" | ||
| 274 | |||
| 275 | # Create output directory | ||
| 276 | mkdir -p "$results_dir" | ||
| 277 | |||
| 278 | # Create temp directory for intermediate files | ||
| 279 | local tmp_dir | ||
| 280 | tmp_dir=$(mktemp -d) | ||
| 281 | # shellcheck disable=SC2064 | ||
| 282 | trap "rm -rf '$tmp_dir'" EXIT | ||
| 283 | |||
| 284 | # Initialize output files | ||
| 285 | local no_action="$results_dir/no-action-required.txt" | ||
| 286 | local action_req="$results_dir/action-required.txt" | ||
| 287 | local manual_inv="$results_dir/manual-investigation.txt" | ||
| 288 | local summary="$results_dir/summary.txt" | ||
| 289 | |||
| 290 | # Write headers | ||
| 291 | { | ||
| 292 | echo "# No Action Required - Repos that are fine as-is" | ||
| 293 | echo "# Generated: $(date -Iseconds)" | ||
| 294 | echo "# Format: repo | npub | reason" | ||
| 295 | echo "#" | ||
| 296 | } > "$no_action" | ||
| 297 | |||
| 298 | { | ||
| 299 | echo "# Action Required - Repos needing intervention" | ||
| 300 | echo "# Generated: $(date -Iseconds)" | ||
| 301 | echo "# Format: repo | npub | reason | suggested_action" | ||
| 302 | echo "#" | ||
| 303 | } > "$action_req" | ||
| 304 | |||
| 305 | { | ||
| 306 | echo "# Manual Investigation Required - Repos needing human review" | ||
| 307 | echo "# Generated: $(date -Iseconds)" | ||
| 308 | echo "# Format: repo | npub | reason | context" | ||
| 309 | echo "#" | ||
| 310 | } > "$manual_inv" | ||
| 311 | |||
| 312 | # ========================================================================= | ||
| 313 | # STEP 1: Parse deletion events | ||
| 314 | # ========================================================================= | ||
| 315 | log_info "Parsing deletion events..." | ||
| 316 | |||
| 317 | parse_deletions "$prod_dir/raw/deletions.json" "$tmp_dir/prod_deletions.txt" | ||
| 318 | parse_deletions "$archive_dir/raw/deletions.json" "$tmp_dir/archive_deletions.txt" | ||
| 319 | |||
| 320 | # Combine deletions (union of both) | ||
| 321 | cat "$tmp_dir/prod_deletions.txt" "$tmp_dir/archive_deletions.txt" 2>/dev/null | sort -u > "$tmp_dir/all_deletions.txt" | ||
| 322 | |||
| 323 | local deletion_count | ||
| 324 | deletion_count=$(wc -l < "$tmp_dir/all_deletions.txt" | tr -d ' ') | ||
| 325 | log_info "Found $deletion_count deletion requests" | ||
| 326 | |||
| 327 | # ========================================================================= | ||
| 328 | # STEP 2: Parse log-based categories (Phase 4) | ||
| 329 | # ========================================================================= | ||
| 330 | log_info "Parsing log-based categories..." | ||
| 331 | |||
| 332 | # Parse failures: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason | ||
| 333 | if [[ -f "$logs_dir/parse-failures.txt" ]] && file_has_content "$logs_dir/parse-failures.txt"; then | ||
| 334 | grep -v '^#' "$logs_dir/parse-failures.txt" | awk -F'\t' '{print $1 "|" $2}' | sort -u > "$tmp_dir/parse_failures.txt" | ||
| 335 | log_info "Found $(wc -l < "$tmp_dir/parse_failures.txt" | tr -d ' ') parse failure entries" | ||
| 336 | else | ||
| 337 | touch "$tmp_dir/parse_failures.txt" | ||
| 338 | log_info "No parse failures found (logs may be empty or not yet generated)" | ||
| 339 | fi | ||
| 340 | |||
| 341 | # Purgatory expired: repo<TAB>npub<TAB>timestamp<TAB>reason | ||
| 342 | if [[ -f "$logs_dir/purgatory-expired.txt" ]] && file_has_content "$logs_dir/purgatory-expired.txt"; then | ||
| 343 | grep -v '^#' "$logs_dir/purgatory-expired.txt" | awk -F'\t' '{print $1 "|" $2}' | sort -u > "$tmp_dir/purgatory_expired.txt" | ||
| 344 | log_info "Found $(wc -l < "$tmp_dir/purgatory_expired.txt" | tr -d ' ') purgatory expiry entries" | ||
| 345 | else | ||
| 346 | touch "$tmp_dir/purgatory_expired.txt" | ||
| 347 | log_info "No purgatory expiry entries found (logs may be empty or not yet generated)" | ||
| 348 | fi | ||
| 349 | |||
| 350 | # ========================================================================= | ||
| 351 | # STEP 3: Build lookup tables from category files | ||
| 352 | # ========================================================================= | ||
| 353 | log_info "Building lookup tables..." | ||
| 354 | |||
| 355 | # Build key sets for each category (prod) | ||
| 356 | build_key_set "$prod_dir/category1-complete-match.txt" > "$tmp_dir/prod_cat1.txt" | ||
| 357 | build_key_set "$prod_dir/category2-empty-blank.txt" > "$tmp_dir/prod_cat2.txt" | ||
| 358 | build_key_set "$prod_dir/category3-partial-match.txt" > "$tmp_dir/prod_cat3.txt" | ||
| 359 | build_key_set "$prod_dir/category4-no-match.txt" > "$tmp_dir/prod_cat4.txt" | ||
| 360 | |||
| 361 | # Build key sets for each category (archive) | ||
| 362 | build_key_set "$archive_dir/category1-complete-match.txt" > "$tmp_dir/archive_cat1.txt" | ||
| 363 | build_key_set "$archive_dir/category2-empty-blank.txt" > "$tmp_dir/archive_cat2.txt" | ||
| 364 | build_key_set "$archive_dir/category3-partial-match.txt" > "$tmp_dir/archive_cat3.txt" | ||
| 365 | build_key_set "$archive_dir/category4-no-match.txt" > "$tmp_dir/archive_cat4.txt" | ||
| 366 | |||
| 367 | # All repos in prod | ||
| 368 | cat "$tmp_dir"/prod_cat*.txt 2>/dev/null | sort -u > "$tmp_dir/all_prod.txt" || true | ||
| 369 | |||
| 370 | # All repos in archive | ||
| 371 | cat "$tmp_dir"/archive_cat*.txt 2>/dev/null | sort -u > "$tmp_dir/all_archive.txt" || true | ||
| 372 | |||
| 373 | # ========================================================================= | ||
| 374 | # STEP 4: Process comparison files and apply classification | ||
| 375 | # ========================================================================= | ||
| 376 | log_info "Applying classification logic..." | ||
| 377 | |||
| 378 | # Track processed repos to detect duplicates/conflicts | ||
| 379 | > "$tmp_dir/processed.txt" | ||
| 380 | |||
| 381 | # Counters | ||
| 382 | local count_no_action=0 | ||
| 383 | local count_action=0 | ||
| 384 | local count_manual=0 | ||
| 385 | |||
| 386 | # --- NO ACTION: Complete in both --- | ||
| 387 | if [[ -f "$comparison_dir/complete-in-both.txt" ]]; then | ||
| 388 | while IFS= read -r line; do | ||
| 389 | [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue | ||
| 390 | |||
| 391 | repo=$(echo "$line" | extract_repo) | ||
| 392 | npub=$(echo "$line" | extract_npub) | ||
| 393 | key="${repo}|${npub}" | ||
| 394 | |||
| 395 | # Check if deleted (still no action, but different reason) | ||
| 396 | if grep -qF "$key" "$tmp_dir/all_deletions.txt" 2>/dev/null; then | ||
| 397 | echo "$repo | $npub | deleted by user (also complete in both)" >> "$no_action" | ||
| 398 | else | ||
| 399 | echo "$repo | $npub | complete in both prod and archive" >> "$no_action" | ||
| 400 | fi | ||
| 401 | echo "$key" >> "$tmp_dir/processed.txt" | ||
| 402 | ((count_no_action++)) || true | ||
| 403 | done < "$comparison_dir/complete-in-both.txt" | ||
| 404 | fi | ||
| 405 | |||
| 406 | # --- NO ACTION: Deleted by user (not already processed) --- | ||
| 407 | while IFS='|' read -r repo npub; do | ||
| 408 | [[ -z "$repo" ]] && continue | ||
| 409 | key="${repo}|${npub}" | ||
| 410 | |||
| 411 | # Skip if already processed | ||
| 412 | if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then | ||
| 413 | continue | ||
| 414 | fi | ||
| 415 | |||
| 416 | # Convert pubkey to npub if needed (deletions use hex pubkey) | ||
| 417 | # For now, just use the pubkey as-is since we're matching by repo | ||
| 418 | echo "$repo | $npub | deleted by user" >> "$no_action" | ||
| 419 | echo "$key" >> "$tmp_dir/processed.txt" | ||
| 420 | ((count_no_action++)) || true | ||
| 421 | done < "$tmp_dir/all_deletions.txt" | ||
| 422 | |||
| 423 | # --- NO ACTION: Empty/blank in both --- | ||
| 424 | # Find repos that are category 2 in both prod and archive | ||
| 425 | comm -12 "$tmp_dir/prod_cat2.txt" "$tmp_dir/archive_cat2.txt" 2>/dev/null | while IFS='|' read -r repo npub; do | ||
| 426 | [[ -z "$repo" ]] && continue | ||
| 427 | key="${repo}|${npub}" | ||
| 428 | |||
| 429 | if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then | ||
| 430 | continue | ||
| 431 | fi | ||
| 432 | |||
| 433 | echo "$repo | $npub | empty/blank in both (user never pushed)" >> "$no_action" | ||
| 434 | echo "$key" >> "$tmp_dir/processed.txt" | ||
| 435 | done | ||
| 436 | |||
| 437 | # --- NO ACTION: Purgatory expired (system handled it) --- | ||
| 438 | while IFS='|' read -r repo npub; do | ||
| 439 | [[ -z "$repo" ]] && continue | ||
| 440 | key="${repo}|${npub}" | ||
| 441 | |||
| 442 | if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then | ||
| 443 | continue | ||
| 444 | fi | ||
| 445 | |||
| 446 | echo "$repo | $npub | purgatory expired (system already handled)" >> "$no_action" | ||
| 447 | echo "$key" >> "$tmp_dir/processed.txt" | ||
| 448 | ((count_no_action++)) || true | ||
| 449 | done < "$tmp_dir/purgatory_expired.txt" | ||
| 450 | |||
| 451 | # --- ACTION REQUIRED: Complete in prod, missing from archive --- | ||
| 452 | if [[ -f "$comparison_dir/complete-prod-missing-archive.txt" ]]; then | ||
| 453 | while IFS= read -r line; do | ||
| 454 | [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue | ||
| 455 | |||
| 456 | repo=$(echo "$line" | extract_repo) | ||
| 457 | npub=$(echo "$line" | extract_npub) | ||
| 458 | key="${repo}|${npub}" | ||
| 459 | |||
| 460 | if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then | ||
| 461 | continue | ||
| 462 | fi | ||
| 463 | |||
| 464 | # Check for parse failure | ||
| 465 | if grep -qF "$key" "$tmp_dir/parse_failures.txt" 2>/dev/null; then | ||
| 466 | echo "$repo | $npub | complete in prod, missing from archive, parse failure logged | investigate parse failure, may need re-announcement" >> "$manual_inv" | ||
| 467 | echo "$key" >> "$tmp_dir/processed.txt" | ||
| 468 | ((count_manual++)) || true | ||
| 469 | else | ||
| 470 | echo "$repo | $npub | complete in prod, missing from archive | trigger re-sync or investigate why not archived" >> "$action_req" | ||
| 471 | echo "$key" >> "$tmp_dir/processed.txt" | ||
| 472 | ((count_action++)) || true | ||
| 473 | fi | ||
| 474 | done < "$comparison_dir/complete-prod-missing-archive.txt" | ||
| 475 | fi | ||
| 476 | |||
| 477 | # --- ACTION REQUIRED: Complete in prod, incomplete in archive --- | ||
| 478 | if [[ -f "$comparison_dir/complete-prod-incomplete-archive.txt" ]]; then | ||
| 479 | while IFS= read -r line; do | ||
| 480 | [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue | ||
| 481 | |||
| 482 | repo=$(echo "$line" | extract_repo) | ||
| 483 | npub=$(echo "$line" | extract_npub) | ||
| 484 | key="${repo}|${npub}" | ||
| 485 | |||
| 486 | if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then | ||
| 487 | continue | ||
| 488 | fi | ||
| 489 | |||
| 490 | # Extract archive status from line | ||
| 491 | archive_status=$(echo "$line" | grep -oP 'archive=\K[^ ]+' || echo "unknown") | ||
| 492 | |||
| 493 | echo "$repo | $npub | complete in prod, $archive_status in archive | wait for sync to complete or trigger re-sync" >> "$action_req" | ||
| 494 | echo "$key" >> "$tmp_dir/processed.txt" | ||
| 495 | ((count_action++)) || true | ||
| 496 | done < "$comparison_dir/complete-prod-incomplete-archive.txt" | ||
| 497 | fi | ||
| 498 | |||
| 499 | # --- ACTION REQUIRED: Incomplete in both --- | ||
| 500 | if [[ -f "$comparison_dir/incomplete-in-both.txt" ]]; then | ||
| 501 | while IFS= read -r line; do | ||
| 502 | [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue | ||
| 503 | |||
| 504 | repo=$(echo "$line" | extract_repo) | ||
| 505 | npub=$(echo "$line" | extract_npub) | ||
| 506 | key="${repo}|${npub}" | ||
| 507 | |||
| 508 | if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then | ||
| 509 | continue | ||
| 510 | fi | ||
| 511 | |||
| 512 | # Extract statuses | ||
| 513 | prod_status=$(echo "$line" | grep -oP 'prod=\K[^ ]+' | tr -d '|' || echo "unknown") | ||
| 514 | archive_status=$(echo "$line" | grep -oP 'archive=\K[^ ]+' || echo "unknown") | ||
| 515 | |||
| 516 | echo "$repo | $npub | incomplete in both (prod=$prod_status, archive=$archive_status) | investigate git data source, may need user to re-push" >> "$action_req" | ||
| 517 | echo "$key" >> "$tmp_dir/processed.txt" | ||
| 518 | ((count_action++)) || true | ||
| 519 | done < "$comparison_dir/incomplete-in-both.txt" | ||
| 520 | fi | ||
| 521 | |||
| 522 | # --- MANUAL INVESTIGATION: In archive but not prod --- | ||
| 523 | if [[ -f "$comparison_dir/in-archive-not-prod.txt" ]]; then | ||
| 524 | while IFS= read -r line; do | ||
| 525 | [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue | ||
| 526 | |||
| 527 | repo=$(echo "$line" | extract_repo) | ||
| 528 | npub=$(echo "$line" | extract_npub) | ||
| 529 | key="${repo}|${npub}" | ||
| 530 | |||
| 531 | if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then | ||
| 532 | continue | ||
| 533 | fi | ||
| 534 | |||
| 535 | archive_status=$(echo "$line" | grep -oP 'archive=\K[^ ]+' || echo "unknown") | ||
| 536 | |||
| 537 | # Check if it was deleted | ||
| 538 | if grep -qF "$key" "$tmp_dir/all_deletions.txt" 2>/dev/null; then | ||
| 539 | echo "$repo | $npub | in archive not prod, deletion exists | verify deletion was intentional" >> "$manual_inv" | ||
| 540 | else | ||
| 541 | echo "$repo | $npub | in archive ($archive_status) but not in prod | may be new announcement or deleted from prod" >> "$manual_inv" | ||
| 542 | fi | ||
| 543 | echo "$key" >> "$tmp_dir/processed.txt" | ||
| 544 | ((count_manual++)) || true | ||
| 545 | done < "$comparison_dir/in-archive-not-prod.txt" | ||
| 546 | fi | ||
| 547 | |||
| 548 | # --- ACTION REQUIRED: Parse failures not yet processed --- | ||
| 549 | while IFS='|' read -r repo npub; do | ||
| 550 | [[ -z "$repo" ]] && continue | ||
| 551 | key="${repo}|${npub}" | ||
| 552 | |||
| 553 | if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then | ||
| 554 | continue | ||
| 555 | fi | ||
| 556 | |||
| 557 | echo "$repo | $npub | parse failure logged | fix event format or request user to re-announce" >> "$action_req" | ||
| 558 | echo "$key" >> "$tmp_dir/processed.txt" | ||
| 559 | ((count_action++)) || true | ||
| 560 | done < "$tmp_dir/parse_failures.txt" | ||
| 561 | |||
| 562 | # --- MANUAL INVESTIGATION: Prod category 3/4 not yet processed --- | ||
| 563 | for cat_file in "$tmp_dir/prod_cat3.txt" "$tmp_dir/prod_cat4.txt"; do | ||
| 564 | [[ ! -f "$cat_file" ]] && continue | ||
| 565 | cat_name=$(basename "$cat_file" .txt | sed 's/prod_//') | ||
| 566 | while IFS='|' read -r repo npub; do | ||
| 567 | [[ -z "$repo" ]] && continue | ||
| 568 | key="${repo}|${npub}" | ||
| 569 | |||
| 570 | if grep -qF "$key" "$tmp_dir/processed.txt" 2>/dev/null; then | ||
| 571 | continue | ||
| 572 | fi | ||
| 573 | |||
| 574 | if [[ "$cat_name" == "cat3" ]]; then | ||
| 575 | echo "$repo | $npub | partial match in prod, not in comparison results | investigate git ref mismatch" >> "$manual_inv" | ||
| 576 | else | ||
| 577 | echo "$repo | $npub | no match in prod (git exists but refs don't match) | investigate git data corruption" >> "$manual_inv" | ||
| 578 | fi | ||
| 579 | echo "$key" >> "$tmp_dir/processed.txt" | ||
| 580 | ((count_manual++)) || true | ||
| 581 | done < "$cat_file" | ||
| 582 | done | ||
| 583 | |||
| 584 | # ========================================================================= | ||
| 585 | # STEP 5: Count final results | ||
| 586 | # ========================================================================= | ||
| 587 | count_no_action=$(count_lines "$no_action") | ||
| 588 | count_action=$(count_lines "$action_req") | ||
| 589 | count_manual=$(count_lines "$manual_inv") | ||
| 590 | |||
| 591 | # Ensure counts are valid integers | ||
| 592 | count_no_action=${count_no_action:-0} | ||
| 593 | count_action=${count_action:-0} | ||
| 594 | count_manual=${count_manual:-0} | ||
| 595 | |||
| 596 | local total=$((count_no_action + count_action + count_manual)) | ||
| 597 | |||
| 598 | # Handle division by zero | ||
| 599 | if [[ $total -eq 0 ]]; then | ||
| 600 | total=1 # Avoid division by zero in percentage calculations | ||
| 601 | log_warn "No repos were classified. Check input files." | ||
| 602 | fi | ||
| 603 | |||
| 604 | # ========================================================================= | ||
| 605 | # STEP 6: Generate summary | ||
| 606 | # ========================================================================= | ||
| 607 | log_info "Generating summary..." | ||
| 608 | |||
| 609 | cat > "$summary" << EOF | ||
| 610 | # Migration Classification Summary | ||
| 611 | Generated: $(date -Iseconds) | ||
| 612 | Analysis Directory: $analysis_dir | ||
| 613 | |||
| 614 | ## Overview | ||
| 615 | |||
| 616 | | Category | Count | Percentage | | ||
| 617 | |----------|-------|------------| | ||
| 618 | | No Action Required | $count_no_action | $(awk "BEGIN {printf \"%.1f\", ($count_no_action/$total)*100}")% | | ||
| 619 | | Action Required | $count_action | $(awk "BEGIN {printf \"%.1f\", ($count_action/$total)*100}")% | | ||
| 620 | | Manual Investigation | $count_manual | $(awk "BEGIN {printf \"%.1f\", ($count_manual/$total)*100}")% | | ||
| 621 | | **Total** | **$total** | **100%** | | ||
| 622 | |||
| 623 | ## No Action Required ($count_no_action repos) | ||
| 624 | |||
| 625 | These repositories are ready for migration or don't need migration: | ||
| 626 | |||
| 627 | EOF | ||
| 628 | |||
| 629 | # Breakdown of no-action reasons | ||
| 630 | echo "| Reason | Count |" >> "$summary" | ||
| 631 | echo "|--------|-------|" >> "$summary" | ||
| 632 | grep -v '^#' "$no_action" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn > "$tmp_dir/no_action_breakdown.txt" || true | ||
| 633 | while read -r cnt reason; do | ||
| 634 | echo "| $reason | $cnt |" >> "$summary" | ||
| 635 | done < "$tmp_dir/no_action_breakdown.txt" | ||
| 636 | |||
| 637 | cat >> "$summary" << EOF | ||
| 638 | |||
| 639 | ## Action Required ($count_action repos) | ||
| 640 | |||
| 641 | These repositories need intervention before migration: | ||
| 642 | |||
| 643 | EOF | ||
| 644 | |||
| 645 | # Breakdown of action reasons | ||
| 646 | echo "| Reason | Count | Suggested Action |" >> "$summary" | ||
| 647 | echo "|--------|-------|------------------|" >> "$summary" | ||
| 648 | grep -v '^#' "$action_req" 2>/dev/null | awk -F' \\| ' '{print $3 "|" $4}' | sort | uniq -c | sort -rn > "$tmp_dir/action_breakdown.txt" || true | ||
| 649 | while read -r cnt reason_action; do | ||
| 650 | reason=$(echo "$reason_action" | cut -d'|' -f1) | ||
| 651 | action=$(echo "$reason_action" | cut -d'|' -f2) | ||
| 652 | echo "| $reason | $cnt | $action |" >> "$summary" | ||
| 653 | done < "$tmp_dir/action_breakdown.txt" | ||
| 654 | |||
| 655 | cat >> "$summary" << EOF | ||
| 656 | |||
| 657 | ## Manual Investigation ($count_manual repos) | ||
| 658 | |||
| 659 | These repositories have conflicting or unexpected states requiring human review: | ||
| 660 | |||
| 661 | EOF | ||
| 662 | |||
| 663 | # Breakdown of manual investigation reasons | ||
| 664 | echo "| Reason | Count |" >> "$summary" | ||
| 665 | echo "|--------|-------|" >> "$summary" | ||
| 666 | grep -v '^#' "$manual_inv" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn > "$tmp_dir/manual_breakdown.txt" || true | ||
| 667 | while read -r cnt reason; do | ||
| 668 | echo "| $reason | $cnt |" >> "$summary" | ||
| 669 | done < "$tmp_dir/manual_breakdown.txt" | ||
| 670 | |||
| 671 | # Pre-compute counts from temp files before they might be cleaned up | ||
| 672 | local prod_del_count archive_del_count | ||
| 673 | local prod_cat1_count prod_cat2_count prod_cat3_count prod_cat4_count | ||
| 674 | local archive_cat1_count archive_cat2_count archive_cat3_count archive_cat4_count | ||
| 675 | local parse_fail_count purgatory_count | ||
| 676 | |||
| 677 | prod_del_count=$(wc -l < "$tmp_dir/prod_deletions.txt" 2>/dev/null | tr -d ' ') || prod_del_count=0 | ||
| 678 | archive_del_count=$(wc -l < "$tmp_dir/archive_deletions.txt" 2>/dev/null | tr -d ' ') || archive_del_count=0 | ||
| 679 | prod_cat1_count=$(wc -l < "$tmp_dir/prod_cat1.txt" 2>/dev/null | tr -d ' ') || prod_cat1_count=0 | ||
| 680 | prod_cat2_count=$(wc -l < "$tmp_dir/prod_cat2.txt" 2>/dev/null | tr -d ' ') || prod_cat2_count=0 | ||
| 681 | prod_cat3_count=$(wc -l < "$tmp_dir/prod_cat3.txt" 2>/dev/null | tr -d ' ') || prod_cat3_count=0 | ||
| 682 | prod_cat4_count=$(wc -l < "$tmp_dir/prod_cat4.txt" 2>/dev/null | tr -d ' ') || prod_cat4_count=0 | ||
| 683 | archive_cat1_count=$(wc -l < "$tmp_dir/archive_cat1.txt" 2>/dev/null | tr -d ' ') || archive_cat1_count=0 | ||
| 684 | archive_cat2_count=$(wc -l < "$tmp_dir/archive_cat2.txt" 2>/dev/null | tr -d ' ') || archive_cat2_count=0 | ||
| 685 | archive_cat3_count=$(wc -l < "$tmp_dir/archive_cat3.txt" 2>/dev/null | tr -d ' ') || archive_cat3_count=0 | ||
| 686 | archive_cat4_count=$(wc -l < "$tmp_dir/archive_cat4.txt" 2>/dev/null | tr -d ' ') || archive_cat4_count=0 | ||
| 687 | parse_fail_count=$(wc -l < "$tmp_dir/parse_failures.txt" 2>/dev/null | tr -d ' ') || parse_fail_count=0 | ||
| 688 | purgatory_count=$(wc -l < "$tmp_dir/purgatory_expired.txt" 2>/dev/null | tr -d ' ') || purgatory_count=0 | ||
| 689 | |||
| 690 | cat >> "$summary" << EOF | ||
| 691 | |||
| 692 | ## Input Data Summary | ||
| 693 | |||
| 694 | ### Phase 1 (Events) | ||
| 695 | - Prod deletions: $prod_del_count | ||
| 696 | - Archive deletions: $archive_del_count | ||
| 697 | |||
| 698 | ### Phase 3 (Categories) | ||
| 699 | **Prod:** | ||
| 700 | - Category 1 (complete): $prod_cat1_count | ||
| 701 | - Category 2 (empty): $prod_cat2_count | ||
| 702 | - Category 3 (partial): $prod_cat3_count | ||
| 703 | - Category 4 (no match): $prod_cat4_count | ||
| 704 | |||
| 705 | **Archive:** | ||
| 706 | - Category 1 (complete): $archive_cat1_count | ||
| 707 | - Category 2 (empty): $archive_cat2_count | ||
| 708 | - Category 3 (partial): $archive_cat3_count | ||
| 709 | - Category 4 (no match): $archive_cat4_count | ||
| 710 | |||
| 711 | ### Phase 4 (Logs) | ||
| 712 | - Parse failures: $parse_fail_count | ||
| 713 | - Purgatory expired: $purgatory_count | ||
| 714 | |||
| 715 | ## Recommended Next Steps | ||
| 716 | |||
| 717 | 1. **Review action-required.txt** - Address these repos before migration | ||
| 718 | 2. **Review manual-investigation.txt** - Investigate unusual states | ||
| 719 | 3. **Verify no-action-required.txt** - Spot-check a few repos to confirm | ||
| 720 | 4. **Plan migration window** - Schedule cutover when action items are resolved | ||
| 721 | |||
| 722 | ## Output Files | ||
| 723 | |||
| 724 | - \`results/no-action-required.txt\` - $count_no_action repos ready for migration | ||
| 725 | - \`results/action-required.txt\` - $count_action repos needing intervention | ||
| 726 | - \`results/manual-investigation.txt\` - $count_manual repos needing human review | ||
| 727 | - \`results/summary.txt\` - This summary file | ||
| 728 | EOF | ||
| 729 | |||
| 730 | # ========================================================================= | ||
| 731 | # STEP 7: Display results | ||
| 732 | # ========================================================================= | ||
| 733 | echo "" | ||
| 734 | log_info "=== Classification Complete ===" | ||
| 735 | echo "" | ||
| 736 | log_success "No Action Required: $count_no_action repos" | ||
| 737 | log_warn "Action Required: $count_action repos" | ||
| 738 | log_error "Manual Investigation: $count_manual repos" | ||
| 739 | echo "" | ||
| 740 | log_info "Total: $total repos classified" | ||
| 741 | echo "" | ||
| 742 | log_info "Output files:" | ||
| 743 | echo " $no_action" | ||
| 744 | echo " $action_req" | ||
| 745 | echo " $manual_inv" | ||
| 746 | echo " $summary" | ||
| 747 | echo "" | ||
| 748 | |||
| 749 | # Show top action items | ||
| 750 | if [[ $count_action -gt 0 ]]; then | ||
| 751 | log_info "Top action items:" | ||
| 752 | grep -v '^#' "$action_req" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn | head -5 | while read -r cnt reason; do | ||
| 753 | echo " - $reason: $cnt repos" | ||
| 754 | done | ||
| 755 | echo "" | ||
| 756 | fi | ||
| 757 | |||
| 758 | # Show top investigation items | ||
| 759 | if [[ $count_manual -gt 0 ]]; then | ||
| 760 | log_info "Top investigation items:" | ||
| 761 | grep -v '^#' "$manual_inv" 2>/dev/null | awk -F' \\| ' '{print $3}' | sort | uniq -c | sort -rn | head -5 | while read -r cnt reason; do | ||
| 762 | echo " - $reason: $cnt repos" | ||
| 763 | done | ||
| 764 | echo "" | ||
| 765 | fi | ||
| 766 | |||
| 767 | log_info "See $summary for full details and recommended next steps." | ||
| 768 | } | ||
| 769 | |||
| 770 | main "$@" | ||