diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-02-03 14:50:22 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-02-03 15:18:23 +0000 |
| commit | 874a8abe1d076cfafd9baf919ec23d7d58200698 (patch) | |
| tree | dce0d0d36bddc496ff32f8555a8790d8dc7be7e4 /docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh | |
| parent | 9fd4350c57bbe986ebf65bf3ea4c996572e81884 (diff) | |
| parent | 92a9a3bfe0bc522e8ae411991a366a3a6310d525 (diff) | |
Merge relay.ngit.dev migration: bug fixes and migration tooling
This merge includes critical bug fixes and comprehensive migration tooling
developed during the relay.ngit.dev migration effort.
Bug Fixes:
- Fix git protocol error handling to return HTTP 200 with ERR pkt-line
- Fix naughty list false positives and DNS failure identification
- Fix database query filters in load_existing_events (remove .since())
- Fix OID fetch tracking to distinguish 0 OIDs from successful fetches
- Fix purgatory event source tracking for filtered expiry logging
- Implement OID retry logic for 'not our ref' errors
Migration Tools & Documentation:
- Complete 5-phase migration analysis pipeline with orchestration script
- Phase 1: Event fetching from source relay
- Phase 2: Git sync verification
- Phase 3: Categorization and relay comparison
- Phase 4: Log extraction (parse failures, purgatory expiry)
- Phase 5: Action classification for migration decisions
- Comprehensive migration guide with lessons learned
- Troubleshooting guide for permission and corruption issues
Configuration:
- Add NGIT_LOG_LEVEL configuration option
- Update git throttle limits to 60/minute
- Improve logging throughout for better observability
Diffstat (limited to 'docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh')
| -rwxr-xr-x | docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh | 564 |
1 files changed, 564 insertions, 0 deletions
diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh new file mode 100755 index 0000000..b4536cb --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/10-check-git-sync.sh | |||
| @@ -0,0 +1,564 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 10-check-git-sync.sh - Compare state events to actual git data on disk | ||
| 4 | # | ||
| 5 | # PHASE 2 of the GRASP relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Compares kind 30618 state events against actual git refs on disk. | ||
| 7 | # | ||
| 8 | # USAGE: | ||
| 9 | # ./10-check-git-sync.sh <state-events.json> <git-base-dir> <output-dir> [--categorize] | ||
| 10 | # | ||
| 11 | # EXAMPLES: | ||
| 12 | # # Check source relay against source git data | ||
| 13 | # ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod | ||
| 14 | # | ||
| 15 | # # Check target relay against target git data | ||
| 16 | # ./10-check-git-sync.sh output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive | ||
| 17 | # | ||
| 18 | # # Check and categorize in one step (convenience mode) | ||
| 19 | # ./10-check-git-sync.sh output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod --categorize | ||
| 20 | # | ||
| 21 | # INPUT: | ||
| 22 | # state-events.json - JSONL file from Phase 1 (01-fetch-events.sh) | ||
| 23 | # One kind 30618 event per line | ||
| 24 | # git-base-dir - Base directory containing git repos | ||
| 25 | # Structure: <git-base>/<npub>/<repo>.git/ | ||
| 26 | # | ||
| 27 | # OUTPUT: | ||
| 28 | # <output-dir>/git-sync-status.tsv - Tab-separated values: | ||
| 29 | # repo<TAB>npub<TAB>state_refs<TAB>git_refs<TAB>matches<TAB>reason | ||
| 30 | # | ||
| 31 | # With --categorize flag, also outputs: | ||
| 32 | # <output-dir>/category1-complete-match.txt | ||
| 33 | # <output-dir>/category2-empty-blank.txt | ||
| 34 | # <output-dir>/category3-partial-match.txt | ||
| 35 | # <output-dir>/category4-no-match.txt | ||
| 36 | # | ||
| 37 | # CATEGORIES: | ||
| 38 | # 1. Complete Match - All refs in state event match git data perfectly | ||
| 39 | # 2. Empty/Blank - No git data available (directory missing or empty), or the state event lists no refs/heads/ refs | ||
| 40 | # 3. Partial Match - Some refs match, some don't (also: all state refs match but git has additional branches) | ||
| 41 | # 4. No Match - Git data exists but commit hashes don't match | ||
| 42 | # | ||
| 43 | # PREREQUISITES: | ||
| 44 | # - nak (for npub encoding) - https://github.com/fiatjaf/nak | ||
| 45 | # - jq (for JSON parsing) | ||
| 46 | # - Read access to git directories (may need sudo) | ||
| 47 | # | ||
| 48 | # RUNTIME: ~20 minutes on VPS (git operations are slow) | ||
| 49 | # | ||
| 50 | # NOTES: | ||
| 51 | # - Must run on VPS with access to git directories | ||
| 52 | # - Progress indicator updates every 10 events | ||
| 53 | # - Handles packed refs (git show-ref) and loose refs | ||
| 54 | # | ||
| 55 | # SEE ALSO: | ||
| 56 | # docs/how-to/migrate-to-ngit-grasp.md - Full migration guide | ||
| 57 | # 01-fetch-events.sh - Phase 1 script that produces input for this script | ||
| 58 | # 20-categorize.sh - Phase 3a script that consumes output from this script | ||
| 59 | # | ||
| 60 | |||
set -euo pipefail

# ANSI colour codes used by the log_* helpers.
# They default to empty strings and are only filled in when stdout is a
# terminal, so redirected output stays free of escape sequences.
RED=''
GREEN=''
YELLOW=''
BLUE=''
NC=''
if [[ -t 1 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
fi
| 77 | |||
# Print an informational message to stderr.
# Args: $*=message words (joined with spaces)
# Only the colour variables are escape-expanded (%b); the message itself is
# printed verbatim (%s) so backslashes in paths or data are not mangled.
log_info() {
  printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$*" >&2
}
| 81 | |||
# Print a success message to stderr.
# Args: $*=message words (joined with spaces)
# Only the colour variables are escape-expanded (%b); the message itself is
# printed verbatim (%s) so backslashes in paths or data are not mangled.
log_success() {
  printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$*" >&2
}
| 85 | |||
# Print a warning message to stderr.
# Args: $*=message words (joined with spaces)
# Only the colour variables are escape-expanded (%b); the message itself is
# printed verbatim (%s) so backslashes in paths or data are not mangled.
log_warn() {
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*" >&2
}
| 89 | |||
# Print an error message to stderr.
# Args: $*=message words (joined with spaces)
# Only the colour variables are escape-expanded (%b); the message itself is
# printed verbatim (%s) so backslashes in paths or data are not mangled.
log_error() {
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*" >&2
}
| 93 | |||
# Print a progress message to stderr without a trailing newline.
# Args: $*=message words (joined with spaces)
# The leading carriage return moves the cursor to the start of the line so
# each update overwrites the previous one. The message is printed verbatim
# (%s); only the colour variables are escape-expanded (%b).
log_progress() {
  printf '\r%b[PROGRESS]%b %s' "${BLUE}" "${NC}" "$*" >&2
}
| 98 | |||
# Print the command-line help text to stdout and terminate the script.
# Args: none (uses $0 for the script name)
# Exits: always with status 1
usage() {
  printf '%s\n' \
    "Usage: $0 <state-events.json> <git-base-dir> <output-dir> [--categorize]" \
    "" \
    "Arguments:" \
    " state-events.json JSONL file from Phase 1 (kind 30618 events)" \
    " git-base-dir Base directory for git repos (e.g., /var/lib/grasp-relay/git)" \
    " output-dir Directory to store output files" \
    " --categorize Optional: also output category files (like Phase 3)" \
    "" \
    "Examples:" \
    " $0 output/prod/raw/state-events.json /var/lib/grasp-relay/git output/prod" \
    " $0 output/archive/raw/state-events.json /var/lib/ngit-grasp/git output/archive" \
    "" \
    "Output:" \
    " git-sync-status.tsv - TSV with: repo, npub, state_refs, git_refs, matches, reason"
  exit 1
}
| 116 | |||
# Verify that the external tools this script relies on are installed.
# Args: $*=the script's original arguments; they are only used to build the
#       "nix-shell ... --run" hint shown when nak is missing
# Exits: with status 1 (after reporting every missing tool) if git, nak or
#        jq cannot be found on PATH
check_prerequisites() {
  local any_missing=0
  local tool

  for tool in git nak jq; do
    if command -v "$tool" &> /dev/null; then
      continue
    fi

    any_missing=1
    case "$tool" in
      git)
        log_error "git not found. Install with your package manager."
        ;;
      nak)
        log_error "nak not found. Install from: https://github.com/fiatjaf/nak"
        log_error "Or run: nix-shell -p nak jq --run \"$0 $*\""
        ;;
      jq)
        log_error "jq not found. Install with your package manager."
        ;;
    esac
  done

  if [[ $any_missing -eq 1 ]]; then
    exit 1
  fi
}
| 141 | |||
# Encode a hex public key as an npub using nak.
# Args: $1=hex_pubkey
# Returns: whatever `nak encode npub` prints; an empty line if nak fails
#          (nak's stderr is discarded)
hex_to_npub() {
  local pubkey_hex="$1"

  if ! nak encode npub "$pubkey_hex" 2>/dev/null; then
    echo ""
  fi
}
| 149 | |||
# Count the refs/heads/ tags carried by a state event.
# Args: $1=event_json
# Returns: the number of tags whose first element starts with "refs/heads/",
#          or 0 if jq cannot process the event (jq's stderr is discarded)
count_state_refs() {
  local state_event="$1"

  if ! jq '[.tags[] | select(.[0] | startswith("refs/heads/"))] | length' 2>/dev/null <<< "$state_event"; then
    echo "0"
  fi
}
| 157 | |||
# Count the branches (refs/heads/*) stored in a git directory.
# Args: $1=git_dir
# Returns: count of refs/heads/ refs (0 if the directory does not exist)
#
# git show-ref is tried first. Its output is captured on its own rather than
# piped straight into wc, so git's exit status decides whether the fallback
# runs regardless of the caller's pipefail setting.
#
# If git reports nothing (no branches, git unavailable, or git refuses to
# open the repository), the refs are counted from disk instead: loose files
# under refs/heads plus refs/heads/ entries in packed-refs, de-duplicated by
# ref name because a ref may exist in both places.
count_git_refs() {
  local git_dir="$1"

  if [[ ! -d "$git_dir" ]]; then
    echo "0"
    return
  fi

  local heads
  if heads=$(git --git-dir="$git_dir" show-ref --heads 2>/dev/null) && [[ -n "$heads" ]]; then
    printf '%s\n' "$heads" | wc -l | tr -d ' '
    return
  fi

  # Fallback: list ref names from loose files and packed-refs, then count
  # the unique names.
  {
    if [[ -d "$git_dir/refs/heads" ]]; then
      local loose_file
      while IFS= read -r loose_file; do
        # Strip the git_dir prefix so names line up with packed-refs entries
        printf '%s\n' "${loose_file#"$git_dir"/}"
      done < <(find "$git_dir/refs/heads" -type f 2>/dev/null)
    fi

    if [[ -f "$git_dir/packed-refs" ]]; then
      # packed-refs lines are "<hash> <refname>"; header (#) and peeled (^)
      # lines have no matching second field and are skipped
      awk '$2 ~ /^refs\/heads\// { print $2 }' "$git_dir/packed-refs" 2>/dev/null || true
    fi
  } | sort -u | wc -l | tr -d ' '
}
| 184 | |||
# Get the hash a ref points to in a git directory.
# Args: $1=git_dir, $2=ref_path (e.g., refs/heads/main)
# Returns: commit hash or empty
#
# --verify makes git treat ref_path as an exact, full ref name. Without it,
# show-ref matches patterns from the END of the ref name, so a different ref
# ending in the same path components could be returned instead.
#
# If git yields nothing (ref missing, git unavailable, or git refuses to open
# the repository), the ref is read from disk: first the loose ref file, then
# the matching entry in packed-refs.
get_git_ref_hash() {
  local git_dir="$1"
  local ref_path="$2"
  local hash

  hash=$(git --git-dir="$git_dir" show-ref --verify --hash -- "$ref_path" 2>/dev/null) || hash=""
  if [[ -n "$hash" ]]; then
    # Keep only the first line
    echo "${hash%%$'\n'*}"
    return
  fi

  # Fallback 1: loose ref file
  local ref_file="$git_dir/$ref_path"
  if [[ -f "$ref_file" ]]; then
    # Braces so the shell's own "cannot open" message is silenced too
    { tr -d '\n' < "$ref_file"; } 2>/dev/null || echo ""
    return
  fi

  # Fallback 2: packed-refs entry ("<hash> <refname>" per line). The ref name
  # is passed through the environment so awk does not escape-process it.
  if [[ -f "$git_dir/packed-refs" ]]; then
    REF_NAME="$ref_path" awk '$2 == ENVIRON["REF_NAME"] { print $1; exit }' \
      "$git_dir/packed-refs" 2>/dev/null || echo ""
    return
  fi

  echo ""
}
| 209 | |||
# Compare state event refs to git refs
# Args: $1=event_json, $2=git_dir
# Returns: count of refs/heads/ tags whose hash equals the hash git reports
#          for the same ref
#
# All refs/heads/ tags are extracted with a single jq call that emits one
# "<ref>\t<hash>" line per tag, instead of spawning jq twice per ref inside
# the loop. A missing hash is emitted as an empty field and skipped.
count_matching_refs() {
  local event="$1"
  local git_dir="$2"
  local matching=0
  local ref_path expected_hash actual_hash

  while IFS=$'\t' read -r ref_path expected_hash; do
    # Skip if not a heads ref or hash is missing
    [[ "$ref_path" == refs/heads/* ]] || continue
    [[ -n "$expected_hash" && "$expected_hash" != "null" ]] || continue

    # Get actual hash from git
    actual_hash=$(get_git_ref_hash "$git_dir" "$ref_path")

    if [[ "$expected_hash" == "$actual_hash" ]]; then
      matching=$((matching + 1))
    fi
  done < <(jq -r '.tags[] | select(.[0] | startswith("refs/heads/")) | [.[0], ((.[1] // "") | tostring)] | @tsv' 2>/dev/null <<< "$event")

  echo "$matching"
}
| 241 | |||
# Decide which category a sync-status row belongs to.
# Args: $1=state_refs, $2=git_refs, $3=matches, $4=reason
# Returns: category number (1-4)
#   2 - a reason is set, or git has no refs (also the final fallback)
#   1 - state has refs, both sides have the same count, and all of them match
#   4 - git has refs but none match
#   3 - anything left that has at least one match
categorize_entry() {
  local n_state="$1"
  local n_git="$2"
  local n_match="$3"
  local why="$4"

  if [[ -n "$why" || "$n_git" -eq 0 ]]; then
    echo "2"
  elif [[ "$n_state" -gt 0 && "$n_state" -eq "$n_git" && "$n_match" -eq "$n_state" ]]; then
    echo "1"
  elif [[ "$n_git" -gt 0 && "$n_match" -eq 0 ]]; then
    echo "4"
  elif [[ "$n_match" -gt 0 ]]; then
    echo "3"
  else
    echo "2"
  fi
}
| 278 | |||
# Render one sync-status row as a " | "-separated line for a category file.
# Args: $1=repo, $2=npub, $3=state_refs, $4=git_refs, $5=matches, $6=reason
# Returns: the formatted line; the "reason=" column is appended only when a
#          reason is given
format_category_line() {
  local repo_name="$1"
  local owner_npub="$2"
  local n_state="$3"
  local n_git="$4"
  local n_match="$5"
  local why="$6"

  local line="$repo_name | $owner_npub | state_refs=$n_state | git_refs=$n_git | matches=$n_match"
  if [[ -n "$why" ]]; then
    line+=" | reason=$why"
  fi

  echo "$line"
}
| 295 | |||
# Build the sync-status row for a single state event.
# Args: $1=event_json, $2=git_base
# Outputs: one TSV line to stdout:
#          repo, npub, state_refs, git_refs, matches, reason
# Returns: 1 (printing nothing) when the event has no "d" tag value, no
#          pubkey, or the pubkey cannot be converted to an npub
#
# reason is one of:
#   no_git_dir    - <git_base>/<npub>/<repo>.git does not exist
#   empty_refs    - neither refs/heads nor packed-refs exists, or git has 0 refs
#   no_state_refs - git has refs but the event lists none
#   (empty)       - refs were compared; see the matches column
process_event() {
  local raw_event="$1"
  local base_dir="$2"

  # Repository identifier: value of the first "d" tag
  local repo_id
  repo_id=$(jq -r '.tags[] | select(.[0] == "d") | .[1]' 2>/dev/null <<< "$raw_event" | head -1 || echo "")
  [[ -n "$repo_id" ]] || return 1

  # Maintainer pubkey (hex)
  local author_hex
  author_hex=$(jq -r '.pubkey' 2>/dev/null <<< "$raw_event" || echo "")
  [[ -n "$author_hex" ]] || return 1

  # The npub is the per-maintainer directory name under the git base
  local author_npub
  author_npub=$(hex_to_npub "$author_hex")
  [[ -n "$author_npub" ]] || return 1

  local n_state
  n_state=$(count_state_refs "$raw_event")

  local repo_path="$base_dir/${author_npub}/${repo_id}.git"
  local n_git=0
  local n_match=0
  local why=""

  if [[ -d "$repo_path" ]]; then
    if [[ -d "$repo_path/refs/heads" || -f "$repo_path/packed-refs" ]]; then
      n_git=$(count_git_refs "$repo_path")

      if [[ "$n_git" -eq 0 ]]; then
        why="empty_refs"
      elif [[ "$n_state" -eq 0 ]]; then
        why="no_state_refs"
      else
        # Hashes are only compared when both sides have refs
        n_match=$(count_matching_refs "$raw_event" "$repo_path")
      fi
    else
      why="empty_refs"
    fi
  else
    why="no_git_dir"
  fi

  printf '%s\t%s\t%s\t%s\t%s\t%s\n' "$repo_id" "$author_npub" "$n_state" "$n_git" "$n_match" "$why"
}
| 358 | |||
# Main entry point.
# Args: <state-events.json> <git-base-dir> <output-dir> [--categorize]
#       (--categorize may appear in any position)
# Outputs: <output-dir>/git-sync-status.tsv, plus the four category files
#          when --categorize is given; logs go to stderr
# Exits: 1 on bad usage, missing/unreadable inputs, or missing tools
main() {
  local do_categorize=0
  local args=()
  local arg

  # Parse arguments: pull out the flag, keep the rest as positionals
  for arg in "$@"; do
    if [[ "$arg" == "--categorize" ]]; then
      do_categorize=1
    else
      args+=("$arg")
    fi
  done

  if [[ ${#args[@]} -ne 3 ]]; then
    usage
  fi

  local state_events_file="${args[0]}"
  local git_base="${args[1]}"
  local output_dir="${args[2]}"

  # Validate inputs
  if [[ ! -f "$state_events_file" ]]; then
    log_error "State events file not found: $state_events_file"
    exit 1
  fi

  if [[ ! -d "$git_base" ]]; then
    log_error "Git base directory not found: $git_base"
    log_error "This script must run on the VPS with access to git directories."
    exit 1
  fi

  # Check read permissions
  if ! ls "$git_base" >/dev/null 2>&1; then
    log_error "Cannot read git base directory (permission denied): $git_base"
    log_error "Try running with sudo or grant read permissions."
    exit 1
  fi

  # Forward the original arguments so the "nix-shell --run" hint printed for
  # a missing tool shows the full command line to re-run.
  check_prerequisites "$@"

  log_info "=== Git State Synchronization Check ==="
  log_info "State events: $state_events_file"
  log_info "Git base: $git_base"
  log_info "Output: $output_dir"
  if [[ $do_categorize -eq 1 ]]; then
    log_info "Mode: TSV + categorization"
  else
    log_info "Mode: TSV only (use 20-categorize.sh for categories)"
  fi
  log_info "Started: $(date)"
  echo ""

  # Create output directory
  mkdir -p "$output_dir"

  # Output files
  local tsv_file="$output_dir/git-sync-status.tsv"

  # Initialize TSV with header
  printf 'repo\tnpub\tstate_refs\tgit_refs\tmatches\treason\n' > "$tsv_file"

  # Initialize (truncate) category files if categorizing
  local cat1="" cat2="" cat3="" cat4=""
  if [[ $do_categorize -eq 1 ]]; then
    cat1="$output_dir/category1-complete-match.txt"
    cat2="$output_dir/category2-empty-blank.txt"
    cat3="$output_dir/category3-partial-match.txt"
    cat4="$output_dir/category4-no-match.txt"
    : > "$cat1"
    : > "$cat2"
    : > "$cat3"
    : > "$cat4"
  fi

  # Count total events. grep -c '' also counts a final line that has no
  # trailing newline (wc -l would not); it exits 1 on an empty file, hence
  # the || true.
  local total_events
  total_events=$(grep -c '' "$state_events_file" || true)
  log_info "Processing $total_events state events..."
  echo ""

  # Process each event
  local count=0
  local processed=0
  local skipped=0
  local count_cat1=0 count_cat2=0 count_cat3=0 count_cat4=0
  local event result category cat_line
  local repo npub state_refs git_refs matches reason
  local elapsed rate eta
  local start_time
  start_time=$(date +%s)

  # The "|| [[ -n ... ]]" keeps a last line without a trailing newline from
  # being silently dropped.
  while IFS= read -r event || [[ -n "$event" ]]; do
    count=$((count + 1))

    # Skip empty lines
    if [[ -z "$event" ]]; then
      continue
    fi

    if result=$(process_event "$event" "$git_base"); then
      processed=$((processed + 1))

      # Append the data row below the header
      echo "$result" >> "$tsv_file"

      # Categorize if requested
      if [[ $do_categorize -eq 1 ]]; then
        IFS=$'\t' read -r repo npub state_refs git_refs matches reason <<< "$result"

        category=$(categorize_entry "$state_refs" "$git_refs" "$matches" "$reason")
        cat_line=$(format_category_line "$repo" "$npub" "$state_refs" "$git_refs" "$matches" "$reason")

        case "$category" in
          1) echo "$cat_line" >> "$cat1"; count_cat1=$((count_cat1 + 1)) ;;
          2) echo "$cat_line" >> "$cat2"; count_cat2=$((count_cat2 + 1)) ;;
          3) echo "$cat_line" >> "$cat3"; count_cat3=$((count_cat3 + 1)) ;;
          4) echo "$cat_line" >> "$cat4"; count_cat4=$((count_cat4 + 1)) ;;
        esac
      fi
    else
      skipped=$((skipped + 1))
    fi

    # Progress indicator every 10 events
    if [[ $((count % 10)) -eq 0 ]]; then
      elapsed=$(($(date +%s) - start_time))
      rate=0
      eta="?"
      if [[ $elapsed -gt 0 ]]; then
        rate=$((count / elapsed))
        # ETA from elapsed time per event, not from the truncated integer
        # rate, which is 0 whenever throughput is below 1 event/s
        eta=$(( (total_events - count) * elapsed / count ))
      fi
      log_progress "Processed $count/$total_events events (~${rate}/s, ETA: ${eta}s)..."
    fi
  done < "$state_events_file"

  # Clear progress line
  echo "" >&2

  local end_time
  end_time=$(date +%s)
  local duration=$((end_time - start_time))

  # Summary
  echo ""
  log_info "=== Analysis Complete ==="
  log_info "Finished: $(date)"
  log_info "Duration: ${duration}s"
  log_info "Processed: $processed events"
  if [[ $skipped -gt 0 ]]; then
    log_warn "Skipped: $skipped events (missing identifier or pubkey)"
  fi
  echo ""

  if [[ $do_categorize -eq 1 ]]; then
    # Calculate percentages (values passed with -v, not spliced into the
    # awk program text)
    local total=$((count_cat1 + count_cat2 + count_cat3 + count_cat4))
    local pct1=0 pct2=0 pct3=0 pct4=0
    if [[ $total -gt 0 ]]; then
      pct1=$(awk -v c="$count_cat1" -v t="$total" 'BEGIN {printf "%.1f", (c/t)*100}')
      pct2=$(awk -v c="$count_cat2" -v t="$total" 'BEGIN {printf "%.1f", (c/t)*100}')
      pct3=$(awk -v c="$count_cat3" -v t="$total" 'BEGIN {printf "%.1f", (c/t)*100}')
      pct4=$(awk -v c="$count_cat4" -v t="$total" 'BEGIN {printf "%.1f", (c/t)*100}')
    fi

    log_info "=== Category Summary ==="
    log_success "Category 1 (Complete Match): $count_cat1 ($pct1%)"
    log_warn "Category 2 (Empty/Blank): $count_cat2 ($pct2%)"
    log_warn "Category 3 (Partial Match): $count_cat3 ($pct3%)"
    log_error "Category 4 (No Match): $count_cat4 ($pct4%)"
    echo ""

    # Validation warning: everything landing in category 2 suggests the git
    # data could not be read at all
    if [[ $count_cat2 -eq $total ]] && [[ $total -gt 0 ]]; then
      log_error "WARNING: 100% of repos categorized as Empty/Blank"
      log_error "This usually indicates a permission or path issue."
      echo ""
      log_info "Troubleshooting:"
      echo " 1. Verify git data exists: sudo ls -la $git_base | head -10"
      echo " 2. Check sample repo: sudo find $git_base -name '*.git' -type d | head -1"
      echo " 3. Re-run with sudo if not already using it"
      echo ""
    fi
  fi

  log_info "Output files:"
  echo " $tsv_file"
  if [[ $do_categorize -eq 1 ]]; then
    echo " $cat1"
    echo " $cat2"
    echo " $cat3"
    echo " $cat4"
  else
    echo ""
    log_info "Next step: Run 20-categorize.sh to categorize results"
    echo " ./20-categorize.sh $tsv_file $output_dir"
  fi
}
| 563 | |||
| 564 | main "$@" | ||