diff options
Diffstat (limited to 'docs/how-to/migration-scripts')
| -rwxr-xr-x | docs/how-to/migration-scripts/30-extract-parse-failures.sh | 289 | ||||
| -rwxr-xr-x | docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh | 15 |
2 files changed, 243 insertions, 61 deletions
diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh
index d4f0ff2..114a44d 100755
--- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh
+++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh
| @@ -3,7 +3,8 @@ | |||
| 3 | # 30-extract-parse-failures.sh - Extract parse failure events from systemd logs | 3 | # 30-extract-parse-failures.sh - Extract parse failure events from systemd logs |
| 4 | # | 4 | # |
| 5 | # PHASE 4a of the GRASP relay to ngit-grasp migration analysis pipeline. | 5 | # PHASE 4a of the GRASP relay to ngit-grasp migration analysis pipeline. |
| 6 | # Extracts structured [PARSE_FAIL] log entries from journalctl. | 6 | # Extracts structured [PARSE_FAIL] log entries AND "Invalid announcement" |
| 7 | # rejections from journalctl. | ||
| 7 | # | 8 | # |
| 8 | # USAGE: | 9 | # USAGE: |
| 9 | # ./30-extract-parse-failures.sh <service-name> <output-dir> [options] | 10 | # ./30-extract-parse-failures.sh <service-name> <output-dir> [options] |
| @@ -27,24 +28,34 @@ | |||
| 27 | # <output-dir>/parse-failures.txt | 28 | # <output-dir>/parse-failures.txt |
| 28 | # | 29 | # |
| 29 | # OUTPUT FORMAT (TSV): | 30 | # OUTPUT FORMAT (TSV): |
| 30 | # repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason | 31 | # event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub |
| 31 | # | 32 | # |
| 32 | # EXPECTED LOG FORMAT: | 33 | # EXPECTED LOG FORMATS: |
| 33 | # The script looks for structured log entries in this format: | 34 | # The script looks for three types of log entries: |
| 34 | # | 35 | # |
| 35 | # 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... | 36 | # 1. Structured [PARSE_FAIL] entries: |
| 37 | # 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... | ||
| 38 | # | ||
| 39 | # 2. "Invalid announcement" rejections (write policy): | ||
| 40 | # Event rejected by write policy event_id=abc123... relay=wss://... kind=30617 reason=Invalid announcement: multiple clone tags found... | ||
| 41 | # | ||
| 42 | # 3. "Rejected repository announcement" (builder): | ||
| 43 | # Rejected repository announcement note1xxx: Invalid announcement: multiple clone tags found... | ||
| 36 | # | 44 | # |
| 37 | # Required fields: kind, event_id, reason | 45 | # Required fields: kind, event_id, reason |
| 38 | # Optional fields: repo, npub (may not be available if parsing failed early) | 46 | # Optional fields: repo, npub (may not be available for all entry types) |
| 39 | # | 47 | # |
| 40 | # DEPENDENCY: | 48 | # DEPENDENCY: |
| 41 | # This script requires logging improvements in ngit-grasp to emit structured | 49 | # This script requires logging improvements in ngit-grasp to emit structured |
| 42 | # [PARSE_FAIL] log entries. Until those are implemented, this script will | 50 | # [PARSE_FAIL] log entries. Until those are implemented, this script will |
| 43 | # find no matching entries (which is handled gracefully). | 51 | # find no matching entries (which is handled gracefully). |
| 44 | # | 52 | # |
| 53 | # "Invalid announcement" rejections are logged by the write policy and | ||
| 54 | # should be present in any ngit-grasp deployment. | ||
| 55 | # | ||
| 45 | # See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section) | 56 | # See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section) |
| 46 | # | 57 | # |
| 47 | # Expected Rust logging code: | 58 | # Expected Rust logging code for [PARSE_FAIL]: |
| 48 | # tracing::warn!( | 59 | # tracing::warn!( |
| 49 | # target: "migration", | 60 | # target: "migration", |
| 50 | # "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}", | 61 | # "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}", |
| @@ -53,7 +64,7 @@ | |||
| 53 | # | 64 | # |
| 54 | # PREREQUISITES: | 65 | # PREREQUISITES: |
| 55 | # - journalctl (systemd) | 66 | # - journalctl (systemd) |
| 56 | # - grep, awk (standard Unix tools) | 67 | # - grep, awk, sed (standard Unix tools) |
| 57 | # - Access to systemd journal (may require sudo or journal group membership) | 68 | # - Access to systemd journal (may require sudo or journal group membership) |
| 58 | # | 69 | # |
| 59 | # RUNTIME: Depends on log volume, typically < 30 seconds | 70 | # RUNTIME: Depends on log volume, typically < 30 seconds |
| @@ -121,15 +132,17 @@ usage() { | |||
| 121 | echo " $0 ngit-grasp.service output/logs --since '2026-01-01'" | 132 | echo " $0 ngit-grasp.service output/logs --since '2026-01-01'" |
| 122 | echo " $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'" | 133 | echo " $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'" |
| 123 | echo "" | 134 | echo "" |
| 124 | echo "Expected log format:" | 135 | echo "Expected log formats:" |
| 125 | echo " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." | 136 | echo " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." |
| 137 | echo " Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ..." | ||
| 138 | echo " Rejected repository announcement note1xxx: Invalid announcement: ..." | ||
| 126 | exit 1 | 139 | exit 1 |
| 127 | } | 140 | } |
| 128 | 141 | ||
| 129 | # Parse a single log line and extract fields | 142 | # Parse a [PARSE_FAIL] log line and extract fields |
| 130 | # Input: log line containing [PARSE_FAIL] | 143 | # Input: log line containing [PARSE_FAIL] |
| 131 | # Output: TSV line: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason | 144 | # Output: TSV line: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub |
| 132 | parse_log_line() { | 145 | parse_parse_fail_line() { |
| 133 | local line="$1" | 146 | local line="$1" |
| 134 | 147 | ||
| 135 | # Extract fields using grep -oP (Perl regex) or awk | 148 | # Extract fields using grep -oP (Perl regex) or awk |
| @@ -154,7 +167,56 @@ parse_log_line() { | |||
| 154 | 167 | ||
| 155 | # Only output if we have the required fields | 168 | # Only output if we have the required fields |
| 156 | if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then | 169 | if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then |
| 157 | printf '%s\t%s\t%s\t%s\t%s\n' "$repo" "$npub" "$kind" "$event_id" "$reason" | 170 | printf '%s\t%s\t%s\t%s\t%s\n' "$event_id" "$kind" "$reason" "$repo" "$npub" |
| 171 | fi | ||
| 172 | } | ||
| 173 | |||
| 174 | # Parse an "Invalid announcement" rejection log line from write policy | ||
| 175 | # Input: log line containing "Event rejected by write policy" with "Invalid announcement" | ||
| 176 | # Output: TSV line: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub | ||
| 177 | # Note: repo and npub are empty for these entries (not available in log format) | ||
| 178 | parse_write_policy_rejection_line() { | ||
| 179 | local line="$1" | ||
| 180 | |||
| 181 | local kind event_id reason | ||
| 182 | |||
| 183 | # Extract event_id=VALUE (hex string) | ||
| 184 | event_id=$(echo "$line" | grep -oP 'event_id=\K[a-f0-9]+' || echo "") | ||
| 185 | |||
| 186 | # Extract kind=VALUE | ||
| 187 | kind=$(echo "$line" | grep -oP 'kind=\K[0-9]+' || echo "") | ||
| 188 | |||
| 189 | # Extract reason=VALUE (everything after "reason=") | ||
| 190 | # The reason is unquoted and goes to end of line | ||
| 191 | reason=$(echo "$line" | grep -oP 'reason=\K.*$' || echo "") | ||
| 192 | |||
| 193 | # Only output if we have the required fields | ||
| 194 | if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then | ||
| 195 | # repo and npub are empty for invalid announcement entries | ||
| 196 | printf '%s\t%s\t%s\t\t\n' "$event_id" "$kind" "$reason" | ||
| 197 | fi | ||
| 198 | } | ||
| 199 | |||
| 200 | # Parse a "Rejected repository announcement" log line from builder | ||
| 201 | # Input: log line containing "Rejected repository announcement <note_id>: Invalid announcement:" | ||
| 202 | # Output: TSV line: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub | ||
| | | 203 | # Note: the note_id is bech32-encoded (note1...); it is output as-is in the event_id column, not converted to hex | ||
| 204 | parse_builder_rejection_line() { | ||
| 205 | local line="$1" | ||
| 206 | |||
| 207 | local note_id reason | ||
| 208 | |||
| 209 | # Extract note_id (note1...) from "Rejected repository announcement note1xxx:" | ||
| 210 | note_id=$(echo "$line" | grep -oP 'Rejected repository announcement \Knote1[a-z0-9]+' || echo "") | ||
| 211 | |||
| 212 | # Extract reason (everything after the note_id and colon) | ||
| 213 | reason=$(echo "$line" | grep -oP 'Rejected repository announcement note1[a-z0-9]+: \K.*$' || echo "") | ||
| 214 | |||
| 215 | # Only output if we have the required fields | ||
| 216 | # Kind is always 30617 for announcements | ||
| 217 | if [[ -n "$note_id" && -n "$reason" ]]; then | ||
| 218 | # Use note_id as event_id (bech32 format), kind=30617, repo and npub empty | ||
| 219 | printf '%s\t%s\t%s\t\t\n' "$note_id" "30617" "$reason" | ||
| 158 | fi | 220 | fi |
| 159 | } | 221 | } |
| 160 | 222 | ||
| @@ -260,21 +322,27 @@ main() { | |||
| 260 | journal_cmd="$journal_cmd --until '$until_date'" | 322 | journal_cmd="$journal_cmd --until '$until_date'" |
| 261 | fi | 323 | fi |
| 262 | 324 | ||
| 263 | log_info "Running: $journal_cmd | grep '\\[PARSE_FAIL\\]'" | 325 | log_info "Running: $journal_cmd | grep '[PARSE_FAIL]' or 'Invalid announcement'" |
| 264 | 326 | ||
| 265 | if [[ "$dry_run" == true ]]; then | 327 | if [[ "$dry_run" == true ]]; then |
| 266 | log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt" | 328 | log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt" |
| 267 | 329 | ||
| 268 | # Show sample of what would be extracted | 330 | # Show sample of what would be extracted |
| 269 | log_info "Checking for matching log entries..." | 331 | log_info "Checking for matching log entries..." |
| 270 | local sample_count | 332 | local parse_fail_count invalid_announcement_count |
| 271 | sample_count=$(eval "$journal_cmd" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") | 333 | parse_fail_count=$(eval "$journal_cmd" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") |
| 272 | sample_count="${sample_count//[^0-9]/}" # Strip non-numeric characters | 334 | parse_fail_count="${parse_fail_count//[^0-9]/}" # Strip non-numeric characters |
| 273 | sample_count="${sample_count:-0}" | 335 | parse_fail_count="${parse_fail_count:-0}" |
| 274 | log_info "Found $sample_count matching log entries" | 336 | |
| 337 | invalid_announcement_count=$(eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep -c 'Invalid announcement' || echo "0") | ||
| 338 | invalid_announcement_count="${invalid_announcement_count//[^0-9]/}" | ||
| 339 | invalid_announcement_count="${invalid_announcement_count:-0}" | ||
| 275 | 340 | ||
| 276 | if [[ "$sample_count" -eq 0 ]]; then | 341 | log_info "Found $parse_fail_count [PARSE_FAIL] entries" |
| 277 | log_warn "No [PARSE_FAIL] entries found in logs." | 342 | log_info "Found $invalid_announcement_count 'Invalid announcement' rejections" |
| 343 | |||
| 344 | if [[ "$parse_fail_count" -eq 0 && "$invalid_announcement_count" -eq 0 ]]; then | ||
| 345 | log_warn "No matching entries found in logs." | ||
| 278 | log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." | 346 | log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." |
| 279 | log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" | 347 | log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" |
| 280 | fi | 348 | fi |
| @@ -289,69 +357,159 @@ main() { | |||
| 289 | local temp_file | 357 | local temp_file |
| 290 | temp_file=$(mktemp) | 358 | temp_file=$(mktemp) |
| 291 | 359 | ||
| 292 | # Extract and parse log entries | 360 | # Extract and parse log entries using streaming (avoids loading all logs into memory) |
| 293 | log_info "Extracting log entries..." | 361 | log_info "Extracting log entries..." |
| 294 | 362 | ||
| 295 | # Get raw log lines containing [PARSE_FAIL] | 363 | # Create temp files for intermediate results |
| 296 | # Capture stderr separately to detect journalctl errors | 364 | local temp_stderr temp_parse_fail temp_write_policy_rejection temp_builder_rejection |
| 297 | local raw_lines journal_stderr journal_exit | ||
| 298 | local temp_stderr | ||
| 299 | temp_stderr=$(mktemp) | 365 | temp_stderr=$(mktemp) |
| 366 | temp_parse_fail=$(mktemp) | ||
| 367 | temp_write_policy_rejection=$(mktemp) | ||
| 368 | temp_builder_rejection=$(mktemp) | ||
| 300 | 369 | ||
| 301 | raw_lines=$(eval "$journal_cmd" 2>"$temp_stderr" | grep '\[PARSE_FAIL\]' || true) | 370 | # Extract [PARSE_FAIL] entries directly to temp file (streaming) |
| 302 | journal_exit=$? | 371 | log_info " Searching for [PARSE_FAIL] entries..." |
| 303 | journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true) | 372 | eval "$journal_cmd" 2>"$temp_stderr" | grep '\[PARSE_FAIL\]' > "$temp_parse_fail" || true |
| 304 | rm -f "$temp_stderr" | ||
| 305 | 373 | ||
| 306 | # Report any journalctl errors (but don't fail - empty logs are valid) | 374 | local journal_stderr |
| 375 | journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true) | ||
| 307 | if [[ -n "$journal_stderr" ]]; then | 376 | if [[ -n "$journal_stderr" ]]; then |
| 308 | log_warn "journalctl reported: $journal_stderr" | 377 | log_warn "journalctl reported: $journal_stderr" |
| 309 | fi | 378 | fi |
| 310 | 379 | ||
| 311 | if [[ -z "$raw_lines" ]]; then | 380 | # Extract "Event rejected by write policy" with "Invalid announcement" (streaming) |
| 312 | log_warn "No [PARSE_FAIL] entries found in logs." | 381 | log_info " Searching for write policy rejections..." |
| 382 | eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep 'Invalid announcement' > "$temp_write_policy_rejection" || true | ||
| 383 | |||
| 384 | # Extract "Rejected repository announcement" from builder (streaming) | ||
| 385 | log_info " Searching for builder rejections..." | ||
| 386 | eval "$journal_cmd" 2>/dev/null | grep 'Rejected repository announcement' | grep 'Invalid announcement' > "$temp_builder_rejection" || true | ||
| 387 | |||
| 388 | rm -f "$temp_stderr" | ||
| 389 | |||
| 390 | # Check if we found anything | ||
| 391 | local parse_fail_line_count write_policy_line_count builder_line_count | ||
| 392 | parse_fail_line_count=$(wc -l < "$temp_parse_fail") | ||
| 393 | parse_fail_line_count="${parse_fail_line_count//[^0-9]/}" | ||
| 394 | write_policy_line_count=$(wc -l < "$temp_write_policy_rejection") | ||
| 395 | write_policy_line_count="${write_policy_line_count//[^0-9]/}" | ||
| 396 | builder_line_count=$(wc -l < "$temp_builder_rejection") | ||
| 397 | builder_line_count="${builder_line_count//[^0-9]/}" | ||
| 398 | |||
| 399 | log_info " Found $parse_fail_line_count [PARSE_FAIL] log lines" | ||
| 400 | log_info " Found $write_policy_line_count write policy rejection log lines" | ||
| 401 | log_info " Found $builder_line_count builder rejection log lines" | ||
| 402 | |||
| 403 | local total_invalid_announcement_lines=$((write_policy_line_count + builder_line_count)) | ||
| 404 | |||
| 405 | if [[ "$parse_fail_line_count" -eq 0 && "$total_invalid_announcement_lines" -eq 0 ]]; then | ||
| 406 | log_warn "No matching entries found in logs." | ||
| 313 | log_warn "" | 407 | log_warn "" |
| 314 | log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." | 408 | log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." |
| 315 | log_warn "The structured log format required by this script:" | 409 | log_warn "The script looks for:" |
| 316 | log_warn "" | 410 | log_warn "" |
| 317 | log_warn " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." | 411 | log_warn " 1. [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." |
| 412 | log_warn " 2. Event rejected by write policy event_id=... kind=30617 reason=Invalid announcement: ..." | ||
| 318 | log_warn "" | 413 | log_warn "" |
| 319 | log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" | 414 | log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" |
| 320 | log_warn "" | 415 | log_warn "" |
| 321 | 416 | ||
| 322 | # Create empty output file with header comment | 417 | # Create empty output file with header comment |
| 323 | { | 418 | { |
| 324 | echo "# Parse failures extracted from $service" | 419 | echo "# Parse failures and invalid announcements extracted from $service" |
| 325 | echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" | 420 | echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" |
| 326 | echo "# Extracted: $(date -Iseconds)" | 421 | echo "# Extracted: $(date -Iseconds)" |
| 327 | echo "# Format: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason" | ||
| 328 | echo "#" | 422 | echo "#" |
| 329 | echo "# NOTE: No [PARSE_FAIL] entries found." | 423 | echo "# Includes:" |
| 424 | echo "# - [PARSE_FAIL] structured log entries" | ||
| 425 | echo "# - \"Invalid announcement\" rejections" | ||
| 426 | echo "#" | ||
| 427 | echo "# Format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub" | ||
| 428 | echo "# Note: repo and npub may be empty for some entries" | ||
| 429 | echo "#" | ||
| 430 | echo "# NOTE: No matching entries found." | ||
| 330 | echo "# This is expected if ngit-grasp logging improvements are not yet deployed." | 431 | echo "# This is expected if ngit-grasp logging improvements are not yet deployed." |
| 331 | } > "$output_file" | 432 | } > "$output_file" |
| 332 | 433 | ||
| 434 | rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_builder_rejection" | ||
| 333 | log_info "Created empty output file: $output_file" | 435 | log_info "Created empty output file: $output_file" |
| 334 | exit 0 | 436 | exit 0 |
| 335 | fi | 437 | fi |
| 336 | 438 | ||
| 337 | # Write header | 439 | # Write header |
| 338 | { | 440 | { |
| 339 | echo "# Parse failures extracted from $service" | 441 | echo "# Parse failures and invalid announcements extracted from $service" |
| 340 | echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" | 442 | echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" |
| 341 | echo "# Extracted: $(date -Iseconds)" | 443 | echo "# Extracted: $(date -Iseconds)" |
| 342 | echo "# Format: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason" | 444 | echo "#" |
| 445 | echo "# Includes:" | ||
| 446 | echo "# - [PARSE_FAIL] structured log entries" | ||
| 447 | echo "# - \"Invalid announcement\" rejections" | ||
| 448 | echo "#" | ||
| 449 | echo "# Format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub" | ||
| 450 | echo "# Note: repo and npub may be empty for some entries" | ||
| 343 | } > "$output_file" | 451 | } > "$output_file" |
| 344 | 452 | ||
| 345 | # Parse each line | 453 | # Parse [PARSE_FAIL] entries |
| 346 | local count=0 | 454 | log_info " Parsing [PARSE_FAIL] entries..." |
| 347 | while IFS= read -r line; do | 455 | local parse_fail_count=0 |
| 348 | local parsed | 456 | if [[ "$parse_fail_line_count" -gt 0 ]]; then |
| 349 | parsed=$(parse_log_line "$line") | 457 | while IFS= read -r line; do |
| 350 | if [[ -n "$parsed" ]]; then | 458 | local parsed |
| 351 | echo "$parsed" >> "$output_file" | 459 | parsed=$(parse_parse_fail_line "$line") |
| 352 | ((count++)) | 460 | if [[ -n "$parsed" ]]; then |
| 353 | fi | 461 | echo "$parsed" >> "$output_file" |
| 354 | done <<< "$raw_lines" | 462 | parse_fail_count=$((parse_fail_count + 1)) |
| 463 | fi | ||
| 464 | done < "$temp_parse_fail" | ||
| 465 | fi | ||
| 466 | |||
| 467 | # Parse write policy rejection entries | ||
| 468 | log_info " Parsing write policy rejection entries..." | ||
| 469 | local write_policy_count=0 | ||
| 470 | if [[ "$write_policy_line_count" -gt 0 ]]; then | ||
| 471 | while IFS= read -r line; do | ||
| 472 | local parsed | ||
| 473 | parsed=$(parse_write_policy_rejection_line "$line") | ||
| 474 | if [[ -n "$parsed" ]]; then | ||
| 475 | echo "$parsed" >> "$output_file" | ||
| 476 | write_policy_count=$((write_policy_count + 1)) | ||
| 477 | fi | ||
| 478 | done < "$temp_write_policy_rejection" | ||
| 479 | fi | ||
| 480 | |||
| 481 | # Parse builder rejection entries | ||
| 482 | log_info " Parsing builder rejection entries..." | ||
| 483 | local builder_count=0 | ||
| 484 | if [[ "$builder_line_count" -gt 0 ]]; then | ||
| 485 | while IFS= read -r line; do | ||
| 486 | local parsed | ||
| 487 | parsed=$(parse_builder_rejection_line "$line") | ||
| 488 | if [[ -n "$parsed" ]]; then | ||
| 489 | echo "$parsed" >> "$output_file" | ||
| 490 | builder_count=$((builder_count + 1)) | ||
| 491 | fi | ||
| 492 | done < "$temp_builder_rejection" | ||
| 493 | fi | ||
| 494 | |||
| 495 | local invalid_announcement_count=$((write_policy_count + builder_count)) | ||
| 496 | |||
| 497 | rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_builder_rejection" | ||
| 498 | |||
| 499 | # Deduplicate by event_id (first column) - keep first occurrence | ||
| 500 | log_info " Deduplicating entries..." | ||
| 501 | local deduped_file | ||
| 502 | deduped_file=$(mktemp) | ||
| 503 | # Preserve header lines (starting with #) and deduplicate data lines | ||
| 504 | grep '^#' "$output_file" > "$deduped_file" | ||
| 505 | grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file" | ||
| 506 | mv "$deduped_file" "$output_file" | ||
| 507 | |||
| 508 | # Count final entries (excluding header lines) | ||
| 509 | local count | ||
| 510 | count=$(grep -v '^#' "$output_file" | wc -l) | ||
| 511 | count="${count//[^0-9]/}" # Strip whitespace | ||
| 512 | count="${count:-0}" | ||
| 355 | 513 | ||
| 356 | rm -f "$temp_file" | 514 | rm -f "$temp_file" |
| 357 | 515 | ||
| @@ -360,26 +518,45 @@ main() { | |||
| 360 | log_info "=== Extraction Summary ===" | 518 | log_info "=== Extraction Summary ===" |
| 361 | log_info "Service: $service" | 519 | log_info "Service: $service" |
| 362 | log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" | 520 | log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" |
| 363 | log_success "Extracted $count parse failure entries" | 521 | log_success "Extracted $count total entries" |
| 522 | log_info " - [PARSE_FAIL] entries: $parse_fail_count" | ||
| 523 | log_info " - Invalid announcement rejections: $invalid_announcement_count" | ||
| 364 | echo "" | 524 | echo "" |
| 365 | log_info "Output file: $output_file" | 525 | log_info "Output file: $output_file" |
| 366 | 526 | ||
| 367 | if [[ $count -gt 0 ]]; then | 527 | if [[ $count -gt 0 ]]; then |
| 368 | echo "" | 528 | echo "" |
| 369 | log_info "Sample entries (first 5):" | 529 | log_info "Sample entries (first 5):" |
| 370 | tail -n +5 "$output_file" | head -5 | while IFS=$'\t' read -r repo npub kind event_id reason; do | 530 | # Use a subshell to avoid SIGPIPE issues with set -e |
| 371 | echo " kind=$kind repo=$repo reason=\"$reason\"" | 531 | # New format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub |
| 372 | done | 532 | (grep -v '^#' "$output_file" | head -5 | while IFS=$'\t' read -r event_id kind reason repo npub; do |
| 533 | echo " kind=$kind event_id=${event_id:0:16}... reason=\"${reason:0:60}...\"" | ||
| 534 | done) || true | ||
| 373 | fi | 535 | fi |
| 374 | 536 | ||
| 375 | # Breakdown by kind | 537 | # Breakdown by kind |
| 376 | if [[ $count -gt 0 ]]; then | 538 | if [[ $count -gt 0 ]]; then |
| 377 | echo "" | 539 | echo "" |
| 378 | log_info "Breakdown by event kind:" | 540 | log_info "Breakdown by event kind:" |
| 379 | tail -n +5 "$output_file" | awk -F'\t' '{print $3}' | sort | uniq -c | sort -rn | while read -r cnt kind; do | 541 | # Use a subshell to avoid SIGPIPE issues with set -e |
| 542 | # kind is now column 2 | ||
| 543 | (grep -v '^#' "$output_file" | awk -F'\t' '{print $2}' | sort | uniq -c | sort -rn | while read -r cnt kind; do | ||
| 380 | echo " kind $kind: $cnt failures" | 544 | echo " kind $kind: $cnt failures" |
| 381 | done | 545 | done) || true |
| 546 | fi | ||
| 547 | |||
| 548 | # Breakdown by reason pattern (for invalid announcements) | ||
| 549 | if [[ $invalid_announcement_count -gt 0 ]]; then | ||
| 550 | echo "" | ||
| 551 | log_info "Breakdown by reason pattern:" | ||
| 552 | # Extract the main reason type (before the colon details) | ||
| 553 | (grep -v '^#' "$output_file" | awk -F'\t' '{print $3}' | sed 's/:.*//' | sort | uniq -c | sort -rn | head -10 | while read -r cnt reason; do | ||
| 554 | echo " $reason: $cnt" | ||
| 555 | done) || true | ||
| 382 | fi | 556 | fi |
| 557 | |||
| 558 | # Explicit success exit | ||
| 559 | exit 0 | ||
| 383 | } | 560 | } |
| 384 | 561 | ||
| 385 | main "$@" | 562 | main "$@" |
diff --git a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh
index a603a1e..a0c8ad0 100755
--- a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh
+++ b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh
| @@ -356,7 +356,7 @@ main() { | |||
| 356 | parsed=$(parse_log_line "$line") | 356 | parsed=$(parse_log_line "$line") |
| 357 | if [[ -n "$parsed" ]]; then | 357 | if [[ -n "$parsed" ]]; then |
| 358 | echo "$parsed" >> "$output_file" | 358 | echo "$parsed" >> "$output_file" |
| 359 | ((count++)) | 359 | count=$((count + 1)) |
| 360 | fi | 360 | fi |
| 361 | done <<< "$raw_lines" | 361 | done <<< "$raw_lines" |
| 362 | 362 | ||
| @@ -374,9 +374,10 @@ main() { | |||
| 374 | if [[ $count -gt 0 ]]; then | 374 | if [[ $count -gt 0 ]]; then |
| 375 | echo "" | 375 | echo "" |
| 376 | log_info "Sample entries (first 5):" | 376 | log_info "Sample entries (first 5):" |
| 377 | tail -n +5 "$output_file" | head -5 | while IFS=$'\t' read -r repo npub timestamp reason; do | 377 | # Use a subshell to avoid SIGPIPE issues with set -e |
| 378 | (tail -n +5 "$output_file" | head -5 | while IFS=$'\t' read -r repo npub timestamp reason; do | ||
| 378 | echo " repo=$repo npub=${npub:0:20}... timestamp=$timestamp" | 379 | echo " repo=$repo npub=${npub:0:20}... timestamp=$timestamp" |
| 379 | done | 380 | done) || true |
| 380 | fi | 381 | fi |
| 381 | 382 | ||
| 382 | # Show unique repos affected | 383 | # Show unique repos affected |
| @@ -388,9 +389,10 @@ main() { | |||
| 388 | 389 | ||
| 389 | echo "" | 390 | echo "" |
| 390 | log_info "Repositories with purgatory expiry:" | 391 | log_info "Repositories with purgatory expiry:" |
| 391 | tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort | uniq -c | sort -rn | head -10 | while read -r cnt repo; do | 392 | # Use a subshell to avoid SIGPIPE issues with set -e |
| 393 | (tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort | uniq -c | sort -rn | head -10 | while read -r cnt repo; do | ||
| 392 | echo " $repo: $cnt expiry events" | 394 | echo " $repo: $cnt expiry events" |
| 393 | done | 395 | done) || true |
| 394 | 396 | ||
| 395 | local total_repos | 397 | local total_repos |
| 396 | total_repos=$(tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort -u | wc -l) | 398 | total_repos=$(tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort -u | wc -l) |
| @@ -398,6 +400,9 @@ main() { | |||
| 398 | echo " ... and $((total_repos - 10)) more repositories" | 400 | echo " ... and $((total_repos - 10)) more repositories" |
| 399 | fi | 401 | fi |
| 400 | fi | 402 | fi |
| 403 | |||
| 404 | # Explicit success exit | ||
| 405 | exit 0 | ||
| 401 | } | 406 | } |
| 402 | 407 | ||
| 403 | main "$@" | 408 | main "$@" |