diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-23 16:59:24 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-27 20:38:06 +0000 |
| commit | cbb0e768641a6ca0cbd7e7013437cc71b920004d (patch) | |
| tree | 3d350eec31ea2fcd2c533f949bb136498ea45767 /docs/how-to/migration-scripts | |
| parent | 0b9527ede03521a40f1174a5a6e40a943bf27e2d (diff) | |
Capture invalid announcement rejections in Phase 4
Update parse failures script to also extract 'Invalid announcement'
rejections from logs. These are announcement events that failed
validation (e.g., multiple clone tags instead of a single tag with
multiple values).
Changes:
- Search for 'Event rejected by write policy' pattern with 'Invalid announcement'
- Search for 'Rejected repository announcement' pattern from builder
- Extract event_id, kind, and reason from rejection logs
- Combine with [PARSE_FAIL] entries in output
- Deduplicate entries by event_id
- Update header to clarify both patterns are captured
- Update migration guide to document this
- Fix SIGPIPE handling in purgatory script (minor)
This captures the ~446 unique announcements rejected for NIP-34 format
violations (multiple clone tags), which were previously unexplained
in the migration analysis.
Diffstat (limited to 'docs/how-to/migration-scripts')
| -rwxr-xr-x | docs/how-to/migration-scripts/30-extract-parse-failures.sh | 289 | ||||
| -rwxr-xr-x | docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh | 15 |
2 files changed, 243 insertions, 61 deletions
diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh index d4f0ff2..114a44d 100755 --- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh +++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh | |||
| @@ -3,7 +3,8 @@ | |||
| 3 | # 30-extract-parse-failures.sh - Extract parse failure events from systemd logs | 3 | # 30-extract-parse-failures.sh - Extract parse failure events from systemd logs |
| 4 | # | 4 | # |
| 5 | # PHASE 4a of the GRASP relay to ngit-grasp migration analysis pipeline. | 5 | # PHASE 4a of the GRASP relay to ngit-grasp migration analysis pipeline. |
| 6 | # Extracts structured [PARSE_FAIL] log entries from journalctl. | 6 | # Extracts structured [PARSE_FAIL] log entries AND "Invalid announcement" |
| 7 | # rejections from journalctl. | ||
| 7 | # | 8 | # |
| 8 | # USAGE: | 9 | # USAGE: |
| 9 | # ./30-extract-parse-failures.sh <service-name> <output-dir> [options] | 10 | # ./30-extract-parse-failures.sh <service-name> <output-dir> [options] |
| @@ -27,24 +28,34 @@ | |||
| 27 | # <output-dir>/parse-failures.txt | 28 | # <output-dir>/parse-failures.txt |
| 28 | # | 29 | # |
| 29 | # OUTPUT FORMAT (TSV): | 30 | # OUTPUT FORMAT (TSV): |
| 30 | # repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason | 31 | # event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub |
| 31 | # | 32 | # |
| 32 | # EXPECTED LOG FORMAT: | 33 | # EXPECTED LOG FORMATS: |
| 33 | # The script looks for structured log entries in this format: | 34 | # The script looks for three types of log entries: |
| 34 | # | 35 | # |
| 35 | # 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... | 36 | # 1. Structured [PARSE_FAIL] entries: |
| 37 | # 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... | ||
| 38 | # | ||
| 39 | # 2. "Invalid announcement" rejections (write policy): | ||
| 40 | # Event rejected by write policy event_id=abc123... relay=wss://... kind=30617 reason=Invalid announcement: multiple clone tags found... | ||
| 41 | # | ||
| 42 | # 3. "Rejected repository announcement" (builder): | ||
| 43 | # Rejected repository announcement note1xxx: Invalid announcement: multiple clone tags found... | ||
| 36 | # | 44 | # |
| 37 | # Required fields: kind, event_id, reason | 45 | # Required fields: kind, event_id, reason |
| 38 | # Optional fields: repo, npub (may not be available if parsing failed early) | 46 | # Optional fields: repo, npub (may not be available for all entry types) |
| 39 | # | 47 | # |
| 40 | # DEPENDENCY: | 48 | # DEPENDENCY: |
| 41 | # This script requires logging improvements in ngit-grasp to emit structured | 49 | # This script requires logging improvements in ngit-grasp to emit structured |
| 42 | # [PARSE_FAIL] log entries. Until those are implemented, this script will | 50 | # [PARSE_FAIL] log entries. Until those are implemented, this script will |
| 43 | # find no matching entries (which is handled gracefully). | 51 | # find no matching entries (which is handled gracefully). |
| 44 | # | 52 | # |
| 53 | # "Invalid announcement" rejections are logged by the write policy and | ||
| 54 | # should be present in any ngit-grasp deployment. | ||
| 55 | # | ||
| 45 | # See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section) | 56 | # See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section) |
| 46 | # | 57 | # |
| 47 | # Expected Rust logging code: | 58 | # Expected Rust logging code for [PARSE_FAIL]: |
| 48 | # tracing::warn!( | 59 | # tracing::warn!( |
| 49 | # target: "migration", | 60 | # target: "migration", |
| 50 | # "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}", | 61 | # "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}", |
| @@ -53,7 +64,7 @@ | |||
| 53 | # | 64 | # |
| 54 | # PREREQUISITES: | 65 | # PREREQUISITES: |
| 55 | # - journalctl (systemd) | 66 | # - journalctl (systemd) |
| 56 | # - grep, awk (standard Unix tools) | 67 | # - grep, awk, sed (standard Unix tools) |
| 57 | # - Access to systemd journal (may require sudo or journal group membership) | 68 | # - Access to systemd journal (may require sudo or journal group membership) |
| 58 | # | 69 | # |
| 59 | # RUNTIME: Depends on log volume, typically < 30 seconds | 70 | # RUNTIME: Depends on log volume, typically < 30 seconds |
| @@ -121,15 +132,17 @@ usage() { | |||
| 121 | echo " $0 ngit-grasp.service output/logs --since '2026-01-01'" | 132 | echo " $0 ngit-grasp.service output/logs --since '2026-01-01'" |
| 122 | echo " $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'" | 133 | echo " $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'" |
| 123 | echo "" | 134 | echo "" |
| 124 | echo "Expected log format:" | 135 | echo "Expected log formats:" |
| 125 | echo " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." | 136 | echo " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." |
| 137 | echo " Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ..." | ||
| 138 | echo " Rejected repository announcement note1xxx: Invalid announcement: ..." | ||
| 126 | exit 1 | 139 | exit 1 |
| 127 | } | 140 | } |
| 128 | 141 | ||
| 129 | # Parse a single log line and extract fields | 142 | # Parse a [PARSE_FAIL] log line and extract fields |
| 130 | # Input: log line containing [PARSE_FAIL] | 143 | # Input: log line containing [PARSE_FAIL] |
| 131 | # Output: TSV line: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason | 144 | # Output: TSV line: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub |
| 132 | parse_log_line() { | 145 | parse_parse_fail_line() { |
| 133 | local line="$1" | 146 | local line="$1" |
| 134 | 147 | ||
| 135 | # Extract fields using grep -oP (Perl regex) or awk | 148 | # Extract fields using grep -oP (Perl regex) or awk |
| @@ -154,7 +167,56 @@ parse_log_line() { | |||
| 154 | 167 | ||
| 155 | # Only output if we have the required fields | 168 | # Only output if we have the required fields |
| 156 | if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then | 169 | if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then |
| 157 | printf '%s\t%s\t%s\t%s\t%s\n' "$repo" "$npub" "$kind" "$event_id" "$reason" | 170 | printf '%s\t%s\t%s\t%s\t%s\n' "$event_id" "$kind" "$reason" "$repo" "$npub" |
| 171 | fi | ||
| 172 | } | ||
| 173 | |||
| 174 | # Parse an "Invalid announcement" rejection log line from write policy | ||
| 175 | # Input: log line containing "Event rejected by write policy" with "Invalid announcement" | ||
| 176 | # Output: TSV line: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub | ||
| 177 | # Note: repo and npub are empty for these entries (not available in log format) | ||
| 178 | parse_write_policy_rejection_line() { | ||
| 179 | local line="$1" | ||
| 180 | |||
| 181 | local kind event_id reason | ||
| 182 | |||
| 183 | # Extract event_id=VALUE (hex string) | ||
| 184 | event_id=$(echo "$line" | grep -oP 'event_id=\K[a-f0-9]+' || echo "") | ||
| 185 | |||
| 186 | # Extract kind=VALUE | ||
| 187 | kind=$(echo "$line" | grep -oP 'kind=\K[0-9]+' || echo "") | ||
| 188 | |||
| 189 | # Extract reason=VALUE (everything after "reason=") | ||
| 190 | # The reason is unquoted and goes to end of line | ||
| 191 | reason=$(echo "$line" | grep -oP 'reason=\K.*$' || echo "") | ||
| 192 | |||
| 193 | # Only output if we have the required fields | ||
| 194 | if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then | ||
| 195 | # repo and npub are empty for invalid announcement entries | ||
| 196 | printf '%s\t%s\t%s\t\t\n' "$event_id" "$kind" "$reason" | ||
| 197 | fi | ||
| 198 | } | ||
| 199 | |||
| 200 | # Parse a "Rejected repository announcement" log line from builder | ||
| 201 | # Input: log line containing "Rejected repository announcement <note_id>: Invalid announcement:" | ||
| 202 | # Output: TSV line: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub | ||
| 203 | # Note: The note_id is bech32 encoded, we need to extract it | ||
| 204 | parse_builder_rejection_line() { | ||
| 205 | local line="$1" | ||
| 206 | |||
| 207 | local note_id reason | ||
| 208 | |||
| 209 | # Extract note_id (note1...) from "Rejected repository announcement note1xxx:" | ||
| 210 | note_id=$(echo "$line" | grep -oP 'Rejected repository announcement \Knote1[a-z0-9]+' || echo "") | ||
| 211 | |||
| 212 | # Extract reason (everything after the note_id and colon) | ||
| 213 | reason=$(echo "$line" | grep -oP 'Rejected repository announcement note1[a-z0-9]+: \K.*$' || echo "") | ||
| 214 | |||
| 215 | # Only output if we have the required fields | ||
| 216 | # Kind is always 30617 for announcements | ||
| 217 | if [[ -n "$note_id" && -n "$reason" ]]; then | ||
| 218 | # Use note_id as event_id (bech32 format), kind=30617, repo and npub empty | ||
| 219 | printf '%s\t%s\t%s\t\t\n' "$note_id" "30617" "$reason" | ||
| 158 | fi | 220 | fi |
| 159 | } | 221 | } |
| 160 | 222 | ||
| @@ -260,21 +322,27 @@ main() { | |||
| 260 | journal_cmd="$journal_cmd --until '$until_date'" | 322 | journal_cmd="$journal_cmd --until '$until_date'" |
| 261 | fi | 323 | fi |
| 262 | 324 | ||
| 263 | log_info "Running: $journal_cmd | grep '\\[PARSE_FAIL\\]'" | 325 | log_info "Running: $journal_cmd | grep '[PARSE_FAIL]' or 'Invalid announcement'" |
| 264 | 326 | ||
| 265 | if [[ "$dry_run" == true ]]; then | 327 | if [[ "$dry_run" == true ]]; then |
| 266 | log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt" | 328 | log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt" |
| 267 | 329 | ||
| 268 | # Show sample of what would be extracted | 330 | # Show sample of what would be extracted |
| 269 | log_info "Checking for matching log entries..." | 331 | log_info "Checking for matching log entries..." |
| 270 | local sample_count | 332 | local parse_fail_count invalid_announcement_count |
| 271 | sample_count=$(eval "$journal_cmd" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") | 333 | parse_fail_count=$(eval "$journal_cmd" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") |
| 272 | sample_count="${sample_count//[^0-9]/}" # Strip non-numeric characters | 334 | parse_fail_count="${parse_fail_count//[^0-9]/}" # Strip non-numeric characters |
| 273 | sample_count="${sample_count:-0}" | 335 | parse_fail_count="${parse_fail_count:-0}" |
| 274 | log_info "Found $sample_count matching log entries" | 336 | |
| 337 | invalid_announcement_count=$(eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep -c 'Invalid announcement' || echo "0") | ||
| 338 | invalid_announcement_count="${invalid_announcement_count//[^0-9]/}" | ||
| 339 | invalid_announcement_count="${invalid_announcement_count:-0}" | ||
| 275 | 340 | ||
| 276 | if [[ "$sample_count" -eq 0 ]]; then | 341 | log_info "Found $parse_fail_count [PARSE_FAIL] entries" |
| 277 | log_warn "No [PARSE_FAIL] entries found in logs." | 342 | log_info "Found $invalid_announcement_count 'Invalid announcement' rejections" |
| 343 | |||
| 344 | if [[ "$parse_fail_count" -eq 0 && "$invalid_announcement_count" -eq 0 ]]; then | ||
| 345 | log_warn "No matching entries found in logs." | ||
| 278 | log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." | 346 | log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." |
| 279 | log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" | 347 | log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" |
| 280 | fi | 348 | fi |
| @@ -289,69 +357,159 @@ main() { | |||
| 289 | local temp_file | 357 | local temp_file |
| 290 | temp_file=$(mktemp) | 358 | temp_file=$(mktemp) |
| 291 | 359 | ||
| 292 | # Extract and parse log entries | 360 | # Extract and parse log entries using streaming (avoids loading all logs into memory) |
| 293 | log_info "Extracting log entries..." | 361 | log_info "Extracting log entries..." |
| 294 | 362 | ||
| 295 | # Get raw log lines containing [PARSE_FAIL] | 363 | # Create temp files for intermediate results |
| 296 | # Capture stderr separately to detect journalctl errors | 364 | local temp_stderr temp_parse_fail temp_write_policy_rejection temp_builder_rejection |
| 297 | local raw_lines journal_stderr journal_exit | ||
| 298 | local temp_stderr | ||
| 299 | temp_stderr=$(mktemp) | 365 | temp_stderr=$(mktemp) |
| 366 | temp_parse_fail=$(mktemp) | ||
| 367 | temp_write_policy_rejection=$(mktemp) | ||
| 368 | temp_builder_rejection=$(mktemp) | ||
| 300 | 369 | ||
| 301 | raw_lines=$(eval "$journal_cmd" 2>"$temp_stderr" | grep '\[PARSE_FAIL\]' || true) | 370 | # Extract [PARSE_FAIL] entries directly to temp file (streaming) |
| 302 | journal_exit=$? | 371 | log_info " Searching for [PARSE_FAIL] entries..." |
| 303 | journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true) | 372 | eval "$journal_cmd" 2>"$temp_stderr" | grep '\[PARSE_FAIL\]' > "$temp_parse_fail" || true |
| 304 | rm -f "$temp_stderr" | ||
| 305 | 373 | ||
| 306 | # Report any journalctl errors (but don't fail - empty logs are valid) | 374 | local journal_stderr |
| 375 | journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true) | ||
| 307 | if [[ -n "$journal_stderr" ]]; then | 376 | if [[ -n "$journal_stderr" ]]; then |
| 308 | log_warn "journalctl reported: $journal_stderr" | 377 | log_warn "journalctl reported: $journal_stderr" |
| 309 | fi | 378 | fi |
| 310 | 379 | ||
| 311 | if [[ -z "$raw_lines" ]]; then | 380 | # Extract "Event rejected by write policy" with "Invalid announcement" (streaming) |
| 312 | log_warn "No [PARSE_FAIL] entries found in logs." | 381 | log_info " Searching for write policy rejections..." |
| 382 | eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep 'Invalid announcement' > "$temp_write_policy_rejection" || true | ||
| 383 | |||
| 384 | # Extract "Rejected repository announcement" from builder (streaming) | ||
| 385 | log_info " Searching for builder rejections..." | ||
| 386 | eval "$journal_cmd" 2>/dev/null | grep 'Rejected repository announcement' | grep 'Invalid announcement' > "$temp_builder_rejection" || true | ||
| 387 | |||
| 388 | rm -f "$temp_stderr" | ||
| 389 | |||
| 390 | # Check if we found anything | ||
| 391 | local parse_fail_line_count write_policy_line_count builder_line_count | ||
| 392 | parse_fail_line_count=$(wc -l < "$temp_parse_fail") | ||
| 393 | parse_fail_line_count="${parse_fail_line_count//[^0-9]/}" | ||
| 394 | write_policy_line_count=$(wc -l < "$temp_write_policy_rejection") | ||
| 395 | write_policy_line_count="${write_policy_line_count//[^0-9]/}" | ||
| 396 | builder_line_count=$(wc -l < "$temp_builder_rejection") | ||
| 397 | builder_line_count="${builder_line_count//[^0-9]/}" | ||
| 398 | |||
| 399 | log_info " Found $parse_fail_line_count [PARSE_FAIL] log lines" | ||
| 400 | log_info " Found $write_policy_line_count write policy rejection log lines" | ||
| 401 | log_info " Found $builder_line_count builder rejection log lines" | ||
| 402 | |||
| 403 | local total_invalid_announcement_lines=$((write_policy_line_count + builder_line_count)) | ||
| 404 | |||
| 405 | if [[ "$parse_fail_line_count" -eq 0 && "$total_invalid_announcement_lines" -eq 0 ]]; then | ||
| 406 | log_warn "No matching entries found in logs." | ||
| 313 | log_warn "" | 407 | log_warn "" |
| 314 | log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." | 408 | log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." |
| 315 | log_warn "The structured log format required by this script:" | 409 | log_warn "The script looks for:" |
| 316 | log_warn "" | 410 | log_warn "" |
| 317 | log_warn " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." | 411 | log_warn " 1. [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." |
| 412 | log_warn " 2. Event rejected by write policy event_id=... kind=30617 reason=Invalid announcement: ..." | ||
| 318 | log_warn "" | 413 | log_warn "" |
| 319 | log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" | 414 | log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" |
| 320 | log_warn "" | 415 | log_warn "" |
| 321 | 416 | ||
| 322 | # Create empty output file with header comment | 417 | # Create empty output file with header comment |
| 323 | { | 418 | { |
| 324 | echo "# Parse failures extracted from $service" | 419 | echo "# Parse failures and invalid announcements extracted from $service" |
| 325 | echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" | 420 | echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" |
| 326 | echo "# Extracted: $(date -Iseconds)" | 421 | echo "# Extracted: $(date -Iseconds)" |
| 327 | echo "# Format: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason" | ||
| 328 | echo "#" | 422 | echo "#" |
| 329 | echo "# NOTE: No [PARSE_FAIL] entries found." | 423 | echo "# Includes:" |
| 424 | echo "# - [PARSE_FAIL] structured log entries" | ||
| 425 | echo "# - \"Invalid announcement\" rejections" | ||
| 426 | echo "#" | ||
| 427 | echo "# Format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub" | ||
| 428 | echo "# Note: repo and npub may be empty for some entries" | ||
| 429 | echo "#" | ||
| 430 | echo "# NOTE: No matching entries found." | ||
| 330 | echo "# This is expected if ngit-grasp logging improvements are not yet deployed." | 431 | echo "# This is expected if ngit-grasp logging improvements are not yet deployed." |
| 331 | } > "$output_file" | 432 | } > "$output_file" |
| 332 | 433 | ||
| 434 | rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_builder_rejection" | ||
| 333 | log_info "Created empty output file: $output_file" | 435 | log_info "Created empty output file: $output_file" |
| 334 | exit 0 | 436 | exit 0 |
| 335 | fi | 437 | fi |
| 336 | 438 | ||
| 337 | # Write header | 439 | # Write header |
| 338 | { | 440 | { |
| 339 | echo "# Parse failures extracted from $service" | 441 | echo "# Parse failures and invalid announcements extracted from $service" |
| 340 | echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" | 442 | echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" |
| 341 | echo "# Extracted: $(date -Iseconds)" | 443 | echo "# Extracted: $(date -Iseconds)" |
| 342 | echo "# Format: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason" | 444 | echo "#" |
| 445 | echo "# Includes:" | ||
| 446 | echo "# - [PARSE_FAIL] structured log entries" | ||
| 447 | echo "# - \"Invalid announcement\" rejections" | ||
| 448 | echo "#" | ||
| 449 | echo "# Format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub" | ||
| 450 | echo "# Note: repo and npub may be empty for some entries" | ||
| 343 | } > "$output_file" | 451 | } > "$output_file" |
| 344 | 452 | ||
| 345 | # Parse each line | 453 | # Parse [PARSE_FAIL] entries |
| 346 | local count=0 | 454 | log_info " Parsing [PARSE_FAIL] entries..." |
| 347 | while IFS= read -r line; do | 455 | local parse_fail_count=0 |
| 348 | local parsed | 456 | if [[ "$parse_fail_line_count" -gt 0 ]]; then |
| 349 | parsed=$(parse_log_line "$line") | 457 | while IFS= read -r line; do |
| 350 | if [[ -n "$parsed" ]]; then | 458 | local parsed |
| 351 | echo "$parsed" >> "$output_file" | 459 | parsed=$(parse_parse_fail_line "$line") |
| 352 | ((count++)) | 460 | if [[ -n "$parsed" ]]; then |
| 353 | fi | 461 | echo "$parsed" >> "$output_file" |
| 354 | done <<< "$raw_lines" | 462 | parse_fail_count=$((parse_fail_count + 1)) |
| 463 | fi | ||
| 464 | done < "$temp_parse_fail" | ||
| 465 | fi | ||
| 466 | |||
| 467 | # Parse write policy rejection entries | ||
| 468 | log_info " Parsing write policy rejection entries..." | ||
| 469 | local write_policy_count=0 | ||
| 470 | if [[ "$write_policy_line_count" -gt 0 ]]; then | ||
| 471 | while IFS= read -r line; do | ||
| 472 | local parsed | ||
| 473 | parsed=$(parse_write_policy_rejection_line "$line") | ||
| 474 | if [[ -n "$parsed" ]]; then | ||
| 475 | echo "$parsed" >> "$output_file" | ||
| 476 | write_policy_count=$((write_policy_count + 1)) | ||
| 477 | fi | ||
| 478 | done < "$temp_write_policy_rejection" | ||
| 479 | fi | ||
| 480 | |||
| 481 | # Parse builder rejection entries | ||
| 482 | log_info " Parsing builder rejection entries..." | ||
| 483 | local builder_count=0 | ||
| 484 | if [[ "$builder_line_count" -gt 0 ]]; then | ||
| 485 | while IFS= read -r line; do | ||
| 486 | local parsed | ||
| 487 | parsed=$(parse_builder_rejection_line "$line") | ||
| 488 | if [[ -n "$parsed" ]]; then | ||
| 489 | echo "$parsed" >> "$output_file" | ||
| 490 | builder_count=$((builder_count + 1)) | ||
| 491 | fi | ||
| 492 | done < "$temp_builder_rejection" | ||
| 493 | fi | ||
| 494 | |||
| 495 | local invalid_announcement_count=$((write_policy_count + builder_count)) | ||
| 496 | |||
| 497 | rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_builder_rejection" | ||
| 498 | |||
| 499 | # Deduplicate by event_id (first column) - keep first occurrence | ||
| 500 | log_info " Deduplicating entries..." | ||
| 501 | local deduped_file | ||
| 502 | deduped_file=$(mktemp) | ||
| 503 | # Preserve header lines (starting with #) and deduplicate data lines | ||
| 504 | grep '^#' "$output_file" > "$deduped_file" | ||
| 505 | grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file" | ||
| 506 | mv "$deduped_file" "$output_file" | ||
| 507 | |||
| 508 | # Count final entries (excluding header lines) | ||
| 509 | local count | ||
| 510 | count=$(grep -v '^#' "$output_file" | wc -l) | ||
| 511 | count="${count//[^0-9]/}" # Strip whitespace | ||
| 512 | count="${count:-0}" | ||
| 355 | 513 | ||
| 356 | rm -f "$temp_file" | 514 | rm -f "$temp_file" |
| 357 | 515 | ||
| @@ -360,26 +518,45 @@ main() { | |||
| 360 | log_info "=== Extraction Summary ===" | 518 | log_info "=== Extraction Summary ===" |
| 361 | log_info "Service: $service" | 519 | log_info "Service: $service" |
| 362 | log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" | 520 | log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" |
| 363 | log_success "Extracted $count parse failure entries" | 521 | log_success "Extracted $count total entries" |
| 522 | log_info " - [PARSE_FAIL] entries: $parse_fail_count" | ||
| 523 | log_info " - Invalid announcement rejections: $invalid_announcement_count" | ||
| 364 | echo "" | 524 | echo "" |
| 365 | log_info "Output file: $output_file" | 525 | log_info "Output file: $output_file" |
| 366 | 526 | ||
| 367 | if [[ $count -gt 0 ]]; then | 527 | if [[ $count -gt 0 ]]; then |
| 368 | echo "" | 528 | echo "" |
| 369 | log_info "Sample entries (first 5):" | 529 | log_info "Sample entries (first 5):" |
| 370 | tail -n +5 "$output_file" | head -5 | while IFS=$'\t' read -r repo npub kind event_id reason; do | 530 | # Use a subshell to avoid SIGPIPE issues with set -e |
| 371 | echo " kind=$kind repo=$repo reason=\"$reason\"" | 531 | # New format: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub |
| 372 | done | 532 | (grep -v '^#' "$output_file" | head -5 | while IFS=$'\t' read -r event_id kind reason repo npub; do |
| 533 | echo " kind=$kind event_id=${event_id:0:16}... reason=\"${reason:0:60}...\"" | ||
| 534 | done) || true | ||
| 373 | fi | 535 | fi |
| 374 | 536 | ||
| 375 | # Breakdown by kind | 537 | # Breakdown by kind |
| 376 | if [[ $count -gt 0 ]]; then | 538 | if [[ $count -gt 0 ]]; then |
| 377 | echo "" | 539 | echo "" |
| 378 | log_info "Breakdown by event kind:" | 540 | log_info "Breakdown by event kind:" |
| 379 | tail -n +5 "$output_file" | awk -F'\t' '{print $3}' | sort | uniq -c | sort -rn | while read -r cnt kind; do | 541 | # Use a subshell to avoid SIGPIPE issues with set -e |
| 542 | # kind is now column 2 | ||
| 543 | (grep -v '^#' "$output_file" | awk -F'\t' '{print $2}' | sort | uniq -c | sort -rn | while read -r cnt kind; do | ||
| 380 | echo " kind $kind: $cnt failures" | 544 | echo " kind $kind: $cnt failures" |
| 381 | done | 545 | done) || true |
| 546 | fi | ||
| 547 | |||
| 548 | # Breakdown by reason pattern (for invalid announcements) | ||
| 549 | if [[ $invalid_announcement_count -gt 0 ]]; then | ||
| 550 | echo "" | ||
| 551 | log_info "Breakdown by reason pattern:" | ||
| 552 | # Extract the main reason type (before the colon details) | ||
| 553 | (grep -v '^#' "$output_file" | awk -F'\t' '{print $3}' | sed 's/:.*//' | sort | uniq -c | sort -rn | head -10 | while read -r cnt reason; do | ||
| 554 | echo " $reason: $cnt" | ||
| 555 | done) || true | ||
| 382 | fi | 556 | fi |
| 557 | |||
| 558 | # Explicit success exit | ||
| 559 | exit 0 | ||
| 383 | } | 560 | } |
| 384 | 561 | ||
| 385 | main "$@" | 562 | main "$@" |
diff --git a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh index a603a1e..a0c8ad0 100755 --- a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh +++ b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh | |||
| @@ -356,7 +356,7 @@ main() { | |||
| 356 | parsed=$(parse_log_line "$line") | 356 | parsed=$(parse_log_line "$line") |
| 357 | if [[ -n "$parsed" ]]; then | 357 | if [[ -n "$parsed" ]]; then |
| 358 | echo "$parsed" >> "$output_file" | 358 | echo "$parsed" >> "$output_file" |
| 359 | ((count++)) | 359 | count=$((count + 1)) |
| 360 | fi | 360 | fi |
| 361 | done <<< "$raw_lines" | 361 | done <<< "$raw_lines" |
| 362 | 362 | ||
| @@ -374,9 +374,10 @@ main() { | |||
| 374 | if [[ $count -gt 0 ]]; then | 374 | if [[ $count -gt 0 ]]; then |
| 375 | echo "" | 375 | echo "" |
| 376 | log_info "Sample entries (first 5):" | 376 | log_info "Sample entries (first 5):" |
| 377 | tail -n +5 "$output_file" | head -5 | while IFS=$'\t' read -r repo npub timestamp reason; do | 377 | # Use a subshell to avoid SIGPIPE issues with set -e |
| 378 | (tail -n +5 "$output_file" | head -5 | while IFS=$'\t' read -r repo npub timestamp reason; do | ||
| 378 | echo " repo=$repo npub=${npub:0:20}... timestamp=$timestamp" | 379 | echo " repo=$repo npub=${npub:0:20}... timestamp=$timestamp" |
| 379 | done | 380 | done) || true |
| 380 | fi | 381 | fi |
| 381 | 382 | ||
| 382 | # Show unique repos affected | 383 | # Show unique repos affected |
| @@ -388,9 +389,10 @@ main() { | |||
| 388 | 389 | ||
| 389 | echo "" | 390 | echo "" |
| 390 | log_info "Repositories with purgatory expiry:" | 391 | log_info "Repositories with purgatory expiry:" |
| 391 | tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort | uniq -c | sort -rn | head -10 | while read -r cnt repo; do | 392 | # Use a subshell to avoid SIGPIPE issues with set -e |
| 393 | (tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort | uniq -c | sort -rn | head -10 | while read -r cnt repo; do | ||
| 392 | echo " $repo: $cnt expiry events" | 394 | echo " $repo: $cnt expiry events" |
| 393 | done | 395 | done) || true |
| 394 | 396 | ||
| 395 | local total_repos | 397 | local total_repos |
| 396 | total_repos=$(tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort -u | wc -l) | 398 | total_repos=$(tail -n +5 "$output_file" | awk -F'\t' '{print $1}' | sort -u | wc -l) |
| @@ -398,6 +400,9 @@ main() { | |||
| 398 | echo " ... and $((total_repos - 10)) more repositories" | 400 | echo " ... and $((total_repos - 10)) more repositories" |
| 399 | fi | 401 | fi |
| 400 | fi | 402 | fi |
| 403 | |||
| 404 | # Explicit success exit | ||
| 405 | exit 0 | ||
| 401 | } | 406 | } |
| 402 | 407 | ||
| 403 | main "$@" | 408 | main "$@" |