upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/docs
diff options
context:
space:
mode:
Diffstat (limited to 'docs')
-rwxr-xr-xdocs/how-to/migration-scripts/30-extract-parse-failures.sh70
1 files changed, 17 insertions, 53 deletions
diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh
index 114a44d..7870c61 100755
--- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh
+++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh
@@ -39,8 +39,10 @@
39# 2. "Invalid announcement" rejections (write policy): 39# 2. "Invalid announcement" rejections (write policy):
40# Event rejected by write policy event_id=abc123... relay=wss://... kind=30617 reason=Invalid announcement: multiple clone tags found... 40# Event rejected by write policy event_id=abc123... relay=wss://... kind=30617 reason=Invalid announcement: multiple clone tags found...
41# 41#
42# 3. "Rejected repository announcement" (builder): 42# NOTE: Builder logs ("Rejected repository announcement note1xxx:") are NOT extracted
43# Rejected repository announcement note1xxx: Invalid announcement: multiple clone tags found... 43# because they use bech32 (note1) IDs while write policy logs use hex IDs. Extracting
44# both would cause double-counting since deduplication only works within each format.
45# Write policy logs contain the same events, so we don't lose any data.
44# 46#
45# Required fields: kind, event_id, reason 47# Required fields: kind, event_id, reason
46# Optional fields: repo, npub (may not be available for all entry types) 48# Optional fields: repo, npub (may not be available for all entry types)
@@ -135,7 +137,6 @@ usage() {
135 echo "Expected log formats:" 137 echo "Expected log formats:"
136 echo " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." 138 echo " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..."
137 echo " Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ..." 139 echo " Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ..."
138 echo " Rejected repository announcement note1xxx: Invalid announcement: ..."
139 exit 1 140 exit 1
140} 141}
141 142
@@ -197,28 +198,11 @@ parse_write_policy_rejection_line() {
197 fi 198 fi
198} 199}
199 200
200# Parse a "Rejected repository announcement" log line from builder 201# NOTE: parse_builder_rejection_line() was removed to fix double-counting bug.
201# Input: log line containing "Rejected repository announcement <note_id>: Invalid announcement:" 202# Builder logs use bech32 (note1) IDs while write policy logs use hex IDs.
202# Output: TSV line: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub 203# Since deduplication only works within each format, extracting both caused
203# Note: The note_id is bech32 encoded, we need to extract it 204# the same event to be counted twice. Write policy logs contain the same
204parse_builder_rejection_line() { 205# events, so we don't lose any data by only extracting from that source.
205 local line="$1"
206
207 local note_id reason
208
209 # Extract note_id (note1...) from "Rejected repository announcement note1xxx:"
210 note_id=$(echo "$line" | grep -oP 'Rejected repository announcement \Knote1[a-z0-9]+' || echo "")
211
212 # Extract reason (everything after the note_id and colon)
213 reason=$(echo "$line" | grep -oP 'Rejected repository announcement note1[a-z0-9]+: \K.*$' || echo "")
214
215 # Only output if we have the required fields
216 # Kind is always 30617 for announcements
217 if [[ -n "$note_id" && -n "$reason" ]]; then
218 # Use note_id as event_id (bech32 format), kind=30617, repo and npub empty
219 printf '%s\t%s\t%s\t\t\n' "$note_id" "30617" "$reason"
220 fi
221}
222 206
223# Main 207# Main
224main() { 208main() {
@@ -361,11 +345,10 @@ main() {
361 log_info "Extracting log entries..." 345 log_info "Extracting log entries..."
362 346
363 # Create temp files for intermediate results 347 # Create temp files for intermediate results
364 local temp_stderr temp_parse_fail temp_write_policy_rejection temp_builder_rejection 348 local temp_stderr temp_parse_fail temp_write_policy_rejection
365 temp_stderr=$(mktemp) 349 temp_stderr=$(mktemp)
366 temp_parse_fail=$(mktemp) 350 temp_parse_fail=$(mktemp)
367 temp_write_policy_rejection=$(mktemp) 351 temp_write_policy_rejection=$(mktemp)
368 temp_builder_rejection=$(mktemp)
369 352
370 # Extract [PARSE_FAIL] entries directly to temp file (streaming) 353 # Extract [PARSE_FAIL] entries directly to temp file (streaming)
371 log_info " Searching for [PARSE_FAIL] entries..." 354 log_info " Searching for [PARSE_FAIL] entries..."
@@ -378,29 +361,24 @@ main() {
378 fi 361 fi
379 362
380 # Extract "Event rejected by write policy" with "Invalid announcement" (streaming) 363 # Extract "Event rejected by write policy" with "Invalid announcement" (streaming)
364 # NOTE: We only extract from write policy logs (hex IDs), not builder logs (note1 IDs)
365 # to avoid double-counting. Both log sources contain the same events.
381 log_info " Searching for write policy rejections..." 366 log_info " Searching for write policy rejections..."
382 eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep 'Invalid announcement' > "$temp_write_policy_rejection" || true 367 eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep 'Invalid announcement' > "$temp_write_policy_rejection" || true
383 368
384 # Extract "Rejected repository announcement" from builder (streaming)
385 log_info " Searching for builder rejections..."
386 eval "$journal_cmd" 2>/dev/null | grep 'Rejected repository announcement' | grep 'Invalid announcement' > "$temp_builder_rejection" || true
387
388 rm -f "$temp_stderr" 369 rm -f "$temp_stderr"
389 370
390 # Check if we found anything 371 # Check if we found anything
391 local parse_fail_line_count write_policy_line_count builder_line_count 372 local parse_fail_line_count write_policy_line_count
392 parse_fail_line_count=$(wc -l < "$temp_parse_fail") 373 parse_fail_line_count=$(wc -l < "$temp_parse_fail")
393 parse_fail_line_count="${parse_fail_line_count//[^0-9]/}" 374 parse_fail_line_count="${parse_fail_line_count//[^0-9]/}"
394 write_policy_line_count=$(wc -l < "$temp_write_policy_rejection") 375 write_policy_line_count=$(wc -l < "$temp_write_policy_rejection")
395 write_policy_line_count="${write_policy_line_count//[^0-9]/}" 376 write_policy_line_count="${write_policy_line_count//[^0-9]/}"
396 builder_line_count=$(wc -l < "$temp_builder_rejection")
397 builder_line_count="${builder_line_count//[^0-9]/}"
398 377
399 log_info " Found $parse_fail_line_count [PARSE_FAIL] log lines" 378 log_info " Found $parse_fail_line_count [PARSE_FAIL] log lines"
400 log_info " Found $write_policy_line_count write policy rejection log lines" 379 log_info " Found $write_policy_line_count write policy rejection log lines"
401 log_info " Found $builder_line_count builder rejection log lines"
402 380
403 local total_invalid_announcement_lines=$((write_policy_line_count + builder_line_count)) 381 local total_invalid_announcement_lines=$write_policy_line_count
404 382
405 if [[ "$parse_fail_line_count" -eq 0 && "$total_invalid_announcement_lines" -eq 0 ]]; then 383 if [[ "$parse_fail_line_count" -eq 0 && "$total_invalid_announcement_lines" -eq 0 ]]; then
406 log_warn "No matching entries found in logs." 384 log_warn "No matching entries found in logs."
@@ -431,7 +409,7 @@ main() {
431 echo "# This is expected if ngit-grasp logging improvements are not yet deployed." 409 echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
432 } > "$output_file" 410 } > "$output_file"
433 411
434 rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_builder_rejection" 412 rm -f "$temp_parse_fail" "$temp_write_policy_rejection"
435 log_info "Created empty output file: $output_file" 413 log_info "Created empty output file: $output_file"
436 exit 0 414 exit 0
437 fi 415 fi
@@ -478,23 +456,9 @@ main() {
478 done < "$temp_write_policy_rejection" 456 done < "$temp_write_policy_rejection"
479 fi 457 fi
480 458
481 # Parse builder rejection entries 459 local invalid_announcement_count=$write_policy_count
482 log_info " Parsing builder rejection entries..."
483 local builder_count=0
484 if [[ "$builder_line_count" -gt 0 ]]; then
485 while IFS= read -r line; do
486 local parsed
487 parsed=$(parse_builder_rejection_line "$line")
488 if [[ -n "$parsed" ]]; then
489 echo "$parsed" >> "$output_file"
490 builder_count=$((builder_count + 1))
491 fi
492 done < "$temp_builder_rejection"
493 fi
494
495 local invalid_announcement_count=$((write_policy_count + builder_count))
496 460
497 rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_builder_rejection" 461 rm -f "$temp_parse_fail" "$temp_write_policy_rejection"
498 462
499 # Deduplicate by event_id (first column) - keep first occurrence 463 # Deduplicate by event_id (first column) - keep first occurrence
500 log_info " Deduplicating entries..." 464 log_info " Deduplicating entries..."