From 92a9a3bfe0bc522e8ae411991a366a3a6310d525 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 3 Feb 2026 14:41:46 +0000 Subject: docs: archive relay.ngit.dev migration materials for reference Move migration guide and scripts to docs/archive/2026-01-relay-ngit-dev-migration/ with clear warnings that these are reference-only materials from a specific migration context, not general-purpose tools. These materials document the relay.ngit.dev migration from ngit-relay to ngit-grasp in January 2026. The scripts were developed iteratively during the migration and are specific to that context. They are preserved for: - Historical reference - Context for production fixes in this branch - Inspiration for future migrations (not direct reuse) The migration uncovered critical bugs now fixed in this branch: - Git protocol error handling - Naughty list false positives - Purgatory event tracking - Sync startup issues - Configuration management --- .../migration-scripts/30-extract-parse-failures.sh | 774 --------------------- 1 file changed, 774 deletions(-) delete mode 100755 docs/how-to/migration-scripts/30-extract-parse-failures.sh (limited to 'docs/how-to/migration-scripts/30-extract-parse-failures.sh') diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh deleted file mode 100755 index d762aae..0000000 --- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh +++ /dev/null @@ -1,774 +0,0 @@ -#!/usr/bin/env bash -# -# 30-extract-parse-failures.sh - Extract parse failure events from systemd logs -# -# PHASE 4a of the GRASP relay to ngit-grasp migration analysis pipeline. -# Extracts structured [PARSE_FAIL] log entries AND "Invalid announcement" -# rejections from journalctl. -# -# USAGE: -# ./30-extract-parse-failures.sh [options] -# -# EXAMPLES: -# # Extract from ngit-grasp service (last 30 days, default) -# ./30-extract-parse-failures.sh ngit-grasp.service output/logs -# -# # Extract with custom time range -# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-01" -# -# # Extract from specific time window -# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22" -# -# OPTIONS: -# --since Start date for log extraction (default: 30 days ago) -# --until End date for log extraction (default: now) -# --dry-run Show what would be extracted without writing files -# -# ENRICHMENT: -# The script automatically enriches parse failures with repo/npub information -# by extracting from "Added rejected announcement" log entries which include -# pubkey and identifier fields. Hex pubkeys are converted to npub format using -# `nak encode npub ` if the nak tool is available. -# -# OUTPUT: -# /parse-failures.txt -# -# OUTPUT FORMAT (TSV): -# event_idkindreasonreponpub -# -# EXPECTED LOG FORMATS: -# The script looks for three types of log entries: -# -# 1. Structured [PARSE_FAIL] entries: -# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... -# -# 2. "Invalid announcement" rejections (write policy): -# Event rejected by write policy event_id=abc123... relay=wss://... kind=30617 reason=Invalid announcement: multiple clone tags found... -# -# 3. "Added rejected announcement" entries (for enrichment): -# Added rejected announcement to two-tier index event_id=abc123... kind=30617 identifier=myrepo pubkey=hex... -# These entries provide pubkey and identifier for enriching write policy rejections. -# -# NOTE: Builder logs ("Rejected repository announcement note1xxx:") are NOT extracted -# because they use bech32 (note1) IDs while write policy logs use hex IDs. Extracting -# both would cause double-counting since deduplication only works within each format. -# Write policy logs contain the same events, so we don't lose any data. -# -# Required fields: kind, event_id, reason -# Enrichment fields: repo (identifier), npub (converted from hex pubkey) -# -# DEPENDENCY: -# This script requires logging improvements in ngit-grasp to emit structured -# [PARSE_FAIL] log entries. Until those are implemented, this script will -# find no matching entries (which is handled gracefully). -# -# "Invalid announcement" rejections are logged by the write policy and -# should be present in any ngit-grasp deployment. -# -# See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section) -# -# Expected Rust logging code for [PARSE_FAIL]: -# tracing::warn!( -# target: "migration", -# "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}", -# event.kind, event.id, reason, identifier, npub -# ); -# -# PREREQUISITES: -# - journalctl (systemd) -# - grep, awk, sed (standard Unix tools) -# - Access to systemd journal (may require sudo or journal group membership) -# -# RUNTIME: Depends on log volume, typically < 30 seconds -# -# SEE ALSO: -# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide -# 31-extract-purgatory-expiry.sh - Companion script for purgatory expiry logs -# - -set -euo pipefail - -# Get script directory for sourcing helpers -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Source the service validation helper -if [[ -f "$SCRIPT_DIR/validate-service.sh" ]]; then - source "$SCRIPT_DIR/validate-service.sh" -fi - -# Colors for output (disabled if not a terminal) -if [[ -t 1 ]]; then - RED='\033[0;31m' - GREEN='\033[0;32m' - YELLOW='\033[0;33m' - BLUE='\033[0;34m' - NC='\033[0m' -else - RED='' - GREEN='' - YELLOW='' - BLUE='' - NC='' -fi - -log_info() { - echo -e "${BLUE}[INFO]${NC} $*" >&2 -} - -log_success() { - echo -e "${GREEN}[OK]${NC} $*" >&2 -} - -log_warn() { - echo -e "${YELLOW}[WARN]${NC} $*" >&2 -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $*" >&2 -} - -usage() { - echo "Usage: $0 [options]" - echo "" - echo "Arguments:" - echo " service-name Systemd service name (e.g., ngit-grasp.service)" - echo " output-dir Directory to store extracted log data" - echo "" - echo "Options:" - echo " --since Start date (default: 30 days ago)" - echo " --until End date (default: now)" - echo " --dry-run Show what would be extracted without writing" - echo "" - echo "Examples:" - echo " $0 ngit-grasp.service output/logs" - echo " $0 ngit-grasp.service output/logs --since '2026-01-01'" - echo " $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'" - echo "" - echo "Expected log formats:" - echo " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." - echo " Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ..." - echo "" - echo "Enrichment:" - echo " Parse failures are automatically enriched with repo/npub from" - echo " 'Added rejected announcement' log entries. Hex pubkeys are converted" - echo " to npub format using 'nak encode npub' if available." - exit 1 -} - -# ============================================================================= -# AWK-BASED BATCH PARSING FUNCTIONS -# ============================================================================= -# These functions use awk for efficient batch processing instead of per-line -# grep calls. This provides ~400x speedup for large log files. -# -# NOTE: parse_builder_rejection_line() was removed to fix double-counting bug. -# Builder logs use bech32 (note1) IDs while write policy logs use hex IDs. -# Since deduplication only works within each format, extracting both caused -# the same event to be counted twice. Write policy logs contain the same -# events, so we don't lose any data by only extracting from that source. - -# Parse [PARSE_FAIL] log lines in batch using awk -# Input: file containing log lines with [PARSE_FAIL] -# Output: TSV lines: event_idkindreasonreponpub -parse_parse_fail_batch() { - local input_file="$1" - awk ' - { - # Extract kind=VALUE - kind = "" - if (match($0, /kind=([0-9]+)/, m)) kind = m[1] - - # Extract event_id=VALUE (hex string) - event_id = "" - if (match($0, /event_id=([a-f0-9]+)/, m)) event_id = m[1] - - # Extract reason="VALUE" (quoted string) - reason = "" - if (match($0, /reason="([^"]*)"/, m)) reason = m[1] - - # Extract repo=VALUE (optional) - repo = "" - if (match($0, /repo=([^ ]+)/, m)) repo = m[1] - - # Extract npub=VALUE (optional) - npub = "" - if (match($0, /npub=([^ ]+)/, m)) npub = m[1] - - # Output if we have required fields - if (kind != "" && event_id != "" && reason != "") { - print event_id "\t" kind "\t" reason "\t" repo "\t" npub - } - } - ' "$input_file" -} - -# Parse "Invalid announcement" rejection log lines in batch using awk -# Input: file containing "Event rejected by write policy" log lines -# Output: TSV lines: event_idkindreason -parse_write_policy_rejection_batch() { - local input_file="$1" - awk ' - { - # Extract event_id=VALUE (hex string) - event_id = "" - if (match($0, /event_id=([a-f0-9]+)/, m)) event_id = m[1] - - # Extract kind=VALUE - kind = "" - if (match($0, /kind=([0-9]+)/, m)) kind = m[1] - - # Extract reason=VALUE (everything after "reason=") - reason = "" - if (match($0, /reason=(.*)$/, m)) reason = m[1] - - # Output if we have required fields (repo and npub are empty) - if (kind != "" && event_id != "" && reason != "") { - print event_id "\t" kind "\t" reason "\t\t" - } - } - ' "$input_file" -} - -# Parse "Added rejected announcement" log lines in batch using awk -# Input: file containing "Added rejected announcement to two-tier index" log lines -# Output: TSV lines: event_ididentifierpubkey_hex -parse_rejected_announcement_batch() { - local input_file="$1" - awk ' - { - # Extract event_id=VALUE (hex string) - event_id = "" - if (match($0, /event_id=([a-f0-9]+)/, m)) event_id = m[1] - - # Extract identifier=VALUE (repo name) - identifier = "" - if (match($0, /identifier=([^ ]+)/, m)) identifier = m[1] - - # Extract pubkey=VALUE (hex string) - pubkey = "" - if (match($0, /pubkey=([a-f0-9]+)/, m)) pubkey = m[1] - - # Output if we have all required fields - if (event_id != "" && identifier != "" && pubkey != "") { - print event_id "\t" identifier "\t" pubkey - } - } - ' "$input_file" -} - -# Enrich parse failures with repo/npub by looking up event_id in "Added rejected announcement" log entries -# This is critical because "Invalid announcement" rejections only log event_id and kind, -# not the repo name or npub. Without enrichment, Phase 5 shows event_id|kind instead -# of repo|npub in action-required.txt, making the output unusable. -# -# Arguments: -# $1 - parse failures file to enrich (modified in place) -# $2 - lookup file containing event_id -> identifier|pubkey mappings from logs -# -# The function: -# 1. Uses the lookup table built from "Added rejected announcement" log entries -# 2. For each parse failure with empty repo/npub, looks up the event_id -# 3. Populates repo and npub columns from the lookup -# 4. Converts hex pubkeys to npub format using `nak encode npub` if available -# -# OPTIMIZATION: This function uses batch processing for efficiency: -# - Uses awk for O(n) join instead of per-line grep (O(n*m)) -# - Batches all pubkey->npub conversions in a single nak call -# - This reduces runtime from minutes to seconds for large datasets -enrich_with_repo_npub() { - local parse_failures_file="$1" - local lookup_file="$2" - - # Validate lookup file exists and has content - if [[ ! -f "$lookup_file" ]] || [[ ! -s "$lookup_file" ]]; then - log_warn "No enrichment data available - repo/npub columns will remain empty" - return 0 - fi - - log_info "Enriching parse failures with repo/npub from log entries..." - - # Check if we have nak for pubkey->npub conversion - local can_convert_npub=false - if command -v nak &> /dev/null; then - can_convert_npub=true - log_info " Using 'nak' for pubkey->npub conversion" - else - log_warn " 'nak' not found - will use hex pubkeys instead of npub" - fi - - local lookup_count - lookup_count=$(wc -l < "$lookup_file") - lookup_count="${lookup_count//[^0-9]/}" - log_info " Lookup table has $lookup_count entries" - - # STEP 1: Extract unique pubkeys that need conversion - # Get pubkeys from lookup file (column 3), deduplicate - local unique_pubkeys_file npub_map_file - unique_pubkeys_file=$(mktemp) - npub_map_file=$(mktemp) - - cut -f3 "$lookup_file" | sort -u > "$unique_pubkeys_file" - local unique_pubkey_count - unique_pubkey_count=$(wc -l < "$unique_pubkeys_file") - unique_pubkey_count="${unique_pubkey_count//[^0-9]/}" - log_info " Converting $unique_pubkey_count unique pubkeys to npub format..." - - # STEP 2: Batch convert all pubkeys to npub in a single nak call - # nak reads hex pubkeys from stdin (one per line) and outputs npubs - if [[ "$can_convert_npub" == true && "$unique_pubkey_count" -gt 0 ]]; then - # Create mapping file: pubkey_hexnpub - # nak encode npub reads from stdin and outputs one npub per line - paste "$unique_pubkeys_file" <(nak encode npub < "$unique_pubkeys_file" 2>/dev/null) > "$npub_map_file" || { - # Fallback: if batch conversion fails, use hex pubkeys - log_warn " Batch npub conversion failed, using hex pubkeys" - awk '{print $1 "\t" $1}' "$unique_pubkeys_file" > "$npub_map_file" - } - else - # No nak available, use hex pubkeys as-is - awk '{print $1 "\t" $1}' "$unique_pubkeys_file" > "$npub_map_file" - fi - - rm -f "$unique_pubkeys_file" - - # STEP 3: Use awk for efficient join (O(n) instead of O(n*m) grep per line) - # This joins parse_failures with lookup_file on event_id, then with npub_map on pubkey - local enriched_file - enriched_file=$(mktemp) - - # Copy header lines - grep '^#' "$parse_failures_file" > "$enriched_file" 2>/dev/null || true - - # Use awk to perform the join efficiently - # Input files (order matters for ARGIND): - # 1. npub_map_file: pubkey_hexnpub - # 2. lookup_file: event_ididentifierpubkey_hex - # 3. parse_failures_file: event_idkindreasonreponpub - awk -F'\t' -v OFS='\t' ' - # Track which file we are processing - FNR==1 { file_num++ } - - # First file: npub_map (pubkey_hex -> npub) - file_num==1 { - npub_map[$1] = $2 - next - } - # Second file: lookup (event_id -> identifier, pubkey_hex) - file_num==2 { - lookup_repo[$1] = $2 - lookup_pubkey[$1] = $3 - next - } - # Third file: parse_failures - /^#/ { next } # Skip headers (already copied) - { - event_id = $1 - kind = $2 - reason = $3 - repo = $4 - npub = $5 - - # If repo/npub empty, try to enrich from lookup - if (repo == "" && event_id in lookup_repo) { - repo = lookup_repo[event_id] - } - if (npub == "" && event_id in lookup_pubkey) { - pubkey = lookup_pubkey[event_id] - if (pubkey in npub_map) { - npub = npub_map[pubkey] - } else { - npub = pubkey # Fallback to hex - } - } - - print event_id, kind, reason, repo, npub - } - ' "$npub_map_file" "$lookup_file" "$parse_failures_file" >> "$enriched_file" - - rm -f "$npub_map_file" - - # Count enriched entries - local enriched_count total_count - total_count=$(grep -v '^#' "$parse_failures_file" | wc -l) - total_count="${total_count//[^0-9]/}" - # Count entries that have non-empty repo AND npub after enrichment - enriched_count=$(grep -v '^#' "$enriched_file" | awk -F'\t' '$4 != "" && $5 != ""' | wc -l) - enriched_count="${enriched_count//[^0-9]/}" - - # Replace original with enriched version - mv "$enriched_file" "$parse_failures_file" - - log_info " Enriched $enriched_count of $total_count parse failures with repo/npub" - log_success "Enrichment complete" -} - -# Parse "Added rejected announcement" log entries to build enrichment lookup table -# Input: log line containing "Added rejected announcement to two-tier index" -# Output: TSV line: event_ididentifierpubkey_hex -parse_rejected_announcement_line() { - local line="$1" - - local event_id identifier pubkey_hex - - # Extract event_id=VALUE (hex string) - event_id=$(echo "$line" | grep -oP 'event_id=\K[a-f0-9]+' || echo "") - - # Extract identifier=VALUE (repo name) - identifier=$(echo "$line" | grep -oP 'identifier=\K[^ ]+' || echo "") - - # Extract pubkey=VALUE (hex string) - pubkey_hex=$(echo "$line" | grep -oP 'pubkey=\K[a-f0-9]+' || echo "") - - # Only output if we have all required fields - if [[ -n "$event_id" && -n "$identifier" && -n "$pubkey_hex" ]]; then - printf '%s\t%s\t%s\n' "$event_id" "$identifier" "$pubkey_hex" - fi -} - -# Main -main() { - if [[ $# -lt 2 ]]; then - usage - fi - - local service="$1" - local output_dir="$2" - shift 2 - - # Default time range: last 30 days - local since_date - since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "") - local until_date="" - local dry_run=false - - # Parse options - while [[ $# -gt 0 ]]; do - case "$1" in - --since) - since_date="$2" - shift 2 - ;; - --until) - until_date="$2" - shift 2 - ;; - --dry-run) - dry_run=true - shift - ;; - *) - log_error "Unknown option: $1" - usage - ;; - esac - done - - # Validate service name format - if [[ ! "$service" =~ \.service$ ]]; then - service="${service}.service" - fi - - # Validate service is appropriate for structured logging - # This prevents the common mistake of using ngit-relay instead of ngit-grasp - if type validate_service_for_structured_logging &>/dev/null; then - # Use non-interactive mode if not a terminal, skip log check (we'll do our own) - local interactive="true" - [[ ! -t 0 ]] && interactive="false" - - if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then - log_error "Service validation failed. Use an ngit-grasp service for structured logging." - exit 1 - fi - else - # Fallback validation if helper not available - if [[ "$service" == *"ngit-relay"* ]]; then - log_error "Service name appears to be ngit-relay: $service" - log_error "Structured logging ([PARSE_FAIL]) only exists in ngit-grasp services." - log_error "Please use the ngit-grasp archive service instead." - log_error "" - log_error "To find the correct service:" - log_error " systemctl list-units 'ngit-grasp*' --all" - exit 1 - fi - fi - - log_info "Extracting parse failures from systemd logs" - log_info "Service: $service" - log_info "Output: $output_dir" - log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" - - # Check if journalctl is available - if ! command -v journalctl &> /dev/null; then - log_error "journalctl not found. This script requires systemd." - exit 1 - fi - - # Validate service exists (check if journalctl can find any logs for it) - # Note: We don't require the service to be running, just that it has logs - if ! journalctl --no-pager -u "$service" -n 1 &>/dev/null; then - log_warn "Could not query logs for service: $service" - log_warn "This may indicate the service doesn't exist or you lack permissions." - log_warn "" - log_warn "To list available ngit-grasp services:" - log_warn " systemctl list-units 'ngit-grasp*' --all" - log_warn " journalctl --list-boots # Check if you have journal access" - log_warn "" - # Continue anyway - the service might exist but have no logs yet - fi - - # Build journalctl command - local journal_cmd="journalctl -u $service --no-pager -o short-iso" - - if [[ -n "$since_date" ]]; then - journal_cmd="$journal_cmd --since '$since_date'" - fi - - if [[ -n "$until_date" ]]; then - journal_cmd="$journal_cmd --until '$until_date'" - fi - - log_info "Running: $journal_cmd | grep '[PARSE_FAIL]' or 'Invalid announcement'" - - if [[ "$dry_run" == true ]]; then - log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt" - - # Show sample of what would be extracted - log_info "Checking for matching log entries..." - local parse_fail_count invalid_announcement_count - parse_fail_count=$(eval "$journal_cmd" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") - parse_fail_count="${parse_fail_count//[^0-9]/}" # Strip non-numeric characters - parse_fail_count="${parse_fail_count:-0}" - - invalid_announcement_count=$(eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep -c 'Invalid announcement' || echo "0") - invalid_announcement_count="${invalid_announcement_count//[^0-9]/}" - invalid_announcement_count="${invalid_announcement_count:-0}" - - log_info "Found $parse_fail_count [PARSE_FAIL] entries" - log_info "Found $invalid_announcement_count 'Invalid announcement' rejections" - - if [[ "$parse_fail_count" -eq 0 && "$invalid_announcement_count" -eq 0 ]]; then - log_warn "No matching entries found in logs." - log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." - log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" - fi - - exit 0 - fi - - # Create output directory - mkdir -p "$output_dir" - - local output_file="$output_dir/parse-failures.txt" - local temp_file - temp_file=$(mktemp) - - # Extract and parse log entries using streaming (avoids loading all logs into memory) - log_info "Extracting log entries..." - - # Create temp files for intermediate results - local temp_stderr temp_parse_fail temp_write_policy_rejection temp_rejected_announcement - temp_stderr=$(mktemp) - temp_parse_fail=$(mktemp) - temp_write_policy_rejection=$(mktemp) - temp_rejected_announcement=$(mktemp) - - # Extract [PARSE_FAIL] entries directly to temp file (streaming) - log_info " Searching for [PARSE_FAIL] entries..." - eval "$journal_cmd" 2>"$temp_stderr" | grep '\[PARSE_FAIL\]' > "$temp_parse_fail" || true - - local journal_stderr - journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true) - if [[ -n "$journal_stderr" ]]; then - log_warn "journalctl reported: $journal_stderr" - fi - - # Extract "Event rejected by write policy" with "Invalid announcement" (streaming) - # NOTE: We only extract from write policy logs (hex IDs), not builder logs (note1 IDs) - # to avoid double-counting. Both log sources contain the same events. - log_info " Searching for write policy rejections..." - eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep 'Invalid announcement' > "$temp_write_policy_rejection" || true - - # Extract "Added rejected announcement" entries for enrichment (streaming) - # These contain pubkey and identifier which we use to enrich write policy rejections - log_info " Searching for rejected announcement entries (for enrichment)..." - eval "$journal_cmd" 2>/dev/null | grep 'Added rejected announcement to two-tier index' > "$temp_rejected_announcement" || true - - rm -f "$temp_stderr" - - # Check if we found anything - local parse_fail_line_count write_policy_line_count rejected_announcement_line_count - parse_fail_line_count=$(wc -l < "$temp_parse_fail") - parse_fail_line_count="${parse_fail_line_count//[^0-9]/}" - write_policy_line_count=$(wc -l < "$temp_write_policy_rejection") - write_policy_line_count="${write_policy_line_count//[^0-9]/}" - rejected_announcement_line_count=$(wc -l < "$temp_rejected_announcement") - rejected_announcement_line_count="${rejected_announcement_line_count//[^0-9]/}" - - log_info " Found $parse_fail_line_count [PARSE_FAIL] log lines" - log_info " Found $write_policy_line_count write policy rejection log lines" - log_info " Found $rejected_announcement_line_count rejected announcement log lines (for enrichment)" - - local total_invalid_announcement_lines=$write_policy_line_count - - if [[ "$parse_fail_line_count" -eq 0 && "$total_invalid_announcement_lines" -eq 0 ]]; then - log_warn "No matching entries found in logs." - log_warn "" - log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." - log_warn "The script looks for:" - log_warn "" - log_warn " 1. [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." - log_warn " 2. Event rejected by write policy event_id=... kind=30617 reason=Invalid announcement: ..." - log_warn "" - log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" - log_warn "" - - # Create empty output file with header comment - { - echo "# Parse failures and invalid announcements extracted from $service" - echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" - echo "# Extracted: $(date -Iseconds)" - echo "#" - echo "# Includes:" - echo "# - [PARSE_FAIL] structured log entries" - echo "# - \"Invalid announcement\" rejections" - echo "#" - echo "# Format: event_idkindreasonreponpub" - echo "# Note: repo and npub may be empty for some entries" - echo "#" - echo "# NOTE: No matching entries found." - echo "# This is expected if ngit-grasp logging improvements are not yet deployed." - } > "$output_file" - - rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_rejected_announcement" - log_info "Created empty output file: $output_file" - exit 0 - fi - - # Write header - { - echo "# Parse failures and invalid announcements extracted from $service" - echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" - echo "# Extracted: $(date -Iseconds)" - echo "#" - echo "# Includes:" - echo "# - [PARSE_FAIL] structured log entries" - echo "# - \"Invalid announcement\" rejections" - echo "#" - echo "# Format: event_idkindreasonreponpub" - echo "# Note: repo and npub may be empty for some entries" - } > "$output_file" - - # Parse [PARSE_FAIL] entries using batch awk processing - log_info " Parsing [PARSE_FAIL] entries..." - local parse_fail_count=0 - if [[ "$parse_fail_line_count" -gt 0 ]]; then - parse_parse_fail_batch "$temp_parse_fail" >> "$output_file" - parse_fail_count=$(grep -v '^#' "$output_file" | wc -l) - parse_fail_count="${parse_fail_count//[^0-9]/}" - fi - - # Parse write policy rejection entries using batch awk processing - log_info " Parsing write policy rejection entries..." - local write_policy_count=0 - if [[ "$write_policy_line_count" -gt 0 ]]; then - local before_count - before_count=$(grep -v '^#' "$output_file" 2>/dev/null | wc -l || echo "0") - before_count="${before_count//[^0-9]/}" - before_count="${before_count:-0}" - parse_write_policy_rejection_batch "$temp_write_policy_rejection" >> "$output_file" - local after_count - after_count=$(grep -v '^#' "$output_file" 2>/dev/null | wc -l || echo "0") - after_count="${after_count//[^0-9]/}" - after_count="${after_count:-0}" - write_policy_count=$((after_count - before_count)) - fi - - local invalid_announcement_count=$write_policy_count - - # Build enrichment lookup table from "Added rejected announcement" entries - local enrichment_lookup_file - enrichment_lookup_file=$(mktemp) - - log_info " Building enrichment lookup table..." - if [[ "$rejected_announcement_line_count" -gt 0 ]]; then - parse_rejected_announcement_batch "$temp_rejected_announcement" > "$enrichment_lookup_file" - fi - - rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_rejected_announcement" - - # Deduplicate by event_id (first column) - keep first occurrence - log_info " Deduplicating entries..." - local deduped_file - deduped_file=$(mktemp) - # Preserve header lines (starting with #) and deduplicate data lines - grep '^#' "$output_file" > "$deduped_file" - grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file" - mv "$deduped_file" "$output_file" - - # Deduplicate enrichment lookup table by event_id - if [[ -s "$enrichment_lookup_file" ]]; then - sort -t$'\t' -k1,1 -u "$enrichment_lookup_file" > "$enrichment_lookup_file.deduped" - mv "$enrichment_lookup_file.deduped" "$enrichment_lookup_file" - fi - - # Enrich with repo/npub from "Added rejected announcement" log entries - # This is critical for usability - without it, action-required.txt shows - # event_id|kind instead of repo|npub, making parse failures unidentifiable - enrich_with_repo_npub "$output_file" "$enrichment_lookup_file" - - rm -f "$enrichment_lookup_file" - - # Count final entries (excluding header lines) - local count - count=$(grep -v '^#' "$output_file" | wc -l) - count="${count//[^0-9]/}" # Strip whitespace - count="${count:-0}" - - rm -f "$temp_file" - - # Summary - echo "" - log_info "=== Extraction Summary ===" - log_info "Service: $service" - log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" - log_success "Extracted $count total entries" - log_info " - [PARSE_FAIL] entries: $parse_fail_count" - log_info " - Invalid announcement rejections: $invalid_announcement_count" - echo "" - log_info "Output file: $output_file" - - if [[ $count -gt 0 ]]; then - echo "" - log_info "Sample entries (first 5):" - # Use a subshell to avoid SIGPIPE issues with set -e - # New format: event_idkindreasonreponpub - (grep -v '^#' "$output_file" | head -5 | while IFS=$'\t' read -r event_id kind reason repo npub; do - echo " kind=$kind event_id=${event_id:0:16}... reason=\"${reason:0:60}...\"" - done) || true - fi - - # Breakdown by kind - if [[ $count -gt 0 ]]; then - echo "" - log_info "Breakdown by event kind:" - # Use a subshell to avoid SIGPIPE issues with set -e - # kind is now column 2 - (grep -v '^#' "$output_file" | awk -F'\t' '{print $2}' | sort | uniq -c | sort -rn | while read -r cnt kind; do - echo " kind $kind: $cnt failures" - done) || true - fi - - # Breakdown by reason pattern (for invalid announcements) - if [[ $invalid_announcement_count -gt 0 ]]; then - echo "" - log_info "Breakdown by reason pattern:" - # Extract the main reason type (before the colon details) - (grep -v '^#' "$output_file" | awk -F'\t' '{print $3}' | sed 's/:.*//' | sort | uniq -c | sort -rn | head -10 | while read -r cnt reason; do - echo " $reason: $cnt" - done) || true - fi - - # Explicit success exit - exit 0 -} - -main "$@" -- cgit v1.2.3