From 92a9a3bfe0bc522e8ae411991a366a3a6310d525 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 3 Feb 2026 14:41:46 +0000 Subject: docs: archive relay.ngit.dev migration materials for reference Move migration guide and scripts to docs/archive/2026-01-relay-ngit-dev-migration/ with clear warnings that these are reference-only materials from a specific migration context, not general-purpose tools. These materials document the relay.ngit.dev migration from ngit-relay to ngit-grasp in January 2026. The scripts were developed iteratively during the migration and are specific to that context. They are preserved for: - Historical reference - Context for production fixes in this branch - Inspiration for future migrations (not direct reuse) The migration uncovered critical bugs now fixed in this branch: - Git protocol error handling - Naughty list false positives - Purgatory event tracking - Sync startup issues - Configuration management --- .../scripts/30-extract-parse-failures.sh | 774 +++++++++++++++++++++ 1 file changed, 774 insertions(+) create mode 100755 docs/archive/2026-01-relay-ngit-dev-migration/scripts/30-extract-parse-failures.sh (limited to 'docs/archive/2026-01-relay-ngit-dev-migration/scripts/30-extract-parse-failures.sh') diff --git a/docs/archive/2026-01-relay-ngit-dev-migration/scripts/30-extract-parse-failures.sh b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/30-extract-parse-failures.sh new file mode 100755 index 0000000..d762aae --- /dev/null +++ b/docs/archive/2026-01-relay-ngit-dev-migration/scripts/30-extract-parse-failures.sh @@ -0,0 +1,774 @@ +#!/usr/bin/env bash +# +# 30-extract-parse-failures.sh - Extract parse failure events from systemd logs +# +# PHASE 4a of the GRASP relay to ngit-grasp migration analysis pipeline. +# Extracts structured [PARSE_FAIL] log entries AND "Invalid announcement" +# rejections from journalctl. +# +# USAGE: +# ./30-extract-parse-failures.sh [options] +# +# EXAMPLES: +# # Extract from ngit-grasp service (last 30 days, default) +# ./30-extract-parse-failures.sh ngit-grasp.service output/logs +# +# # Extract with custom time range +# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-01" +# +# # Extract from specific time window +# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22" +# +# OPTIONS: +# --since Start date for log extraction (default: 30 days ago) +# --until End date for log extraction (default: now) +# --dry-run Show what would be extracted without writing files +# +# ENRICHMENT: +# The script automatically enriches parse failures with repo/npub information +# by extracting from "Added rejected announcement" log entries which include +# pubkey and identifier fields. Hex pubkeys are converted to npub format using +# `nak encode npub ` if the nak tool is available. +# +# OUTPUT: +# /parse-failures.txt +# +# OUTPUT FORMAT (TSV): +# event_idkindreasonreponpub +# +# EXPECTED LOG FORMATS: +# The script looks for three types of log entries: +# +# 1. Structured [PARSE_FAIL] entries: +# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... +# +# 2. "Invalid announcement" rejections (write policy): +# Event rejected by write policy event_id=abc123... relay=wss://... kind=30617 reason=Invalid announcement: multiple clone tags found... +# +# 3. "Added rejected announcement" entries (for enrichment): +# Added rejected announcement to two-tier index event_id=abc123... kind=30617 identifier=myrepo pubkey=hex... +# These entries provide pubkey and identifier for enriching write policy rejections. +# +# NOTE: Builder logs ("Rejected repository announcement note1xxx:") are NOT extracted +# because they use bech32 (note1) IDs while write policy logs use hex IDs. Extracting +# both would cause double-counting since deduplication only works within each format. +# Write policy logs contain the same events, so we don't lose any data. +# +# Required fields: kind, event_id, reason +# Enrichment fields: repo (identifier), npub (converted from hex pubkey) +# +# DEPENDENCY: +# This script requires logging improvements in ngit-grasp to emit structured +# [PARSE_FAIL] log entries. Until those are implemented, this script will +# find no matching entries (which is handled gracefully). +# +# "Invalid announcement" rejections are logged by the write policy and +# should be present in any ngit-grasp deployment. +# +# See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section) +# +# Expected Rust logging code for [PARSE_FAIL]: +# tracing::warn!( +# target: "migration", +# "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}", +# event.kind, event.id, reason, identifier, npub +# ); +# +# PREREQUISITES: +# - journalctl (systemd) +# - grep, awk, sed (standard Unix tools) +# - Access to systemd journal (may require sudo or journal group membership) +# +# RUNTIME: Depends on log volume, typically < 30 seconds +# +# SEE ALSO: +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide +# 31-extract-purgatory-expiry.sh - Companion script for purgatory expiry logs +# + +set -euo pipefail + +# Get script directory for sourcing helpers +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Source the service validation helper +if [[ -f "$SCRIPT_DIR/validate-service.sh" ]]; then + source "$SCRIPT_DIR/validate-service.sh" +fi + +# Colors for output (disabled if not a terminal) +if [[ -t 1 ]]; then + RED='\033[0;31m' + GREEN='\033[0;32m' + YELLOW='\033[0;33m' + BLUE='\033[0;34m' + NC='\033[0m' +else + RED='' + GREEN='' + YELLOW='' + BLUE='' + NC='' +fi + +log_info() { + echo -e "${BLUE}[INFO]${NC} $*" >&2 +} + +log_success() { + echo -e "${GREEN}[OK]${NC} $*" >&2 +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $*" >&2 +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $*" >&2 +} + +usage() { + echo "Usage: $0 [options]" + echo "" + echo "Arguments:" + echo " service-name Systemd service name (e.g., ngit-grasp.service)" + echo " output-dir Directory to store extracted log data" + echo "" + echo "Options:" + echo " --since Start date (default: 30 days ago)" + echo " --until End date (default: now)" + echo " --dry-run Show what would be extracted without writing" + echo "" + echo "Examples:" + echo " $0 ngit-grasp.service output/logs" + echo " $0 ngit-grasp.service output/logs --since '2026-01-01'" + echo " $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'" + echo "" + echo "Expected log formats:" + echo " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." + echo " Event rejected by write policy event_id=abc123 ... kind=30617 reason=Invalid announcement: ..." + echo "" + echo "Enrichment:" + echo " Parse failures are automatically enriched with repo/npub from" + echo " 'Added rejected announcement' log entries. Hex pubkeys are converted" + echo " to npub format using 'nak encode npub' if available." + exit 1 +} + +# ============================================================================= +# AWK-BASED BATCH PARSING FUNCTIONS +# ============================================================================= +# These functions use awk for efficient batch processing instead of per-line +# grep calls. This provides ~400x speedup for large log files. +# +# NOTE: parse_builder_rejection_line() was removed to fix double-counting bug. +# Builder logs use bech32 (note1) IDs while write policy logs use hex IDs. +# Since deduplication only works within each format, extracting both caused +# the same event to be counted twice. Write policy logs contain the same +# events, so we don't lose any data by only extracting from that source. + +# Parse [PARSE_FAIL] log lines in batch using awk +# Input: file containing log lines with [PARSE_FAIL] +# Output: TSV lines: event_idkindreasonreponpub +parse_parse_fail_batch() { + local input_file="$1" + awk ' + { + # Extract kind=VALUE + kind = "" + if (match($0, /kind=([0-9]+)/, m)) kind = m[1] + + # Extract event_id=VALUE (hex string) + event_id = "" + if (match($0, /event_id=([a-f0-9]+)/, m)) event_id = m[1] + + # Extract reason="VALUE" (quoted string) + reason = "" + if (match($0, /reason="([^"]*)"/, m)) reason = m[1] + + # Extract repo=VALUE (optional) + repo = "" + if (match($0, /repo=([^ ]+)/, m)) repo = m[1] + + # Extract npub=VALUE (optional) + npub = "" + if (match($0, /npub=([^ ]+)/, m)) npub = m[1] + + # Output if we have required fields + if (kind != "" && event_id != "" && reason != "") { + print event_id "\t" kind "\t" reason "\t" repo "\t" npub + } + } + ' "$input_file" +} + +# Parse "Invalid announcement" rejection log lines in batch using awk +# Input: file containing "Event rejected by write policy" log lines +# Output: TSV lines: event_idkindreason +parse_write_policy_rejection_batch() { + local input_file="$1" + awk ' + { + # Extract event_id=VALUE (hex string) + event_id = "" + if (match($0, /event_id=([a-f0-9]+)/, m)) event_id = m[1] + + # Extract kind=VALUE + kind = "" + if (match($0, /kind=([0-9]+)/, m)) kind = m[1] + + # Extract reason=VALUE (everything after "reason=") + reason = "" + if (match($0, /reason=(.*)$/, m)) reason = m[1] + + # Output if we have required fields (repo and npub are empty) + if (kind != "" && event_id != "" && reason != "") { + print event_id "\t" kind "\t" reason "\t\t" + } + } + ' "$input_file" +} + +# Parse "Added rejected announcement" log lines in batch using awk +# Input: file containing "Added rejected announcement to two-tier index" log lines +# Output: TSV lines: event_ididentifierpubkey_hex +parse_rejected_announcement_batch() { + local input_file="$1" + awk ' + { + # Extract event_id=VALUE (hex string) + event_id = "" + if (match($0, /event_id=([a-f0-9]+)/, m)) event_id = m[1] + + # Extract identifier=VALUE (repo name) + identifier = "" + if (match($0, /identifier=([^ ]+)/, m)) identifier = m[1] + + # Extract pubkey=VALUE (hex string) + pubkey = "" + if (match($0, /pubkey=([a-f0-9]+)/, m)) pubkey = m[1] + + # Output if we have all required fields + if (event_id != "" && identifier != "" && pubkey != "") { + print event_id "\t" identifier "\t" pubkey + } + } + ' "$input_file" +} + +# Enrich parse failures with repo/npub by looking up event_id in "Added rejected announcement" log entries +# This is critical because "Invalid announcement" rejections only log event_id and kind, +# not the repo name or npub. Without enrichment, Phase 5 shows event_id|kind instead +# of repo|npub in action-required.txt, making the output unusable. +# +# Arguments: +# $1 - parse failures file to enrich (modified in place) +# $2 - lookup file containing event_id -> identifier|pubkey mappings from logs +# +# The function: +# 1. Uses the lookup table built from "Added rejected announcement" log entries +# 2. For each parse failure with empty repo/npub, looks up the event_id +# 3. Populates repo and npub columns from the lookup +# 4. Converts hex pubkeys to npub format using `nak encode npub` if available +# +# OPTIMIZATION: This function uses batch processing for efficiency: +# - Uses awk for O(n) join instead of per-line grep (O(n*m)) +# - Batches all pubkey->npub conversions in a single nak call +# - This reduces runtime from minutes to seconds for large datasets +enrich_with_repo_npub() { + local parse_failures_file="$1" + local lookup_file="$2" + + # Validate lookup file exists and has content + if [[ ! -f "$lookup_file" ]] || [[ ! -s "$lookup_file" ]]; then + log_warn "No enrichment data available - repo/npub columns will remain empty" + return 0 + fi + + log_info "Enriching parse failures with repo/npub from log entries..." + + # Check if we have nak for pubkey->npub conversion + local can_convert_npub=false + if command -v nak &> /dev/null; then + can_convert_npub=true + log_info " Using 'nak' for pubkey->npub conversion" + else + log_warn " 'nak' not found - will use hex pubkeys instead of npub" + fi + + local lookup_count + lookup_count=$(wc -l < "$lookup_file") + lookup_count="${lookup_count//[^0-9]/}" + log_info " Lookup table has $lookup_count entries" + + # STEP 1: Extract unique pubkeys that need conversion + # Get pubkeys from lookup file (column 3), deduplicate + local unique_pubkeys_file npub_map_file + unique_pubkeys_file=$(mktemp) + npub_map_file=$(mktemp) + + cut -f3 "$lookup_file" | sort -u > "$unique_pubkeys_file" + local unique_pubkey_count + unique_pubkey_count=$(wc -l < "$unique_pubkeys_file") + unique_pubkey_count="${unique_pubkey_count//[^0-9]/}" + log_info " Converting $unique_pubkey_count unique pubkeys to npub format..." + + # STEP 2: Batch convert all pubkeys to npub in a single nak call + # nak reads hex pubkeys from stdin (one per line) and outputs npubs + if [[ "$can_convert_npub" == true && "$unique_pubkey_count" -gt 0 ]]; then + # Create mapping file: pubkey_hexnpub + # nak encode npub reads from stdin and outputs one npub per line + paste "$unique_pubkeys_file" <(nak encode npub < "$unique_pubkeys_file" 2>/dev/null) > "$npub_map_file" || { + # Fallback: if batch conversion fails, use hex pubkeys + log_warn " Batch npub conversion failed, using hex pubkeys" + awk '{print $1 "\t" $1}' "$unique_pubkeys_file" > "$npub_map_file" + } + else + # No nak available, use hex pubkeys as-is + awk '{print $1 "\t" $1}' "$unique_pubkeys_file" > "$npub_map_file" + fi + + rm -f "$unique_pubkeys_file" + + # STEP 3: Use awk for efficient join (O(n) instead of O(n*m) grep per line) + # This joins parse_failures with lookup_file on event_id, then with npub_map on pubkey + local enriched_file + enriched_file=$(mktemp) + + # Copy header lines + grep '^#' "$parse_failures_file" > "$enriched_file" 2>/dev/null || true + + # Use awk to perform the join efficiently + # Input files (order matters for ARGIND): + # 1. npub_map_file: pubkey_hexnpub + # 2. lookup_file: event_ididentifierpubkey_hex + # 3. parse_failures_file: event_idkindreasonreponpub + awk -F'\t' -v OFS='\t' ' + # Track which file we are processing + FNR==1 { file_num++ } + + # First file: npub_map (pubkey_hex -> npub) + file_num==1 { + npub_map[$1] = $2 + next + } + # Second file: lookup (event_id -> identifier, pubkey_hex) + file_num==2 { + lookup_repo[$1] = $2 + lookup_pubkey[$1] = $3 + next + } + # Third file: parse_failures + /^#/ { next } # Skip headers (already copied) + { + event_id = $1 + kind = $2 + reason = $3 + repo = $4 + npub = $5 + + # If repo/npub empty, try to enrich from lookup + if (repo == "" && event_id in lookup_repo) { + repo = lookup_repo[event_id] + } + if (npub == "" && event_id in lookup_pubkey) { + pubkey = lookup_pubkey[event_id] + if (pubkey in npub_map) { + npub = npub_map[pubkey] + } else { + npub = pubkey # Fallback to hex + } + } + + print event_id, kind, reason, repo, npub + } + ' "$npub_map_file" "$lookup_file" "$parse_failures_file" >> "$enriched_file" + + rm -f "$npub_map_file" + + # Count enriched entries + local enriched_count total_count + total_count=$(grep -v '^#' "$parse_failures_file" | wc -l) + total_count="${total_count//[^0-9]/}" + # Count entries that have non-empty repo AND npub after enrichment + enriched_count=$(grep -v '^#' "$enriched_file" | awk -F'\t' '$4 != "" && $5 != ""' | wc -l) + enriched_count="${enriched_count//[^0-9]/}" + + # Replace original with enriched version + mv "$enriched_file" "$parse_failures_file" + + log_info " Enriched $enriched_count of $total_count parse failures with repo/npub" + log_success "Enrichment complete" +} + +# Parse "Added rejected announcement" log entries to build enrichment lookup table +# Input: log line containing "Added rejected announcement to two-tier index" +# Output: TSV line: event_ididentifierpubkey_hex +parse_rejected_announcement_line() { + local line="$1" + + local event_id identifier pubkey_hex + + # Extract event_id=VALUE (hex string) + event_id=$(echo "$line" | grep -oP 'event_id=\K[a-f0-9]+' || echo "") + + # Extract identifier=VALUE (repo name) + identifier=$(echo "$line" | grep -oP 'identifier=\K[^ ]+' || echo "") + + # Extract pubkey=VALUE (hex string) + pubkey_hex=$(echo "$line" | grep -oP 'pubkey=\K[a-f0-9]+' || echo "") + + # Only output if we have all required fields + if [[ -n "$event_id" && -n "$identifier" && -n "$pubkey_hex" ]]; then + printf '%s\t%s\t%s\n' "$event_id" "$identifier" "$pubkey_hex" + fi +} + +# Main +main() { + if [[ $# -lt 2 ]]; then + usage + fi + + local service="$1" + local output_dir="$2" + shift 2 + + # Default time range: last 30 days + local since_date + since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "") + local until_date="" + local dry_run=false + + # Parse options + while [[ $# -gt 0 ]]; do + case "$1" in + --since) + since_date="$2" + shift 2 + ;; + --until) + until_date="$2" + shift 2 + ;; + --dry-run) + dry_run=true + shift + ;; + *) + log_error "Unknown option: $1" + usage + ;; + esac + done + + # Validate service name format + if [[ ! "$service" =~ \.service$ ]]; then + service="${service}.service" + fi + + # Validate service is appropriate for structured logging + # This prevents the common mistake of using ngit-relay instead of ngit-grasp + if type validate_service_for_structured_logging &>/dev/null; then + # Use non-interactive mode if not a terminal, skip log check (we'll do our own) + local interactive="true" + [[ ! -t 0 ]] && interactive="false" + + if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then + log_error "Service validation failed. Use an ngit-grasp service for structured logging." + exit 1 + fi + else + # Fallback validation if helper not available + if [[ "$service" == *"ngit-relay"* ]]; then + log_error "Service name appears to be ngit-relay: $service" + log_error "Structured logging ([PARSE_FAIL]) only exists in ngit-grasp services." + log_error "Please use the ngit-grasp archive service instead." + log_error "" + log_error "To find the correct service:" + log_error " systemctl list-units 'ngit-grasp*' --all" + exit 1 + fi + fi + + log_info "Extracting parse failures from systemd logs" + log_info "Service: $service" + log_info "Output: $output_dir" + log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" + + # Check if journalctl is available + if ! command -v journalctl &> /dev/null; then + log_error "journalctl not found. This script requires systemd." + exit 1 + fi + + # Validate service exists (check if journalctl can find any logs for it) + # Note: We don't require the service to be running, just that it has logs + if ! journalctl --no-pager -u "$service" -n 1 &>/dev/null; then + log_warn "Could not query logs for service: $service" + log_warn "This may indicate the service doesn't exist or you lack permissions." + log_warn "" + log_warn "To list available ngit-grasp services:" + log_warn " systemctl list-units 'ngit-grasp*' --all" + log_warn " journalctl --list-boots # Check if you have journal access" + log_warn "" + # Continue anyway - the service might exist but have no logs yet + fi + + # Build journalctl command + local journal_cmd="journalctl -u $service --no-pager -o short-iso" + + if [[ -n "$since_date" ]]; then + journal_cmd="$journal_cmd --since '$since_date'" + fi + + if [[ -n "$until_date" ]]; then + journal_cmd="$journal_cmd --until '$until_date'" + fi + + log_info "Running: $journal_cmd | grep '[PARSE_FAIL]' or 'Invalid announcement'" + + if [[ "$dry_run" == true ]]; then + log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt" + + # Show sample of what would be extracted + log_info "Checking for matching log entries..." + local parse_fail_count invalid_announcement_count + parse_fail_count=$(eval "$journal_cmd" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") + parse_fail_count="${parse_fail_count//[^0-9]/}" # Strip non-numeric characters + parse_fail_count="${parse_fail_count:-0}" + + invalid_announcement_count=$(eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep -c 'Invalid announcement' || echo "0") + invalid_announcement_count="${invalid_announcement_count//[^0-9]/}" + invalid_announcement_count="${invalid_announcement_count:-0}" + + log_info "Found $parse_fail_count [PARSE_FAIL] entries" + log_info "Found $invalid_announcement_count 'Invalid announcement' rejections" + + if [[ "$parse_fail_count" -eq 0 && "$invalid_announcement_count" -eq 0 ]]; then + log_warn "No matching entries found in logs." + log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." + log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" + fi + + exit 0 + fi + + # Create output directory + mkdir -p "$output_dir" + + local output_file="$output_dir/parse-failures.txt" + local temp_file + temp_file=$(mktemp) + + # Extract and parse log entries using streaming (avoids loading all logs into memory) + log_info "Extracting log entries..." + + # Create temp files for intermediate results + local temp_stderr temp_parse_fail temp_write_policy_rejection temp_rejected_announcement + temp_stderr=$(mktemp) + temp_parse_fail=$(mktemp) + temp_write_policy_rejection=$(mktemp) + temp_rejected_announcement=$(mktemp) + + # Extract [PARSE_FAIL] entries directly to temp file (streaming) + log_info " Searching for [PARSE_FAIL] entries..." + eval "$journal_cmd" 2>"$temp_stderr" | grep '\[PARSE_FAIL\]' > "$temp_parse_fail" || true + + local journal_stderr + journal_stderr=$(cat "$temp_stderr" 2>/dev/null || true) + if [[ -n "$journal_stderr" ]]; then + log_warn "journalctl reported: $journal_stderr" + fi + + # Extract "Event rejected by write policy" with "Invalid announcement" (streaming) + # NOTE: We only extract from write policy logs (hex IDs), not builder logs (note1 IDs) + # to avoid double-counting. Both log sources contain the same events. + log_info " Searching for write policy rejections..." + eval "$journal_cmd" 2>/dev/null | grep 'Event rejected by write policy' | grep 'Invalid announcement' > "$temp_write_policy_rejection" || true + + # Extract "Added rejected announcement" entries for enrichment (streaming) + # These contain pubkey and identifier which we use to enrich write policy rejections + log_info " Searching for rejected announcement entries (for enrichment)..." + eval "$journal_cmd" 2>/dev/null | grep 'Added rejected announcement to two-tier index' > "$temp_rejected_announcement" || true + + rm -f "$temp_stderr" + + # Check if we found anything + local parse_fail_line_count write_policy_line_count rejected_announcement_line_count + parse_fail_line_count=$(wc -l < "$temp_parse_fail") + parse_fail_line_count="${parse_fail_line_count//[^0-9]/}" + write_policy_line_count=$(wc -l < "$temp_write_policy_rejection") + write_policy_line_count="${write_policy_line_count//[^0-9]/}" + rejected_announcement_line_count=$(wc -l < "$temp_rejected_announcement") + rejected_announcement_line_count="${rejected_announcement_line_count//[^0-9]/}" + + log_info " Found $parse_fail_line_count [PARSE_FAIL] log lines" + log_info " Found $write_policy_line_count write policy rejection log lines" + log_info " Found $rejected_announcement_line_count rejected announcement log lines (for enrichment)" + + local total_invalid_announcement_lines=$write_policy_line_count + + if [[ "$parse_fail_line_count" -eq 0 && "$total_invalid_announcement_lines" -eq 0 ]]; then + log_warn "No matching entries found in logs." + log_warn "" + log_warn "This is expected if ngit-grasp logging improvements are not yet deployed." + log_warn "The script looks for:" + log_warn "" + log_warn " 1. [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..." + log_warn " 2. Event rejected by write policy event_id=... kind=30617 reason=Invalid announcement: ..." + log_warn "" + log_warn "See: docs/how-to/migrate-to-ngit-grasp.md (Dependencies section)" + log_warn "" + + # Create empty output file with header comment + { + echo "# Parse failures and invalid announcements extracted from $service" + echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" + echo "# Extracted: $(date -Iseconds)" + echo "#" + echo "# Includes:" + echo "# - [PARSE_FAIL] structured log entries" + echo "# - \"Invalid announcement\" rejections" + echo "#" + echo "# Format: event_idkindreasonreponpub" + echo "# Note: repo and npub may be empty for some entries" + echo "#" + echo "# NOTE: No matching entries found." + echo "# This is expected if ngit-grasp logging improvements are not yet deployed." + } > "$output_file" + + rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_rejected_announcement" + log_info "Created empty output file: $output_file" + exit 0 + fi + + # Write header + { + echo "# Parse failures and invalid announcements extracted from $service" + echo "# Time range: ${since_date:-beginning} to ${until_date:-now}" + echo "# Extracted: $(date -Iseconds)" + echo "#" + echo "# Includes:" + echo "# - [PARSE_FAIL] structured log entries" + echo "# - \"Invalid announcement\" rejections" + echo "#" + echo "# Format: event_idkindreasonreponpub" + echo "# Note: repo and npub may be empty for some entries" + } > "$output_file" + + # Parse [PARSE_FAIL] entries using batch awk processing + log_info " Parsing [PARSE_FAIL] entries..." + local parse_fail_count=0 + if [[ "$parse_fail_line_count" -gt 0 ]]; then + parse_parse_fail_batch "$temp_parse_fail" >> "$output_file" + parse_fail_count=$(grep -v '^#' "$output_file" | wc -l) + parse_fail_count="${parse_fail_count//[^0-9]/}" + fi + + # Parse write policy rejection entries using batch awk processing + log_info " Parsing write policy rejection entries..." + local write_policy_count=0 + if [[ "$write_policy_line_count" -gt 0 ]]; then + local before_count + before_count=$(grep -v '^#' "$output_file" 2>/dev/null | wc -l || echo "0") + before_count="${before_count//[^0-9]/}" + before_count="${before_count:-0}" + parse_write_policy_rejection_batch "$temp_write_policy_rejection" >> "$output_file" + local after_count + after_count=$(grep -v '^#' "$output_file" 2>/dev/null | wc -l || echo "0") + after_count="${after_count//[^0-9]/}" + after_count="${after_count:-0}" + write_policy_count=$((after_count - before_count)) + fi + + local invalid_announcement_count=$write_policy_count + + # Build enrichment lookup table from "Added rejected announcement" entries + local enrichment_lookup_file + enrichment_lookup_file=$(mktemp) + + log_info " Building enrichment lookup table..." + if [[ "$rejected_announcement_line_count" -gt 0 ]]; then + parse_rejected_announcement_batch "$temp_rejected_announcement" > "$enrichment_lookup_file" + fi + + rm -f "$temp_parse_fail" "$temp_write_policy_rejection" "$temp_rejected_announcement" + + # Deduplicate by event_id (first column) - keep first occurrence + log_info " Deduplicating entries..." + local deduped_file + deduped_file=$(mktemp) + # Preserve header lines (starting with #) and deduplicate data lines + grep '^#' "$output_file" > "$deduped_file" + grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file" + mv "$deduped_file" "$output_file" + + # Deduplicate enrichment lookup table by event_id + if [[ -s "$enrichment_lookup_file" ]]; then + sort -t$'\t' -k1,1 -u "$enrichment_lookup_file" > "$enrichment_lookup_file.deduped" + mv "$enrichment_lookup_file.deduped" "$enrichment_lookup_file" + fi + + # Enrich with repo/npub from "Added rejected announcement" log entries + # This is critical for usability - without it, action-required.txt shows + # event_id|kind instead of repo|npub, making parse failures unidentifiable + enrich_with_repo_npub "$output_file" "$enrichment_lookup_file" + + rm -f "$enrichment_lookup_file" + + # Count final entries (excluding header lines) + local count + count=$(grep -v '^#' "$output_file" | wc -l) + count="${count//[^0-9]/}" # Strip whitespace + count="${count:-0}" + + rm -f "$temp_file" + + # Summary + echo "" + log_info "=== Extraction Summary ===" + log_info "Service: $service" + log_info "Time range: ${since_date:-beginning} to ${until_date:-now}" + log_success "Extracted $count total entries" + log_info " - [PARSE_FAIL] entries: $parse_fail_count" + log_info " - Invalid announcement rejections: $invalid_announcement_count" + echo "" + log_info "Output file: $output_file" + + if [[ $count -gt 0 ]]; then + echo "" + log_info "Sample entries (first 5):" + # Use a subshell to avoid SIGPIPE issues with set -e + # New format: event_idkindreasonreponpub + (grep -v '^#' "$output_file" | head -5 | while IFS=$'\t' read -r event_id kind reason repo npub; do + echo " kind=$kind event_id=${event_id:0:16}... reason=\"${reason:0:60}...\"" + done) || true + fi + + # Breakdown by kind + if [[ $count -gt 0 ]]; then + echo "" + log_info "Breakdown by event kind:" + # Use a subshell to avoid SIGPIPE issues with set -e + # kind is now column 2 + (grep -v '^#' "$output_file" | awk -F'\t' '{print $2}' | sort | uniq -c | sort -rn | while read -r cnt kind; do + echo " kind $kind: $cnt failures" + done) || true + fi + + # Breakdown by reason pattern (for invalid announcements) + if [[ $invalid_announcement_count -gt 0 ]]; then + echo "" + log_info "Breakdown by reason pattern:" + # Extract the main reason type (before the colon details) + (grep -v '^#' "$output_file" | awk -F'\t' '{print $3}' | sed 's/:.*//' | sort | uniq -c | sort -rn | head -10 | while read -r cnt reason; do + echo " $reason: $cnt" + done) || true + fi + + # Explicit success exit + exit 0 +} + +main "$@" -- cgit v1.2.3