diff options
Diffstat (limited to 'docs/how-to/migration-scripts/30-extract-parse-failures.sh')
| -rwxr-xr-x | docs/how-to/migration-scripts/30-extract-parse-failures.sh | 328 |
1 files changed, 328 insertions, 0 deletions
diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh new file mode 100755 index 0000000..753fd3e --- /dev/null +++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh | |||
| @@ -0,0 +1,328 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 30-extract-parse-failures.sh - Extract parse failure events from systemd logs | ||
| 4 | # | ||
| 5 | # PHASE 4a of the ngit-relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Extracts structured [PARSE_FAIL] log entries from journalctl. | ||
| 7 | # | ||
| 8 | # USAGE: | ||
| 9 | # ./30-extract-parse-failures.sh <service-name> <output-dir> [options] | ||
| 10 | # | ||
| 11 | # EXAMPLES: | ||
| 12 | # # Extract from ngit-grasp service (last 30 days, default) | ||
| 13 | # ./30-extract-parse-failures.sh ngit-grasp.service output/logs | ||
| 14 | # | ||
| 15 | # # Extract with custom time range | ||
| 16 | # ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-01" | ||
| 17 | # | ||
| 18 | # # Extract from specific time window | ||
| 19 | # ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22" | ||
| 20 | # | ||
| 21 | # OPTIONS: | ||
| 22 | # --since <date> Start date for log extraction (default: 30 days ago) | ||
| 23 | # --until <date> End date for log extraction (default: now) | ||
| 24 | # --dry-run Show what would be extracted without writing files | ||
| 25 | # | ||
| 26 | # OUTPUT: | ||
| 27 | # <output-dir>/parse-failures.txt | ||
| 28 | # | ||
| 29 | # OUTPUT FORMAT (TSV): | ||
| 30 | # repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason | ||
| 31 | # | ||
| 32 | # EXPECTED LOG FORMAT: | ||
| 33 | # The script looks for structured log entries in this format: | ||
| 34 | # | ||
| 35 | # 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... | ||
| 36 | # | ||
| 37 | # Required fields: kind, event_id, reason | ||
| 38 | # Optional fields: repo, npub (may not be available if parsing failed early) | ||
| 39 | # | ||
| 40 | # DEPENDENCY: | ||
| 41 | # This script requires logging improvements in ngit-grasp to emit structured | ||
| 42 | # [PARSE_FAIL] log entries. Until those are implemented, this script will | ||
| 43 | # find no matching entries (which is handled gracefully). | ||
| 44 | # | ||
| 45 | # See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section) | ||
| 46 | # | ||
| 47 | # Expected Rust logging code: | ||
| 48 | # tracing::warn!( | ||
| 49 | # target: "migration", | ||
| 50 | # "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}", | ||
| 51 | # event.kind, event.id, reason, identifier, npub | ||
| 52 | # ); | ||
| 53 | # | ||
| 54 | # PREREQUISITES: | ||
| 55 | # - journalctl (systemd) | ||
| 56 | # - grep, awk (standard Unix tools) | ||
| 57 | # - Access to systemd journal (may require sudo or journal group membership) | ||
| 58 | # | ||
| 59 | # RUNTIME: Depends on log volume, typically < 30 seconds | ||
| 60 | # | ||
| 61 | # SEE ALSO: | ||
| 62 | # docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide | ||
| 63 | # 31-extract-purgatory-expiry.sh - Companion script for purgatory expiry logs | ||
| 64 | # | ||
| 65 | |||
| 66 | set -euo pipefail | ||
| 67 | |||
# Colors for log output. All colored messages are written to stderr by the
# log_* helpers, so the terminal check is made on stderr (fd 2), not stdout:
# this keeps escape sequences out of a redirected stderr and keeps colors
# when only stdout is redirected.
if [[ -t 2 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  BLUE=''
  NC=''
fi
| 82 | |||
# Write an informational message to stderr with an [INFO] tag.
# Arguments: message words (joined with single spaces via "$*").
# The tag is wrapped in the BLUE/NC color sequences (empty when colors are
# disabled). Only those sequences go through %b; the message is printed with
# %s so backslashes in it (paths, user-supplied values) stay literal.
log_info() {
  printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$*" >&2
}
| 86 | |||
# Write a success message to stderr with an [OK] tag.
# Arguments: message words (joined with single spaces via "$*").
# The tag is wrapped in the GREEN/NC color sequences (empty when colors are
# disabled). Only those sequences go through %b; the message is printed with
# %s so backslashes in it stay literal.
log_success() {
  printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$*" >&2
}
| 90 | |||
# Write a warning message to stderr with a [WARN] tag.
# Arguments: message words (joined with single spaces via "$*"); may be empty
# to emit a spacer line consisting of the tag only.
# The tag is wrapped in the YELLOW/NC color sequences (empty when colors are
# disabled). Only those sequences go through %b; the message is printed with
# %s so backslashes in it stay literal.
log_warn() {
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*" >&2
}
| 94 | |||
# Write an error message to stderr with an [ERROR] tag.
# Arguments: message words (joined with single spaces via "$*").
# The tag is wrapped in the RED/NC color sequences (empty when colors are
# disabled). Only those sequences go through %b; the message is printed with
# %s so backslashes in it (e.g. an unknown option echoed back to the user)
# stay literal.
log_error() {
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*" >&2
}
| 98 | |||
# Print the command-line help (synopsis, arguments, options, examples and the
# expected log format) to stdout, then terminate the script with status 1.
# Each argument to printf below is emitted as one output line.
usage() {
  printf '%s\n' \
    "Usage: $0 <service-name> <output-dir> [options]" \
    "" \
    "Arguments:" \
    " service-name Systemd service name (e.g., ngit-grasp.service)" \
    " output-dir Directory to store extracted log data" \
    "" \
    "Options:" \
    " --since <date> Start date (default: 30 days ago)" \
    " --until <date> End date (default: now)" \
    " --dry-run Show what would be extracted without writing" \
    "" \
    "Examples:" \
    " $0 ngit-grasp.service output/logs" \
    " $0 ngit-grasp.service output/logs --since '2026-01-01'" \
    " $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'" \
    "" \
    "Expected log format:" \
    " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..."
  exit 1
}
| 120 | |||
#######################################
# Parse one log line containing a [PARSE_FAIL] entry into a TSV record.
# Arguments:
#   $1 - raw log line
# Outputs:
#   On stdout, one line: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason
#   repo and npub may be empty. Nothing is printed when any of kind,
#   event_id or reason is missing or empty.
# Returns:
#   0 whether or not a record was emitted.
# Notes:
#   - Uses bash's built-in regex matching, so each field yields at most one
#     value (the first match) and no external process is spawned per line.
#   - The quoted reason is extracted first and removed from the text before
#     the other fields are searched, so key=value text inside the reason
#     cannot be mistaken for a real field.
#######################################
parse_log_line() {
  local line="$1"
  local kind="" event_id="" reason="" repo="" npub=""
  local fields="$line"

  # Keys must start the line or follow whitespace, so that e.g. "xkind=1"
  # is not read as "kind=1". The closing quote of reason is optional to
  # tolerate a truncated message.
  local -r re_reason='(^|[[:space:]])reason="([^"]*)"?'
  local -r re_kind='(^|[[:space:]])kind=([0-9]+)'
  local -r re_event_id='(^|[[:space:]])event_id=([a-f0-9]+)'
  local -r re_repo='(^|[[:space:]])repo=([^[:space:]]+)'
  local -r re_npub='(^|[[:space:]])npub=([^[:space:]]+)'

  if [[ "$line" =~ $re_reason ]]; then
    reason="${BASH_REMATCH[2]}"
    # Drop the whole reason="..." segment (quoted => literal match) and keep
    # a separator in its place.
    fields="${line/"${BASH_REMATCH[0]}"/ }"
    # A literal tab inside the reason would add a column to the TSV record.
    reason="${reason//$'\t'/ }"
  fi

  # kind: decimal event kind number
  if [[ "$fields" =~ $re_kind ]]; then
    kind="${BASH_REMATCH[2]}"
  fi

  # event_id: lowercase hex, possibly truncated (a trailing "..." is ignored)
  if [[ "$fields" =~ $re_event_id ]]; then
    event_id="${BASH_REMATCH[2]}"
  fi

  # repo (optional): unquoted identifier
  if [[ "$fields" =~ $re_repo ]]; then
    repo="${BASH_REMATCH[2]}"
  fi

  # npub (optional)
  if [[ "$fields" =~ $re_npub ]]; then
    npub="${BASH_REMATCH[2]}"
  fi

  # Only emit a record when all required fields are present.
  if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then
    printf '%s\t%s\t%s\t%s\t%s\n' "$repo" "$npub" "$kind" "$event_id" "$reason"
  fi
}
| 152 | |||
#######################################
# Entry point: extract [PARSE_FAIL] journal entries for a systemd unit and
# write them as TSV to <output-dir>/parse-failures.txt.
# Arguments:
#   $1            - systemd service name (".service" is appended if missing)
#   $2            - output directory (created if it does not exist)
#   --since <d>   - optional start date (default: 30 days ago)
#   --until <d>   - optional end date (default: now)
#   --dry-run     - only count matching entries; write nothing
# Outputs:
#   Diagnostics on stderr via the log_* helpers; sample entries and the
#   per-kind breakdown on stdout.
# Returns:
#   Exits 0 on success (including "no entries found"); exits 1 on usage
#   errors or when journalctl is not installed.
#######################################
main() {
  if [[ $# -lt 2 ]]; then
    usage
  fi

  local service="$1"
  local output_dir="$2"
  shift 2

  # Default time range: last 30 days (GNU date first, then BSD date; empty
  # means "no lower bound" if neither form is supported).
  local since_date
  since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null \
    || date -v-30d "+%Y-%m-%d" 2>/dev/null \
    || echo "")
  local until_date=""
  local dry_run=false

  # Parse options
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --since)
        # Check explicitly: under `set -u` a missing value would otherwise
        # abort with an opaque "unbound variable" error.
        if [[ $# -lt 2 ]]; then
          log_error "Option --since requires a value"
          usage
        fi
        since_date="$2"
        shift 2
        ;;
      --until)
        if [[ $# -lt 2 ]]; then
          log_error "Option --until requires a value"
          usage
        fi
        until_date="$2"
        shift 2
        ;;
      --dry-run)
        dry_run=true
        shift
        ;;
      *)
        log_error "Unknown option: $1"
        usage
        ;;
    esac
  done

  # Normalize the service name so "ngit-grasp" and "ngit-grasp.service"
  # are both accepted.
  if [[ ! "$service" =~ \.service$ ]]; then
    service="${service}.service"
  fi

  log_info "Extracting parse failures from systemd logs"
  log_info "Service: $service"
  log_info "Output: $output_dir"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"

  # Check if journalctl is available
  if ! command -v journalctl &> /dev/null; then
    log_error "journalctl not found. This script requires systemd."
    exit 1
  fi

  # Build the journalctl command as an array so user-supplied dates are
  # passed as single arguments and are never re-parsed by the shell.
  local -a journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)

  if [[ -n "$since_date" ]]; then
    journal_cmd+=(--since "$since_date")
  fi

  if [[ -n "$until_date" ]]; then
    journal_cmd+=(--until "$until_date")
  fi

  # Shell-quoted rendering of the command, for logging only.
  local journal_display
  printf -v journal_display '%q ' "${journal_cmd[@]}"
  log_info "Running: ${journal_display}| grep -F '[PARSE_FAIL]'"

  if [[ "$dry_run" == true ]]; then
    log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt"

    # Show how many entries would be extracted
    log_info "Checking for matching log entries..."
    local sample_count
    # grep -c prints "0" itself when nothing matches (and exits 1), so the
    # fallback must not print a second value.
    sample_count=$("${journal_cmd[@]}" 2>/dev/null | grep -cF '[PARSE_FAIL]' || true)
    sample_count="${sample_count//[^0-9]/}" # Strip non-numeric characters
    sample_count="${sample_count:-0}"
    log_info "Found $sample_count matching log entries"

    if [[ "$sample_count" -eq 0 ]]; then
      log_warn "No [PARSE_FAIL] entries found in logs."
      log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
      log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
    fi

    exit 0
  fi

  # Create output directory
  mkdir -p -- "$output_dir"

  local output_file="$output_dir/parse-failures.txt"

  # Header written at the top of the output file. The sample and breakdown
  # sections below skip exactly this many lines, so keep header_lines in
  # sync with the number of lines produced here.
  local -r header_lines=4
  local header
  header=$(printf '%s\n' \
    "# Parse failures extracted from $service" \
    "# Time range: ${since_date:-beginning} to ${until_date:-now}" \
    "# Extracted: $(date -Iseconds)" \
    "# Format: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason")

  # Extract and parse log entries
  log_info "Extracting log entries..."

  # Get raw log lines containing [PARSE_FAIL]. Deliberately best-effort:
  # journalctl errors and "no match" both result in an empty set, which is
  # reported below instead of aborting the pipeline.
  local raw_lines
  raw_lines=$("${journal_cmd[@]}" 2>/dev/null | grep -F '[PARSE_FAIL]' || true)

  if [[ -z "$raw_lines" ]]; then
    log_warn "No [PARSE_FAIL] entries found in logs."
    log_warn ""
    log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
    log_warn "The structured log format required by this script:"
    log_warn ""
    log_warn " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..."
    log_warn ""
    log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
    log_warn ""

    # Create an output file that contains only the header plus a note
    {
      printf '%s\n' "$header"
      printf '%s\n' \
        "#" \
        "# NOTE: No [PARSE_FAIL] entries found." \
        "# This is expected if ngit-grasp logging improvements are not yet deployed."
    } > "$output_file"

    log_info "Created empty output file: $output_file"
    exit 0
  fi

  # Write header
  printf '%s\n' "$header" > "$output_file"

  # Parse each line and append the resulting TSV records
  local count=0
  local line parsed
  while IFS= read -r line; do
    parsed=$(parse_log_line "$line")
    if [[ -n "$parsed" ]]; then
      printf '%s\n' "$parsed"
      # Not ((count++)): that returns status 1 when count is 0 and would
      # terminate the script under `set -e` on the first entry.
      count=$((count + 1))
    fi
  done >> "$output_file" <<< "$raw_lines"

  # Summary
  echo ""
  log_info "=== Extraction Summary ==="
  log_info "Service: $service"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
  log_success "Extracted $count parse failure entries"
  echo ""
  log_info "Output file: $output_file"

  if [[ $count -gt 0 ]]; then
    local first_data_line=$((header_lines + 1))

    echo ""
    log_info "Sample entries (first 5):"
    # awk splits on every single tab, so empty repo/npub columns stay in
    # place (IFS-based `read` would collapse them and shift the fields).
    # Reading the file directly also avoids a `tail | head` pipeline, which
    # can fail with SIGPIPE under `pipefail` on large files.
    awk -F'\t' -v first="$first_data_line" \
      'NR >= first && NR < first + 5 {
         printf " kind=%s repo=%s reason=\"%s\"\n", $3, $1, $5
       }' "$output_file"

    # Breakdown by kind
    echo ""
    log_info "Breakdown by event kind:"
    local cnt kind
    awk -F'\t' -v first="$first_data_line" 'NR >= first { print $3 }' "$output_file" \
      | sort \
      | uniq -c \
      | sort -rn \
      | while read -r cnt kind; do
          echo " kind $kind: $cnt failures"
        done
  fi
}
| 327 | |||
| 328 | main "$@" | ||