upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/docs/how-to/migration-scripts/30-extract-parse-failures.sh
diff options
context:
space:
mode:
authorDanConwayDev <DanConwayDev@protonmail.com>2026-01-23 11:16:50 +0000
committerDanConwayDev <DanConwayDev@protonmail.com>2026-01-27 20:37:57 +0000
commit28cc7820953efeafb2bc4d41ebcf3d682da86711 (patch)
tree36de05f70acf0a46d57b19e851ff4b95af4919fd /docs/how-to/migration-scripts/30-extract-parse-failures.sh
parent800dbfaa82428b897e271d0eb5d9e4c0f107f80b (diff)
Add Phase 4 migration scripts for log extraction
- 30-extract-parse-failures.sh: Extracts parse failure events from logs
- 31-extract-purgatory-expiry.sh: Extracts purgatory expiry events from logs
- Both support time range filtering (--since, --until)
- Includes dry-run mode for testing
- Gracefully handles missing logs with dependency notes
- TSV output format for Phase 5 consumption
- Ready for when structured logging is implemented in ngit-grasp
Diffstat (limited to 'docs/how-to/migration-scripts/30-extract-parse-failures.sh')
-rwxr-xr-xdocs/how-to/migration-scripts/30-extract-parse-failures.sh328
1 files changed, 328 insertions, 0 deletions
diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh
new file mode 100755
index 0000000..753fd3e
--- /dev/null
+++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh
@@ -0,0 +1,328 @@
1#!/usr/bin/env bash
2#
3# 30-extract-parse-failures.sh - Extract parse failure events from systemd logs
4#
5# PHASE 4a of the ngit-relay to ngit-grasp migration analysis pipeline.
6# Extracts structured [PARSE_FAIL] log entries from journalctl.
7#
8# USAGE:
9# ./30-extract-parse-failures.sh <service-name> <output-dir> [options]
10#
11# EXAMPLES:
12# # Extract from ngit-grasp service (last 30 days, default)
13# ./30-extract-parse-failures.sh ngit-grasp.service output/logs
14#
15# # Extract with custom time range
16# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-01"
17#
18# # Extract from specific time window
19# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22"
20#
21# OPTIONS:
22# --since <date> Start date for log extraction (default: 30 days ago)
23# --until <date> End date for log extraction (default: now)
24# --dry-run Show what would be extracted without writing files
25#
26# OUTPUT:
27# <output-dir>/parse-failures.txt
28#
29# OUTPUT FORMAT (TSV):
30# repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason
31#
32# EXPECTED LOG FORMAT:
33# The script looks for structured log entries in this format:
34#
35# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1...
36#
37# Required fields: kind, event_id, reason
38# Optional fields: repo, npub (may not be available if parsing failed early)
39#
40# DEPENDENCY:
41# This script requires logging improvements in ngit-grasp to emit structured
42# [PARSE_FAIL] log entries. Until those are implemented, this script will
43# find no matching entries (which is handled gracefully).
44#
45# See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)
46#
47# Expected Rust logging code:
48# tracing::warn!(
49# target: "migration",
50# "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}",
51# event.kind, event.id, reason, identifier, npub
52# );
53#
54# PREREQUISITES:
55# - journalctl (systemd)
56# - grep, awk (standard Unix tools)
57# - Access to systemd journal (may require sudo or journal group membership)
58#
59# RUNTIME: Depends on log volume, typically < 30 seconds
60#
61# SEE ALSO:
62# docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide
63# 31-extract-purgatory-expiry.sh - Companion script for purgatory expiry logs
64#
65
set -euo pipefail

# ANSI color codes used by the log_* helpers.
# Every log_* helper writes to stderr, so colors are enabled only when
# stderr (fd 2) is a terminal. Testing stdout instead would leak raw escape
# sequences into a file whenever stderr alone is redirected.
# The values keep the literal \033 form; the helpers expand it when printing.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
82
# Print an informational message to stderr with an [INFO] prefix.
# Arguments: $* - message text, printed verbatim
log_info() {
    # %b expands the \033 escapes stored in the color variables, while %s
    # keeps the message literal so backslashes in it are never reinterpreted.
    printf '%b[INFO]%b %s\n' "${BLUE-}" "${NC-}" "$*" >&2
}
86
# Print a success message to stderr with an [OK] prefix.
# Arguments: $* - message text, printed verbatim
log_success() {
    # %b expands the \033 escapes stored in the color variables, while %s
    # keeps the message literal so backslashes in it are never reinterpreted.
    printf '%b[OK]%b %s\n' "${GREEN-}" "${NC-}" "$*" >&2
}
90
# Print a warning message to stderr with a [WARN] prefix.
# Arguments: $* - message text, printed verbatim (may be empty for a spacer line)
log_warn() {
    # %b expands the \033 escapes stored in the color variables, while %s
    # keeps the message literal so backslashes in it are never reinterpreted.
    printf '%b[WARN]%b %s\n' "${YELLOW-}" "${NC-}" "$*" >&2
}
94
# Print an error message to stderr with an [ERROR] prefix.
# Arguments: $* - message text, printed verbatim
log_error() {
    # %b expands the \033 escapes stored in the color variables, while %s
    # keeps the message literal so backslashes in it are never reinterpreted.
    printf '%b[ERROR]%b %s\n' "${RED-}" "${NC-}" "$*" >&2
}
98
# Print the usage text and terminate the script with exit status 1.
# This is only reached on invalid invocations, so the text goes to stderr
# like every other diagnostic and never mixes into captured stdout.
# Outputs: usage text on stderr
# Exits:   1 (never returns)
usage() {
    cat >&2 <<EOF
Usage: $0 <service-name> <output-dir> [options]

Arguments:
 service-name Systemd service name (e.g., ngit-grasp.service)
 output-dir Directory to store extracted log data

Options:
 --since <date> Start date (default: 30 days ago)
 --until <date> End date (default: now)
 --dry-run Show what would be extracted without writing

Examples:
 $0 ngit-grasp.service output/logs
 $0 ngit-grasp.service output/logs --since '2026-01-01'
 $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'

Expected log format:
 [PARSE_FAIL] kind=30618 event_id=abc123 reason="..." repo=myrepo npub=npub1...
EOF
    exit 1
}
120
# Parse a single [PARSE_FAIL] log line into one TSV record.
#
# Arguments: $1 - log line containing key=value fields
# Outputs:   repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason on stdout, or
#            nothing when a required field (kind, event_id, reason) is missing
# Returns:   always 0, so callers running under `set -e` are not aborted by
#            a line that simply does not parse
parse_log_line() {
    local line="$1"
    local kind="" event_id="" reason="" repo="" npub=""

    # Patterns are kept in variables so [[ =~ ]] treats them as regexes.
    # Each key must start the line or follow whitespace, and only the first
    # (leftmost) occurrence is used.
    local re_reason='(^|[[:space:]])reason="([^"]*)"?'
    local re_kind='(^|[[:space:]])kind=([0-9]+)'
    local re_event_id='(^|[[:space:]])event_id=([a-f0-9]+)'
    local re_repo='(^|[[:space:]])repo=([^[:space:]]+)'
    local re_npub='(^|[[:space:]])npub=([^[:space:]]+)'

    # Extract the quoted reason first and cut it out of the line: free-form
    # reason text may itself contain things like "kind=1" or "repo=x", which
    # must not be mistaken for the real fields.
    local rest="$line"
    local matched
    if [[ "$line" =~ $re_reason ]]; then
        reason="${BASH_REMATCH[2]}"
        matched="${BASH_REMATCH[0]}"
        rest=${line/"$matched"/ }
        # A literal tab inside the reason would shift the TSV columns.
        reason="${reason//$'\t'/ }"
    fi

    if [[ "$rest" =~ $re_kind ]]; then
        kind="${BASH_REMATCH[2]}"
    fi

    # event_id is hex; a trailing "..." truncation marker is not captured.
    if [[ "$rest" =~ $re_event_id ]]; then
        event_id="${BASH_REMATCH[2]}"
    fi

    # repo and npub are optional (may be absent if parsing failed early).
    if [[ "$rest" =~ $re_repo ]]; then
        repo="${BASH_REMATCH[2]}"
    fi
    if [[ "$rest" =~ $re_npub ]]; then
        npub="${BASH_REMATCH[2]}"
    fi

    # Only output if we have the required fields
    if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then
        printf '%s\t%s\t%s\t%s\t%s\n' "$repo" "$npub" "$kind" "$event_id" "$reason"
    fi
    return 0
}
152
# Main entry point: extract [PARSE_FAIL] entries for a systemd service from
# journalctl and write them as TSV to <output-dir>/parse-failures.txt.
#
# Arguments:
#   $1          - systemd service name (".service" is appended if missing)
#   $2          - output directory (created if needed)
#   --since <d> - start of time range (default: 30 days ago)
#   --until <d> - end of time range (default: now)
#   --dry-run   - only count matching entries; write nothing
# Outputs: progress and diagnostics on stderr; sample and per-kind breakdown
#          lines on stdout
# Exits:   1 on usage errors or when journalctl is unavailable
main() {
    if [[ $# -lt 2 ]]; then
        usage
    fi

    local service="$1"
    local output_dir="$2"
    shift 2

    # Default time range: last 30 days (GNU date first, then BSD date).
    local since_date
    since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null \
        || date -v-30d "+%Y-%m-%d" 2>/dev/null \
        || echo "")
    local until_date=""
    local dry_run=false

    # Parse options
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --since)
                # Check explicitly: under `set -u` a missing value would
                # otherwise abort with an opaque "unbound variable" error.
                if [[ $# -lt 2 ]]; then
                    log_error "Option --since requires a value"
                    usage
                fi
                since_date="$2"
                shift 2
                ;;
            --until)
                if [[ $# -lt 2 ]]; then
                    log_error "Option --until requires a value"
                    usage
                fi
                until_date="$2"
                shift 2
                ;;
            --dry-run)
                dry_run=true
                shift
                ;;
            *)
                log_error "Unknown option: $1"
                usage
                ;;
        esac
    done

    # Normalize service name
    if [[ ! "$service" =~ \.service$ ]]; then
        service="${service}.service"
    fi

    log_info "Extracting parse failures from systemd logs"
    log_info "Service: $service"
    log_info "Output: $output_dir"
    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"

    # Check if journalctl is available
    if ! command -v journalctl &> /dev/null; then
        log_error "journalctl not found. This script requires systemd."
        exit 1
    fi

    # Build the journalctl command as an array so user-supplied values are
    # passed as single arguments and are never re-parsed by the shell.
    local -a journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)

    if [[ -n "$since_date" ]]; then
        journal_cmd+=(--since "$since_date")
    fi

    if [[ -n "$until_date" ]]; then
        journal_cmd+=(--until "$until_date")
    fi

    local printable_cmd
    printf -v printable_cmd '%q ' "${journal_cmd[@]}"
    log_info "Running: ${printable_cmd}| grep -F '[PARSE_FAIL]'"

    if [[ "$dry_run" == true ]]; then
        log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt"

        # Show how many entries would be extracted
        log_info "Checking for matching log entries..."
        local sample_count
        # grep -c prints "0" and exits 1 when nothing matches; `|| true`
        # keeps that from tripping set -e / pipefail.
        sample_count=$("${journal_cmd[@]}" 2>/dev/null | grep -cF '[PARSE_FAIL]' || true)
        sample_count="${sample_count:-0}"
        log_info "Found $sample_count matching log entries"

        if [[ "$sample_count" -eq 0 ]]; then
            log_warn "No [PARSE_FAIL] entries found in logs."
            log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
            log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
        fi

        exit 0
    fi

    # Create output directory
    mkdir -p "$output_dir"

    local output_file="$output_dir/parse-failures.txt"
    # Number of "#" header lines written before the first data row; the
    # summary below skips exactly this many lines.
    local -r header_lines=4

    # Extract and parse log entries
    log_info "Extracting log entries..."

    # Get raw log lines containing [PARSE_FAIL]. A journalctl failure or an
    # empty result is treated as "no entries" (best effort by design).
    local raw_lines
    raw_lines=$("${journal_cmd[@]}" 2>/dev/null | grep -F '[PARSE_FAIL]' || true)

    # Write header (shared by the empty and non-empty cases)
    {
        echo "# Parse failures extracted from $service"
        echo "# Time range: ${since_date:-beginning} to ${until_date:-now}"
        echo "# Extracted: $(date -Iseconds)"
        echo "# Format: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason"
    } > "$output_file"

    if [[ -z "$raw_lines" ]]; then
        log_warn "No [PARSE_FAIL] entries found in logs."
        log_warn ""
        log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
        log_warn "The structured log format required by this script:"
        log_warn ""
        log_warn " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..."
        log_warn ""
        log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
        log_warn ""

        # Leave a header-only output file with an explanatory note
        {
            echo "#"
            echo "# NOTE: No [PARSE_FAIL] entries found."
            echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
        } >> "$output_file"

        log_info "Created empty output file: $output_file"
        exit 0
    fi

    # Parse each line
    local count=0
    local line parsed
    while IFS= read -r line; do
        parsed=$(parse_log_line "$line")
        if [[ -n "$parsed" ]]; then
            printf '%s\n' "$parsed" >> "$output_file"
            # Not ((count++)): that returns status 1 when count is 0 and
            # would abort the script under `set -e` on the first entry.
            count=$((count + 1))
        fi
    done <<< "$raw_lines"

    # Summary
    echo ""
    log_info "=== Extraction Summary ==="
    log_info "Service: $service"
    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
    log_success "Extracted $count parse failure entries"
    echo ""
    log_info "Output file: $output_file"

    if [[ $count -gt 0 ]]; then
        echo ""
        log_info "Sample entries (first 5):"
        # awk splits on every tab, so empty repo/npub columns stay in place
        # (a tab-IFS `read` would collapse them). Reading the file directly
        # also avoids a SIGPIPE failure from `tail | head` under pipefail.
        awk -F'\t' -v skip="$header_lines" \
            'NR > skip && NR <= skip + 5 { printf " kind=%s repo=%s reason=\"%s\"\n", $3, $1, $5 }' \
            "$output_file"

        # Breakdown by kind
        echo ""
        log_info "Breakdown by event kind:"
        local cnt kind
        awk -F'\t' -v skip="$header_lines" 'NR > skip { print $3 }' "$output_file" \
            | sort | uniq -c | sort -rn \
            | while read -r cnt kind; do
                echo " kind $kind: $cnt failures"
            done
    fi
}
327
# Script entry point: forward all command-line arguments to main unchanged.
328main "$@"