upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/docs
diff options
context:
space:
mode:
authorDanConwayDev <DanConwayDev@protonmail.com>2026-01-23 11:16:50 +0000
committerDanConwayDev <DanConwayDev@protonmail.com>2026-01-27 20:37:57 +0000
commit28cc7820953efeafb2bc4d41ebcf3d682da86711 (patch)
tree36de05f70acf0a46d57b19e851ff4b95af4919fd /docs
parent800dbfaa82428b897e271d0eb5d9e4c0f107f80b (diff)
Add Phase 4 migration scripts for log extraction
- 30-extract-parse-failures.sh: Extracts parse failure events from logs - 31-extract-purgatory-expiry.sh: Extracts purgatory expiry events from logs - Both support time range filtering (--since, --until) - Includes dry-run mode for testing - Gracefully handles missing logs with dependency notes - TSV output format for Phase 5 consumption - Ready for when structured logging is implemented in ngit-grasp
Diffstat (limited to 'docs')
-rwxr-xr-xdocs/how-to/migration-scripts/30-extract-parse-failures.sh328
-rwxr-xr-xdocs/how-to/migration-scripts/31-extract-purgatory-expiry.sh346
2 files changed, 674 insertions, 0 deletions
diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh
new file mode 100755
index 0000000..753fd3e
--- /dev/null
+++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh
@@ -0,0 +1,328 @@
1#!/usr/bin/env bash
2#
3# 30-extract-parse-failures.sh - Extract parse failure events from systemd logs
4#
5# PHASE 4a of the ngit-relay to ngit-grasp migration analysis pipeline.
6# Extracts structured [PARSE_FAIL] log entries from journalctl.
7#
8# USAGE:
9# ./30-extract-parse-failures.sh <service-name> <output-dir> [options]
10#
11# EXAMPLES:
12# # Extract from ngit-grasp service (last 30 days, default)
13# ./30-extract-parse-failures.sh ngit-grasp.service output/logs
14#
15# # Extract with custom time range
16# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-01"
17#
18# # Extract from specific time window
19# ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22"
20#
21# OPTIONS:
22# --since <date> Start date for log extraction (default: 30 days ago)
23# --until <date> End date for log extraction (default: now)
24# --dry-run Show what would be extracted without writing files
25#
26# OUTPUT:
27# <output-dir>/parse-failures.txt
28#
29# OUTPUT FORMAT (TSV):
30# repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason
31#
32# EXPECTED LOG FORMAT:
33# The script looks for structured log entries in this format:
34#
35# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1...
36#
37# Required fields: kind, event_id, reason
38# Optional fields: repo, npub (may not be available if parsing failed early)
39#
40# DEPENDENCY:
41# This script requires logging improvements in ngit-grasp to emit structured
42# [PARSE_FAIL] log entries. Until those are implemented, this script will
43# find no matching entries (which is handled gracefully).
44#
45# See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)
46#
47# Expected Rust logging code:
48# tracing::warn!(
49# target: "migration",
50# "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}",
51# event.kind, event.id, reason, identifier, npub
52# );
53#
54# PREREQUISITES:
55# - journalctl (systemd)
56# - grep, awk (standard Unix tools)
57# - Access to systemd journal (may require sudo or journal group membership)
58#
59# RUNTIME: Depends on log volume, typically < 30 seconds
60#
61# SEE ALSO:
62# docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide
63# 31-extract-purgatory-expiry.sh - Companion script for purgatory expiry logs
64#
65
set -euo pipefail

# ANSI colour codes used by the log_* helpers.
# The helpers write to stderr, so colour is enabled only when stderr (fd 2)
# is a terminal; testing stdout would leak escape codes into `2>file`
# captures and drop colour when only stdout is redirected.
if [[ -t 2 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  BLUE=''
  NC=''
fi
82
# Print an informational message to stderr, prefixed with a blue [INFO] tag.
# All arguments are joined into a single message; backslash escapes in the
# message are interpreted.
log_info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $*" >&2
}
86
# Print a success message to stderr, prefixed with a green [OK] tag.
# All arguments are joined into a single message; backslash escapes in the
# message are interpreted.
log_success() {
  printf '%b\n' "${GREEN}[OK]${NC} $*" >&2
}
90
# Print a warning to stderr, prefixed with a yellow [WARN] tag.
# All arguments are joined into a single message; backslash escapes in the
# message are interpreted.
log_warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2
}
94
# Print an error message to stderr, prefixed with a red [ERROR] tag.
# All arguments are joined into a single message; backslash escapes in the
# message are interpreted.
log_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}
98
# Print the command-line help text to stdout and terminate the script with
# exit status 1. $0 is expanded so the examples show the invoked script path.
usage() {
  cat <<EOF
Usage: $0 <service-name> <output-dir> [options]

Arguments:
 service-name Systemd service name (e.g., ngit-grasp.service)
 output-dir Directory to store extracted log data

Options:
 --since <date> Start date (default: 30 days ago)
 --until <date> End date (default: now)
 --dry-run Show what would be extracted without writing

Examples:
 $0 ngit-grasp.service output/logs
 $0 ngit-grasp.service output/logs --since '2026-01-01'
 $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'

Expected log format:
 [PARSE_FAIL] kind=30618 event_id=abc123 reason="..." repo=myrepo npub=npub1...
EOF
  exit 1
}
120
# Parse a single [PARSE_FAIL] log line into one TSV record.
#
# Arguments:
#   $1 - log line containing [PARSE_FAIL]
# Outputs:
#   repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason on stdout, or nothing when
#   a required field (kind, event_id, reason) is missing or empty.
# Returns:
#   0 always.
#
# Fields are matched with bash's built-in regex engine (no grep forks per
# line, no dependency on GNU grep -P). Only the first occurrence of each key
# is used, and the quoted reason text is removed before the unquoted keys are
# looked up, so text such as reason="missing repo=foo" cannot inject extra
# values or line breaks into the record.
parse_log_line() {
  local line="$1"
  local kind="" event_id="" reason="" repo="" npub=""
  local re matched

  # reason="VALUE" (quoted string; a missing closing quote is tolerated)
  re='reason="([^"]*)"?'
  if [[ "$line" =~ $re ]]; then
    reason="${BASH_REMATCH[1]}"
    matched="${BASH_REMATCH[0]}"
    # Blank out the quoted text so key=value look-alikes inside it are ignored
    line=${line/"$matched"/ }
    # A literal tab inside the reason would add a spurious TSV column
    reason=${reason//$'\t'/ }
  fi

  # kind=VALUE (numeric); the key must not be the tail of a longer word
  re='(^|[^[:alnum:]_])kind=([0-9]+)'
  if [[ "$line" =~ $re ]]; then
    kind="${BASH_REMATCH[2]}"
  fi

  # event_id=VALUE (hex string, possibly truncated with ...)
  re='(^|[^[:alnum:]_])event_id=([a-f0-9]+)'
  if [[ "$line" =~ $re ]]; then
    event_id="${BASH_REMATCH[2]}"
  fi

  # repo=VALUE (optional, unquoted identifier)
  re='(^|[^[:alnum:]_])repo=([^[:space:]]+)'
  if [[ "$line" =~ $re ]]; then
    repo="${BASH_REMATCH[2]}"
  fi

  # npub=VALUE (optional, npub1... format)
  re='(^|[^[:alnum:]_])npub=([^[:space:]]+)'
  if [[ "$line" =~ $re ]]; then
    npub="${BASH_REMATCH[2]}"
  fi

  # Only output if we have the required fields
  if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then
    printf '%s\t%s\t%s\t%s\t%s\n' "$repo" "$npub" "$kind" "$event_id" "$reason"
  fi
}
152
# Main entry point.
#
# Arguments:
#   $1            - systemd service name (".service" is appended if missing)
#   $2            - output directory (created if needed)
#   --since DATE  - start of the journal window (default: 30 days ago)
#   --until DATE  - end of the journal window (default: now)
#   --dry-run     - only count matching entries; write nothing
# Outputs:
#   <output-dir>/parse-failures.txt: four '#' header lines followed by one TSV
#   record per parsed entry. Progress goes to stderr via the log_* helpers; the
#   sample and per-kind breakdown go to stdout.
# Exits:
#   0 on success (including "no entries found"), 1 on usage errors or when
#   journalctl is unavailable.
main() {
  if [[ $# -lt 2 ]]; then
    usage
  fi

  local service="$1"
  local output_dir="$2"
  shift 2

  # Default time range: last 30 days (GNU date syntax first, then BSD)
  local since_date
  since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "")
  local until_date=""
  local dry_run=false

  # Parse options
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --since | --until)
        # Without this check a missing value dies with an opaque
        # "unbound variable" error under set -u.
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires a value"
          usage
        fi
        if [[ "$1" == "--since" ]]; then
          since_date="$2"
        else
          until_date="$2"
        fi
        shift 2
        ;;
      --dry-run)
        dry_run=true
        shift
        ;;
      *)
        log_error "Unknown option: $1"
        usage
        ;;
    esac
  done

  # Validate service name
  if [[ ! "$service" =~ \.service$ ]]; then
    service="${service}.service"
  fi

  log_info "Extracting parse failures from systemd logs"
  log_info "Service: $service"
  log_info "Output: $output_dir"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"

  # Check if journalctl is available
  if ! command -v journalctl &> /dev/null; then
    log_error "journalctl not found. This script requires systemd."
    exit 1
  fi

  # Build the journalctl command as an array: user-supplied values are passed
  # as single arguments and are never re-parsed by the shell (no eval).
  local -a journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)
  if [[ -n "$since_date" ]]; then
    journal_cmd+=(--since "$since_date")
  fi
  if [[ -n "$until_date" ]]; then
    journal_cmd+=(--until "$until_date")
  fi

  local journal_cmd_display
  printf -v journal_cmd_display '%q ' "${journal_cmd[@]}"
  log_info "Running: ${journal_cmd_display}| grep '\\[PARSE_FAIL\\]'"

  if [[ "$dry_run" == true ]]; then
    log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt"

    # Show sample of what would be extracted
    log_info "Checking for matching log entries..."
    local sample_count
    # grep -c always prints a count, even when it exits non-zero on zero
    # matches, so the fallback must not print a second number.
    sample_count=$("${journal_cmd[@]}" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || true)
    sample_count="${sample_count:-0}"
    log_info "Found $sample_count matching log entries"

    if [[ "$sample_count" -eq 0 ]]; then
      log_warn "No [PARSE_FAIL] entries found in logs."
      log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
      log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
    fi

    exit 0
  fi

  # Create output directory
  mkdir -p "$output_dir"

  local output_file="$output_dir/parse-failures.txt"
  # Number of '#' header lines written below; data records start after them.
  local -r header_lines=4

  # Extract and parse log entries
  log_info "Extracting log entries..."

  # Get raw log lines containing [PARSE_FAIL]; a journalctl failure or an
  # empty result is deliberately treated as "no entries".
  local raw_lines
  raw_lines=$("${journal_cmd[@]}" 2>/dev/null | grep '\[PARSE_FAIL\]' || true)

  # Write header (shared by the empty and the populated output file)
  {
    echo "# Parse failures extracted from $service"
    echo "# Time range: ${since_date:-beginning} to ${until_date:-now}"
    echo "# Extracted: $(date -Iseconds)"
    echo "# Format: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason"
  } > "$output_file"

  if [[ -z "$raw_lines" ]]; then
    log_warn "No [PARSE_FAIL] entries found in logs."
    log_warn ""
    log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
    log_warn "The structured log format required by this script:"
    log_warn ""
    log_warn " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..."
    log_warn ""
    log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
    log_warn ""

    # Mark the header-only output file as intentionally empty
    {
      echo "#"
      echo "# NOTE: No [PARSE_FAIL] entries found."
      echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
    } >> "$output_file"

    log_info "Created empty output file: $output_file"
    exit 0
  fi

  # Parse each line
  local count=0
  local line parsed
  while IFS= read -r line; do
    parsed=$(parse_log_line "$line")
    if [[ -n "$parsed" ]]; then
      printf '%s\n' "$parsed" >> "$output_file"
      # Not ((count++)): that returns status 1 when count is 0 and would
      # abort the script under set -e on the very first entry.
      count=$((count + 1))
    fi
  done <<< "$raw_lines"

  # Summary
  echo ""
  log_info "=== Extraction Summary ==="
  log_info "Service: $service"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
  log_success "Extracted $count parse failure entries"
  echo ""
  log_info "Output file: $output_file"

  if [[ $count -gt 0 ]]; then
    echo ""
    log_info "Sample entries (first 5):"
    # awk keeps empty repo/npub columns in place (a tab-IFS `read` collapses
    # them and shifts every field) and, unlike `... | head -5`, cannot raise
    # SIGPIPE under pipefail.
    awk -F'\t' -v skip="$header_lines" \
      'NR > skip && NR <= skip + 5 { printf " kind=%s repo=%s reason=\"%s\"\n", $3, $1, $5 }' \
      "$output_file"

    # Breakdown by kind
    echo ""
    log_info "Breakdown by event kind:"
    local cnt kind
    awk -F'\t' -v skip="$header_lines" 'NR > skip { print $3 }' "$output_file" \
      | sort | uniq -c | sort -rn \
      | while read -r cnt kind; do
          echo " kind $kind: $cnt failures"
        done
  fi
}
327
328main "$@"
diff --git a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh
new file mode 100755
index 0000000..38b2ca3
--- /dev/null
+++ b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh
@@ -0,0 +1,346 @@
1#!/usr/bin/env bash
2#
3# 31-extract-purgatory-expiry.sh - Extract purgatory expiry events from systemd logs
4#
5# PHASE 4b of the ngit-relay to ngit-grasp migration analysis pipeline.
6# Extracts structured [PURGATORY_EXPIRED] log entries from journalctl.
7#
8# USAGE:
9# ./31-extract-purgatory-expiry.sh <service-name> <output-dir> [options]
10#
11# EXAMPLES:
12# # Extract from ngit-grasp service (last 30 days, default)
13# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs
14#
15# # Extract with custom time range
16# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-01"
17#
18# # Extract from specific time window
19# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22"
20#
21# OPTIONS:
22# --since <date> Start date for log extraction (default: 30 days ago)
23# --until <date> End date for log extraction (default: now)
24# --dry-run Show what would be extracted without writing files
25#
26# OUTPUT:
27# <output-dir>/purgatory-expired.txt
28#
29# OUTPUT FORMAT (TSV):
30# repo<TAB>npub<TAB>timestamp<TAB>reason
31#
32# EXPECTED LOG FORMAT:
33# The script looks for structured log entries in this format:
34#
35# 2026-01-22T10:30:45Z ngit-grasp[1234]: [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="clone URL unreachable after 7 days"
36#
37# Required fields: repo, npub
38# Optional fields: reason (explains why purgatory expired)
39#
40# BACKGROUND:
41# "Purgatory" is the state where ngit-grasp has received an announcement event
42# but cannot yet sync the git data (e.g., clone URL unreachable, git server down).
43# After a configurable timeout (default 7 days), the repository is marked as
44# expired and removed from purgatory.
45#
46# Purgatory expiry during migration analysis indicates repositories that:
47# - Had valid announcements on the production relay
48# - Could not be synced to the archive relay
49# - May need manual intervention or investigation
50#
51# DEPENDENCY:
52# This script requires logging improvements in ngit-grasp to emit structured
53# [PURGATORY_EXPIRED] log entries. Until those are implemented, this script
54# will find no matching entries (which is handled gracefully).
55#
56# See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)
57#
58# Expected Rust logging code:
59# tracing::warn!(
60# target: "migration",
61# "[PURGATORY_EXPIRED] repo={} npub={} reason=\"{}\"",
62# identifier, npub, reason
63# );
64#
65# PREREQUISITES:
66# - journalctl (systemd)
67# - grep, awk (standard Unix tools)
68# - Access to systemd journal (may require sudo or journal group membership)
69#
70# RUNTIME: Depends on log volume, typically < 30 seconds
71#
72# SEE ALSO:
73# docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide
74# 30-extract-parse-failures.sh - Companion script for parse failure logs
75#
76
set -euo pipefail

# ANSI colour codes used by the log_* helpers.
# The helpers write to stderr, so colour is enabled only when stderr (fd 2)
# is a terminal; testing stdout would leak escape codes into `2>file`
# captures and drop colour when only stdout is redirected.
if [[ -t 2 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  BLUE=''
  NC=''
fi
93
# Print an informational message to stderr, prefixed with a blue [INFO] tag.
# All arguments are joined into a single message; backslash escapes in the
# message are interpreted.
log_info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $*" >&2
}
97
# Print a success message to stderr, prefixed with a green [OK] tag.
# All arguments are joined into a single message; backslash escapes in the
# message are interpreted.
log_success() {
  printf '%b\n' "${GREEN}[OK]${NC} $*" >&2
}
101
# Print a warning to stderr, prefixed with a yellow [WARN] tag.
# All arguments are joined into a single message; backslash escapes in the
# message are interpreted.
log_warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2
}
105
# Print an error message to stderr, prefixed with a red [ERROR] tag.
# All arguments are joined into a single message; backslash escapes in the
# message are interpreted.
log_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}
109
# Print the command-line help text to stdout and terminate the script with
# exit status 1. $0 is expanded so the examples show the invoked script path.
usage() {
  cat <<EOF
Usage: $0 <service-name> <output-dir> [options]

Arguments:
 service-name Systemd service name (e.g., ngit-grasp.service)
 output-dir Directory to store extracted log data

Options:
 --since <date> Start date (default: 30 days ago)
 --until <date> End date (default: now)
 --dry-run Show what would be extracted without writing

Examples:
 $0 ngit-grasp.service output/logs
 $0 ngit-grasp.service output/logs --since '2026-01-01'
 $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'

Expected log format:
 [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="..."
EOF
  exit 1
}
131
# Parse a single [PURGATORY_EXPIRED] log line into one TSV record.
#
# Arguments:
#   $1 - log line containing [PURGATORY_EXPIRED]
# Outputs:
#   repo<TAB>npub<TAB>timestamp<TAB>reason on stdout, or nothing when a
#   required field (repo, npub) is missing. timestamp and reason may be empty.
# Returns:
#   0 always.
#
# Fields are matched with bash's built-in regex engine (no grep forks per
# line, no dependency on GNU grep -P). Only the first occurrence of each key
# is used, and the quoted reason text is removed before the unquoted keys are
# looked up, so text such as reason="mirror repo=foo gone" cannot inject
# extra values or line breaks into the record.
parse_log_line() {
  local line="$1"
  local timestamp="" repo="" npub="" reason=""
  local re matched

  # ISO timestamp at the beginning of the line (seconds precision; any
  # timezone suffix such as +0000 is not captured)
  re='^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}'
  if [[ "$line" =~ $re ]]; then
    timestamp="${BASH_REMATCH[0]}"
  fi

  # reason="VALUE" (quoted string, optional; a missing closing quote is
  # tolerated)
  re='reason="([^"]*)"?'
  if [[ "$line" =~ $re ]]; then
    reason="${BASH_REMATCH[1]}"
    matched="${BASH_REMATCH[0]}"
    # Blank out the quoted text so key=value look-alikes inside it are ignored
    line=${line/"$matched"/ }
    # A literal tab inside the reason would add a spurious TSV column
    reason=${reason//$'\t'/ }
  fi

  # repo=VALUE (unquoted identifier); the key must not be the tail of a
  # longer word
  re='(^|[^[:alnum:]_])repo=([^[:space:]]+)'
  if [[ "$line" =~ $re ]]; then
    repo="${BASH_REMATCH[2]}"
  fi

  # npub=VALUE (npub1... format)
  re='(^|[^[:alnum:]_])npub=([^[:space:]]+)'
  if [[ "$line" =~ $re ]]; then
    npub="${BASH_REMATCH[2]}"
  fi

  # Only output if we have the required fields
  if [[ -n "$repo" && -n "$npub" ]]; then
    printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$timestamp" "$reason"
  fi
}
159
# Main entry point.
#
# Arguments:
#   $1            - systemd service name (".service" is appended if missing)
#   $2            - output directory (created if needed)
#   --since DATE  - start of the journal window (default: 30 days ago)
#   --until DATE  - end of the journal window (default: now)
#   --dry-run     - only count matching entries; write nothing
# Outputs:
#   <output-dir>/purgatory-expired.txt: four '#' header lines followed by one
#   TSV record per parsed entry. Progress goes to stderr via the log_*
#   helpers; the sample and per-repository list go to stdout.
# Exits:
#   0 on success (including "no entries found"), 1 on usage errors or when
#   journalctl is unavailable.
main() {
  if [[ $# -lt 2 ]]; then
    usage
  fi

  local service="$1"
  local output_dir="$2"
  shift 2

  # Default time range: last 30 days (GNU date syntax first, then BSD)
  local since_date
  since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "")
  local until_date=""
  local dry_run=false

  # Parse options
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --since | --until)
        # Without this check a missing value dies with an opaque
        # "unbound variable" error under set -u.
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires a value"
          usage
        fi
        if [[ "$1" == "--since" ]]; then
          since_date="$2"
        else
          until_date="$2"
        fi
        shift 2
        ;;
      --dry-run)
        dry_run=true
        shift
        ;;
      *)
        log_error "Unknown option: $1"
        usage
        ;;
    esac
  done

  # Validate service name
  if [[ ! "$service" =~ \.service$ ]]; then
    service="${service}.service"
  fi

  log_info "Extracting purgatory expiry events from systemd logs"
  log_info "Service: $service"
  log_info "Output: $output_dir"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"

  # Check if journalctl is available
  if ! command -v journalctl &> /dev/null; then
    log_error "journalctl not found. This script requires systemd."
    exit 1
  fi

  # Build the journalctl command as an array: user-supplied values are passed
  # as single arguments and are never re-parsed by the shell (no eval).
  local -a journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)
  if [[ -n "$since_date" ]]; then
    journal_cmd+=(--since "$since_date")
  fi
  if [[ -n "$until_date" ]]; then
    journal_cmd+=(--until "$until_date")
  fi

  local journal_cmd_display
  printf -v journal_cmd_display '%q ' "${journal_cmd[@]}"
  log_info "Running: ${journal_cmd_display}| grep '\\[PURGATORY_EXPIRED\\]'"

  if [[ "$dry_run" == true ]]; then
    log_info "[DRY RUN] Would extract to: $output_dir/purgatory-expired.txt"

    # Show sample of what would be extracted
    log_info "Checking for matching log entries..."
    local sample_count
    # grep -c always prints a count, even when it exits non-zero on zero
    # matches, so the fallback must not print a second number.
    sample_count=$("${journal_cmd[@]}" 2>/dev/null | grep -c '\[PURGATORY_EXPIRED\]' || true)
    sample_count="${sample_count:-0}"
    log_info "Found $sample_count matching log entries"

    if [[ "$sample_count" -eq 0 ]]; then
      log_warn "No [PURGATORY_EXPIRED] entries found in logs."
      log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
      log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
    fi

    exit 0
  fi

  # Create output directory
  mkdir -p "$output_dir"

  local output_file="$output_dir/purgatory-expired.txt"
  # Number of '#' header lines written below; data records start after them.
  local -r header_lines=4

  # Extract and parse log entries
  log_info "Extracting log entries..."

  # Get raw log lines containing [PURGATORY_EXPIRED]; a journalctl failure or
  # an empty result is deliberately treated as "no entries".
  local raw_lines
  raw_lines=$("${journal_cmd[@]}" 2>/dev/null | grep '\[PURGATORY_EXPIRED\]' || true)

  # Write header (shared by the empty and the populated output file)
  {
    echo "# Purgatory expiry events extracted from $service"
    echo "# Time range: ${since_date:-beginning} to ${until_date:-now}"
    echo "# Extracted: $(date -Iseconds)"
    echo "# Format: repo<TAB>npub<TAB>timestamp<TAB>reason"
  } > "$output_file"

  if [[ -z "$raw_lines" ]]; then
    log_warn "No [PURGATORY_EXPIRED] entries found in logs."
    log_warn ""
    log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
    log_warn "The structured log format required by this script:"
    log_warn ""
    log_warn " [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason=\"...\""
    log_warn ""
    log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
    log_warn ""

    # Mark the header-only output file as intentionally empty
    {
      echo "#"
      echo "# NOTE: No [PURGATORY_EXPIRED] entries found."
      echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
    } >> "$output_file"

    log_info "Created empty output file: $output_file"
    exit 0
  fi

  # Parse each line
  local count=0
  local line parsed
  while IFS= read -r line; do
    parsed=$(parse_log_line "$line")
    if [[ -n "$parsed" ]]; then
      printf '%s\n' "$parsed" >> "$output_file"
      # Not ((count++)): that returns status 1 when count is 0 and would
      # abort the script under set -e on the very first entry.
      count=$((count + 1))
    fi
  done <<< "$raw_lines"

  # Summary
  echo ""
  log_info "=== Extraction Summary ==="
  log_info "Service: $service"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
  log_success "Extracted $count purgatory expiry entries"
  echo ""
  log_info "Output file: $output_file"

  if [[ $count -gt 0 ]]; then
    echo ""
    log_info "Sample entries (first 5):"
    # awk keeps empty columns in place (a tab-IFS `read` collapses them and
    # shifts every field) and, unlike `... | head -5`, cannot raise SIGPIPE
    # under pipefail.
    awk -F'\t' -v skip="$header_lines" \
      'NR > skip && NR <= skip + 5 { printf " repo=%s npub=%s... timestamp=%s\n", $1, substr($2, 1, 20), $3 }' \
      "$output_file"

    # Show unique repos affected (computed once, reused for the "more" line)
    echo ""
    local unique_repos
    unique_repos=$(awk -F'\t' -v skip="$header_lines" 'NR > skip { print $1 }' "$output_file" | sort -u | wc -l)
    log_info "Unique repositories affected: $unique_repos"

    echo ""
    log_info "Repositories with purgatory expiry:"
    local cnt repo
    # `awk 'NR <= 10'` reads all of its input, so the upstream sort never
    # receives SIGPIPE the way it can with `head -10`.
    awk -F'\t' -v skip="$header_lines" 'NR > skip { print $1 }' "$output_file" \
      | sort | uniq -c | sort -rn | awk 'NR <= 10' \
      | while read -r cnt repo; do
          echo " $repo: $cnt expiry events"
        done

    if [[ $unique_repos -gt 10 ]]; then
      echo " ... and $((unique_repos - 10)) more repositories"
    fi
  fi
}
345
346main "$@"