upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh
diff options
context:
space:
mode:
Diffstat (limited to 'docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh')
-rwxr-xr-xdocs/how-to/migration-scripts/31-extract-purgatory-expiry.sh346
1 files changed, 346 insertions, 0 deletions
diff --git a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh
new file mode 100755
index 0000000..38b2ca3
--- /dev/null
+++ b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh
@@ -0,0 +1,346 @@
1#!/usr/bin/env bash
2#
3# 31-extract-purgatory-expiry.sh - Extract purgatory expiry events from systemd logs
4#
5# PHASE 4b of the ngit-relay to ngit-grasp migration analysis pipeline.
6# Extracts structured [PURGATORY_EXPIRED] log entries from journalctl.
7#
8# USAGE:
9# ./31-extract-purgatory-expiry.sh <service-name> <output-dir> [options]
10#
11# EXAMPLES:
12# # Extract from ngit-grasp service (last 30 days, default)
13# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs
14#
15# # Extract with custom time range
16# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-01"
17#
18# # Extract from specific time window
19# ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22"
20#
21# OPTIONS:
22# --since <date> Start date for log extraction (default: 30 days ago)
23# --until <date> End date for log extraction (default: now)
24# --dry-run Show what would be extracted without writing files
25#
26# OUTPUT:
27# <output-dir>/purgatory-expired.txt
28#
29# OUTPUT FORMAT (TSV):
30# repo<TAB>npub<TAB>timestamp<TAB>reason
31#
32# EXPECTED LOG FORMAT:
33# The script looks for structured log entries in this format:
34#
35# 2026-01-22T10:30:45+0000 myhost ngit-grasp[1234]: [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="clone URL unreachable after 7 days"
36#
37# Required fields: repo, npub
38# Optional fields: reason (explains why purgatory expired)
39#
40# BACKGROUND:
41# "Purgatory" is the state where ngit-grasp has received an announcement event
42# but cannot yet sync the git data (e.g., clone URL unreachable, git server down).
43# After a configurable timeout (default 7 days), the repository is marked as
44# expired and removed from purgatory.
45#
46# Purgatory expiry during migration analysis indicates repositories that:
47# - Had valid announcements on the production relay
48# - Could not be synced to the archive relay
49# - May need manual intervention or investigation
50#
51# DEPENDENCY:
52# This script requires logging improvements in ngit-grasp to emit structured
53# [PURGATORY_EXPIRED] log entries. Until those are implemented, this script
54# will find no matching entries (which is handled gracefully).
55#
56# See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)
57#
58# Expected Rust logging code:
59# tracing::warn!(
60# target: "migration",
61# "[PURGATORY_EXPIRED] repo={} npub={} reason=\"{}\"",
62# identifier, npub, reason
63# );
64#
65# PREREQUISITES:
66# - journalctl (systemd)
67# - grep, awk (standard Unix tools)
68# - Access to systemd journal (may require sudo or journal group membership)
69#
70# RUNTIME: Depends on log volume, typically < 30 seconds
71#
72# SEE ALSO:
73# docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide
74# 30-extract-parse-failures.sh - Companion script for parse failure logs
75#
76
set -euo pipefail

# Colors for log output.
# Every log helper in this script writes to stderr, so color is enabled only
# when stderr (fd 2) is a terminal. Testing stdout instead would leak escape
# codes into a redirected stderr whenever stdout happens to be a terminal.
# The values are stored as backslash sequences and expanded at print time.
if [[ -t 2 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  BLUE=''
  NC=''
fi
93
# Print an informational message to stderr, prefixed with a [INFO] tag.
# Arguments: the message words (joined with single spaces).
log_info() {
  # %b expands the escape sequences stored in the color variables; %s prints
  # the message verbatim, so backslashes inside it are never interpreted.
  printf '%b[INFO]%b %s\n' "${BLUE-}" "${NC-}" "$*" >&2
}
97
# Print a success message to stderr, prefixed with a [OK] tag.
# Arguments: the message words (joined with single spaces).
log_success() {
  # %b expands the escape sequences stored in the color variables; %s prints
  # the message verbatim, so backslashes inside it are never interpreted.
  printf '%b[OK]%b %s\n' "${GREEN-}" "${NC-}" "$*" >&2
}
101
# Print a warning to stderr, prefixed with a [WARN] tag.
# Arguments: the message words (joined with single spaces); may be empty.
log_warn() {
  # %b expands the escape sequences stored in the color variables; %s prints
  # the message verbatim, so backslashes inside it are never interpreted.
  printf '%b[WARN]%b %s\n' "${YELLOW-}" "${NC-}" "$*" >&2
}
105
# Print an error message to stderr, prefixed with a [ERROR] tag.
# Arguments: the message words (joined with single spaces).
log_error() {
  # %b expands the escape sequences stored in the color variables; %s prints
  # the message verbatim, so backslashes inside it are never interpreted.
  printf '%b[ERROR]%b %s\n' "${RED-}" "${NC-}" "$*" >&2
}
109
# Print the command-line help and terminate the script with status 1.
#
# usage is only reached on error paths (too few arguments, unknown option),
# so the text goes to stderr alongside the log_error output that precedes it,
# keeping stdout free of diagnostics.
usage() {
  # Unquoted delimiter on purpose: $0 must expand to the invoked script name.
  cat >&2 <<EOF
Usage: $0 <service-name> <output-dir> [options]

Arguments:
 service-name Systemd service name (e.g., ngit-grasp.service)
 output-dir Directory to store extracted log data

Options:
 --since <date> Start date (default: 30 days ago)
 --until <date> End date (default: now)
 --dry-run Show what would be extracted without writing

Examples:
 $0 ngit-grasp.service output/logs
 $0 ngit-grasp.service output/logs --since '2026-01-01'
 $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'

Expected log format:
 [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="..."
EOF
  exit 1
}
131
# Parse one journal line carrying a [PURGATORY_EXPIRED] entry.
#
# Arguments:
#   $1 - the raw log line
# Outputs:
#   One TSV record on stdout: repo<TAB>npub<TAB>timestamp<TAB>reason
#   Nothing is printed when repo or npub is missing.
# Notes:
#   - timestamp is the leading YYYY-MM-DDTHH:MM:SS of the line (anything after
#     the seconds, such as a UTC offset, is dropped); it is empty when the
#     line does not start with such a timestamp.
#   - reason is optional and is only recognised in its double-quoted form.
#   - When a key appears more than once (for example "repo=" quoted inside the
#     reason text) only the first occurrence is used, so a record is always
#     exactly one line.
parse_log_line() {
  local line="$1"
  local timestamp="" repo="" npub="" reason=""

  # The patterns are kept in variables so that quotes and brackets reach the
  # regex engine literally when used unquoted on the right-hand side of =~.
  local ts_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}'
  local repo_re='repo=([^ ]+)'
  local npub_re='npub=([^ ]+)'
  local reason_re='reason="([^"]*)'

  if [[ "$line" =~ $ts_re ]]; then
    timestamp="${BASH_REMATCH[0]}"
  fi
  if [[ "$line" =~ $repo_re ]]; then
    repo="${BASH_REMATCH[1]}"
  fi
  if [[ "$line" =~ $npub_re ]]; then
    npub="${BASH_REMATCH[1]}"
  fi
  if [[ "$line" =~ $reason_re ]]; then
    reason="${BASH_REMATCH[1]}"
  fi

  # Only emit a record when both required fields are present.
  if [[ -n "$repo" && -n "$npub" ]]; then
    printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$timestamp" "$reason"
  fi
}
159
# Main entry point: parse arguments, query the journal for
# [PURGATORY_EXPIRED] entries, write them as TSV and print a summary.
#
# Arguments:
#   $1            systemd unit name (".service" is appended when missing)
#   $2            output directory (created when needed)
#   --since DATE  start of the time window (default: 30 days ago)
#   --until DATE  end of the time window (default: now)
#   --dry-run     only count matching entries; nothing is written
# Outputs:
#   <output-dir>/purgatory-expired.txt (four "#" header lines, then one TSV
#   record per parsed entry). Progress goes to stderr through the log helpers;
#   the summary listing goes to stdout.
# Exit status:
#   0 on success (including "no entries found"); 1 on usage errors or when
#   journalctl is not available.
main() {
  if [[ $# -lt 2 ]]; then
    usage
  fi

  local service="$1"
  local output_dir="$2"
  shift 2

  # Default time range: last 30 days. GNU date syntax is tried first, then
  # the BSD form; if neither works the lower bound is left empty.
  local since_date
  since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null \
    || date -v-30d "+%Y-%m-%d" 2>/dev/null \
    || echo "")
  local until_date=""
  local dry_run=false

  # Parse options
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --since | --until)
        # Without this check a missing value would trip `set -u` with a
        # cryptic "unbound variable" message instead of the usage text.
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires a value"
          usage
        fi
        if [[ "$1" == "--since" ]]; then
          since_date="$2"
        else
          until_date="$2"
        fi
        shift 2
        ;;
      --dry-run)
        dry_run=true
        shift
        ;;
      *)
        log_error "Unknown option: $1"
        usage
        ;;
    esac
  done

  # Normalise the service name
  if [[ ! "$service" =~ \.service$ ]]; then
    service="${service}.service"
  fi

  log_info "Extracting purgatory expiry events from systemd logs"
  log_info "Service: $service"
  log_info "Output: $output_dir"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"

  # Check if journalctl is available
  if ! command -v journalctl &> /dev/null; then
    log_error "journalctl not found. This script requires systemd."
    exit 1
  fi

  # Build the journalctl invocation as an array so user-supplied dates are
  # passed as single arguments and never re-parsed by the shell (no eval).
  local marker_re='\[PURGATORY_EXPIRED\]'
  local -a journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)
  if [[ -n "$since_date" ]]; then
    journal_cmd+=(--since "$since_date")
  fi
  if [[ -n "$until_date" ]]; then
    journal_cmd+=(--until "$until_date")
  fi

  # %q yields a copy-pasteable rendering of the command for the log.
  local journal_display
  printf -v journal_display '%q ' "${journal_cmd[@]}"
  log_info "Running: ${journal_display}| grep '${marker_re}'"

  if [[ "$dry_run" == true ]]; then
    log_info "[DRY RUN] Would extract to: $output_dir/purgatory-expired.txt"

    log_info "Checking for matching log entries..."
    local sample_count
    # grep -c prints the count itself (including 0) and merely exits non-zero
    # when nothing matched, so only the exit status needs neutralising.
    sample_count=$("${journal_cmd[@]}" 2>/dev/null | grep -c "$marker_re" || true)
    sample_count="${sample_count:-0}"
    log_info "Found $sample_count matching log entries"

    if [[ "$sample_count" -eq 0 ]]; then
      log_warn "No [PURGATORY_EXPIRED] entries found in logs."
      log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
      log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
    fi

    exit 0
  fi

  # Create output directory
  mkdir -p "$output_dir"

  local output_file="$output_dir/purgatory-expired.txt"
  # Number of "#" header lines written before the first data record.
  local header_lines=4

  log_info "Extracting log entries..."

  # Raw log lines containing the marker. journalctl's stderr is discarded and
  # a non-zero pipeline status is tolerated, as before.
  # NOTE(review): a journal permission error is therefore indistinguishable
  # from "no entries found" - confirm that this best-effort mode is intended.
  local raw_lines
  raw_lines=$("${journal_cmd[@]}" 2>/dev/null | grep "$marker_re" || true)

  # Header shared by the empty and the populated output file.
  {
    echo "# Purgatory expiry events extracted from $service"
    echo "# Time range: ${since_date:-beginning} to ${until_date:-now}"
    echo "# Extracted: $(date -Iseconds)"
    echo "# Format: repo<TAB>npub<TAB>timestamp<TAB>reason"
  } > "$output_file"

  if [[ -z "$raw_lines" ]]; then
    log_warn "No [PURGATORY_EXPIRED] entries found in logs."
    log_warn ""
    log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
    log_warn "The structured log format required by this script:"
    log_warn ""
    log_warn " [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason=\"...\""
    log_warn ""
    log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
    log_warn ""

    # Mark the file as intentionally empty.
    {
      echo "#"
      echo "# NOTE: No [PURGATORY_EXPIRED] entries found."
      echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
    } >> "$output_file"

    log_info "Created empty output file: $output_file"
    exit 0
  fi

  # Parse each line
  local count=0
  local line parsed
  while IFS= read -r line; do
    parsed=$(parse_log_line "$line")
    if [[ -n "$parsed" ]]; then
      printf '%s\n' "$parsed" >> "$output_file"
      # Not ((count++)): that returns status 1 while count is 0 and would
      # abort the whole script under `set -e` on the very first record.
      count=$((count + 1))
    fi
  done <<< "$raw_lines"

  # Summary
  echo ""
  log_info "=== Extraction Summary ==="
  log_info "Service: $service"
  log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
  log_success "Extracted $count purgatory expiry entries"
  echo ""
  log_info "Output file: $output_file"

  if [[ $count -gt 0 ]]; then
    echo ""
    log_info "Sample entries (first 5):"
    # awk consumes the whole file, so no upstream writer can be killed by
    # SIGPIPE (which `pipefail` would turn into a script failure), and -F'\t'
    # keeps empty fields in place.
    awk -F'\t' -v skip="$header_lines" \
      'NR > skip && NR <= skip + 5 {
         printf " repo=%s npub=%s... timestamp=%s\n", $1, substr($2, 1, 20), $3
       }' "$output_file"

    # Per-repository event counts, most affected first; computed once and
    # reused for both the total and the top-10 listing.
    local repo_counts
    repo_counts=$(awk -F'\t' -v skip="$header_lines" 'NR > skip { print $1 }' "$output_file" \
      | sort | uniq -c | sort -rn)

    local unique_repos
    unique_repos=$(awk 'END { print NR }' <<< "$repo_counts")

    echo ""
    log_info "Unique repositories affected: $unique_repos"

    echo ""
    log_info "Repositories with purgatory expiry:"
    local listed=0
    local cnt repo
    while read -r cnt repo; do
      [[ $listed -lt 10 ]] || break
      echo " $repo: $cnt expiry events"
      listed=$((listed + 1))
    done <<< "$repo_counts"

    if [[ $unique_repos -gt 10 ]]; then
      echo " ... and $((unique_repos - 10)) more repositories"
    fi
  fi
}
345
# Script entry: hand all command-line arguments to main unchanged.
346main "$@"