diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-23 11:16:50 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-27 20:37:57 +0000 |
| commit | 28cc7820953efeafb2bc4d41ebcf3d682da86711 (patch) | |
| tree | 36de05f70acf0a46d57b19e851ff4b95af4919fd /docs/how-to/migration-scripts | |
| parent | 800dbfaa82428b897e271d0eb5d9e4c0f107f80b (diff) | |
Add Phase 4 migration scripts for log extraction
- 30-extract-parse-failures.sh: Extracts parse failure events from logs
- 31-extract-purgatory-expiry.sh: Extracts purgatory expiry events from logs
- Both support time range filtering (--since, --until)
- Includes dry-run mode for testing
- Gracefully handles missing logs with dependency notes
- TSV output format for Phase 5 consumption
- Ready for when structured logging is implemented in ngit-grasp
Diffstat (limited to 'docs/how-to/migration-scripts')
| -rwxr-xr-x | docs/how-to/migration-scripts/30-extract-parse-failures.sh | 328 | ||||
| -rwxr-xr-x | docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh | 346 |
2 files changed, 674 insertions, 0 deletions
diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh new file mode 100755 index 0000000..753fd3e --- /dev/null +++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh | |||
| @@ -0,0 +1,328 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 30-extract-parse-failures.sh - Extract parse failure events from systemd logs | ||
| 4 | # | ||
| 5 | # PHASE 4a of the ngit-relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Extracts structured [PARSE_FAIL] log entries from journalctl. | ||
| 7 | # | ||
| 8 | # USAGE: | ||
| 9 | # ./30-extract-parse-failures.sh <service-name> <output-dir> [options] | ||
| 10 | # | ||
| 11 | # EXAMPLES: | ||
| 12 | # # Extract from ngit-grasp service (last 30 days, default) | ||
| 13 | # ./30-extract-parse-failures.sh ngit-grasp.service output/logs | ||
| 14 | # | ||
| 15 | # # Extract with custom time range | ||
| 16 | # ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-01" | ||
| 17 | # | ||
| 18 | # # Extract from specific time window | ||
| 19 | # ./30-extract-parse-failures.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22" | ||
| 20 | # | ||
| 21 | # OPTIONS: | ||
| 22 | # --since <date> Start date for log extraction (default: 30 days ago) | ||
| 23 | # --until <date> End date for log extraction (default: now) | ||
| 24 | # --dry-run Show what would be extracted without writing files | ||
| 25 | # | ||
| 26 | # OUTPUT: | ||
| 27 | # <output-dir>/parse-failures.txt | ||
| 28 | # | ||
| 29 | # OUTPUT FORMAT (TSV): | ||
| 30 | # repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason | ||
| 31 | # | ||
| 32 | # EXPECTED LOG FORMAT: | ||
| 33 | # The script looks for structured log entries in this format: | ||
| 34 | # | ||
| 35 | # 2026-01-22T10:30:45+0000 myhost ngit-grasp[1234]: [PARSE_FAIL] kind=30618 event_id=abc123... reason="invalid refs format" repo=myrepo npub=npub1... | ||
| 36 | # | ||
| 37 | # Required fields: kind, event_id, reason | ||
| 38 | # Optional fields: repo, npub (may not be available if parsing failed early) | ||
| 39 | # | ||
| 40 | # DEPENDENCY: | ||
| 41 | # This script requires logging improvements in ngit-grasp to emit structured | ||
| 42 | # [PARSE_FAIL] log entries. Until those are implemented, this script will | ||
| 43 | # find no matching entries (which is handled gracefully). | ||
| 44 | # | ||
| 45 | # See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section) | ||
| 46 | # | ||
| 47 | # Expected Rust logging code: | ||
| 48 | # tracing::warn!( | ||
| 49 | # target: "migration", | ||
| 50 | # "[PARSE_FAIL] kind={} event_id={} reason=\"{}\" repo={} npub={}", | ||
| 51 | # event.kind, event.id, reason, identifier, npub | ||
| 52 | # ); | ||
| 53 | # | ||
| 54 | # PREREQUISITES: | ||
| 55 | # - journalctl (systemd) | ||
| 56 | # - grep, awk (standard Unix tools) | ||
| 57 | # - Access to systemd journal (may require sudo or journal group membership) | ||
| 58 | # | ||
| 59 | # RUNTIME: Depends on log volume, typically < 30 seconds | ||
| 60 | # | ||
| 61 | # SEE ALSO: | ||
| 62 | # docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide | ||
| 63 | # 31-extract-purgatory-expiry.sh - Companion script for purgatory expiry logs | ||
| 64 | # | ||
| 65 | |||
# Strict mode: abort on errors, on use of unset variables, and on a failure
# anywhere in a pipeline.
set -euo pipefail

# ANSI colors used by the log_* helpers. Those helpers write to stderr, so
# the terminal check is made on fd 2: checking stdout would drop the colors
# whenever stdout is piped, and would write raw escape codes into a file
# whenever only stderr is redirected.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
| 82 | |||
# Shared formatter for the log_* helpers.
#   $1  color escape sequence (may be empty)
#   $2  tag shown in brackets, e.g. INFO
#   $@  message words, joined with spaces
# Writes "<color>[TAG]<reset> message" to stderr. Only the color codes are
# escape-interpreted (%b); the message itself is printed verbatim (%s), so
# backslashes in service names, dates or paths are never turned into
# newlines or tabs the way `echo -e` would.
_log() {
    local color="$1" tag="$2"
    shift 2
    printf '%b[%s]%b %s\n' "$color" "$tag" "${NC:-}" "$*" >&2
}

# Informational progress message (stderr).
log_info() {
    _log "${BLUE:-}" "INFO" "$@"
}

# Success message (stderr).
log_success() {
    _log "${GREEN:-}" "OK" "$@"
}

# Warning message (stderr).
log_warn() {
    _log "${YELLOW:-}" "WARN" "$@"
}

# Error message (stderr); does not exit by itself.
log_error() {
    _log "${RED:-}" "ERROR" "$@"
}
| 98 | |||
# Print the command-line help on stdout and terminate the script with
# exit status 1. Never returns to the caller.
usage() {
    cat <<EOF
Usage: $0 <service-name> <output-dir> [options]

Arguments:
 service-name Systemd service name (e.g., ngit-grasp.service)
 output-dir Directory to store extracted log data

Options:
 --since <date> Start date (default: 30 days ago)
 --until <date> End date (default: now)
 --dry-run Show what would be extracted without writing

Examples:
 $0 ngit-grasp.service output/logs
 $0 ngit-grasp.service output/logs --since '2026-01-01'
 $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'

Expected log format:
 [PARSE_FAIL] kind=30618 event_id=abc123 reason="..." repo=myrepo npub=npub1...
EOF
    exit 1
}
| 120 | |||
# Parse a single journal line containing [PARSE_FAIL] into one TSV record.
#
# Input:
#   $1  raw log line
# Output (stdout):
#   repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason
#   Nothing is printed when a required field (kind, event_id, reason) is
#   missing or empty; repo and npub are optional and may be empty.
# Returns: always 0, so callers running under `set -e` are not aborted by
#   unparseable lines.
#
# Matching is done with bash regexes instead of `grep -oP`: only the first
# occurrence of each key is used (grep -o printed every occurrence, which
# produced multi-line fields), and no subprocess is spawned per field.
parse_log_line() {
    local line="$1"
    local kind="" event_id="" reason="" repo="" npub=""

    # Patterns are kept in variables so the shell does not re-interpret
    # their quoting. Every key must start the payload or follow whitespace,
    # so "kind=" cannot match inside a longer word.
    local re_reason='(^|[[:space:]])reason="([^"]*)"'
    local re_kind='(^|[[:space:]])kind=([0-9]+)'
    local re_event_id='(^|[[:space:]])event_id=([a-f0-9]+)'
    local re_repo='(^|[[:space:]])repo=([^[:space:]]+)'
    local re_npub='(^|[[:space:]])npub=([^[:space:]]+)'

    # Ignore the journal prefix (timestamp, host, unit) in front of the
    # marker; a line without the marker is parsed as a whole.
    local payload="${line#*'[PARSE_FAIL]'}"

    # Take the quoted reason out first: free text such as
    # reason="bad repo=x" must not be mistaken for real fields.
    if [[ "$payload" =~ $re_reason ]]; then
        reason="${BASH_REMATCH[2]}"
        payload="${payload/"${BASH_REMATCH[0]}"/ }"
        # A literal tab inside the reason would add a TSV column.
        reason="${reason//$'\t'/ }"
    fi

    if [[ "$payload" =~ $re_kind ]]; then
        kind="${BASH_REMATCH[2]}"
    fi

    # event_id is a hex string; a trailing "..." (truncated id) is dropped.
    if [[ "$payload" =~ $re_event_id ]]; then
        event_id="${BASH_REMATCH[2]}"
    fi

    if [[ "$payload" =~ $re_repo ]]; then
        repo="${BASH_REMATCH[2]}"
    fi

    if [[ "$payload" =~ $re_npub ]]; then
        npub="${BASH_REMATCH[2]}"
    fi

    # Only emit a record when all required fields are present.
    if [[ -n "$kind" && -n "$event_id" && -n "$reason" ]]; then
        printf '%s\t%s\t%s\t%s\t%s\n' "$repo" "$npub" "$kind" "$event_id" "$reason"
    fi
}
| 152 | |||
# Main entry point: extract [PARSE_FAIL] journal entries for a systemd unit
# and write them as TSV.
#
# Arguments:
#   $1              service name (".service" is appended when missing)
#   $2              output directory (created when it does not exist)
#   --since <date>  start of the time range (default: 30 days ago)
#   --until <date>  end of the time range (default: now)
#   --dry-run       only count matching entries; write nothing
# Output:
#   <output-dir>/parse-failures.txt: four "#" header lines followed by one
#   row per entry (repo, npub, kind, event_id, reason). When nothing
#   matches, the file holds only the header plus an explanatory note.
# Exit status:
#   0 on success (including "no entries found"); 1 on usage errors or when
#   journalctl is not installed.
main() {
    if [[ $# -lt 2 ]]; then
        usage
    fi

    local service="$1"
    local output_dir="$2"
    shift 2

    # Default time range: last 30 days. GNU date syntax is tried first, then
    # BSD; if neither works the lower bound is left open.
    local since_date
    since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "")
    local until_date=""
    local dry_run=false

    # Parse options
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --since)
                # Under `set -u` a missing value would otherwise abort with
                # an unhelpful "unbound variable" error.
                if [[ $# -lt 2 ]]; then
                    log_error "Option --since requires a value"
                    usage
                fi
                since_date="$2"
                shift 2
                ;;
            --until)
                if [[ $# -lt 2 ]]; then
                    log_error "Option --until requires a value"
                    usage
                fi
                until_date="$2"
                shift 2
                ;;
            --dry-run)
                dry_run=true
                shift
                ;;
            *)
                log_error "Unknown option: $1"
                usage
                ;;
        esac
    done

    # Normalize the unit name
    if [[ ! "$service" =~ \.service$ ]]; then
        service="${service}.service"
    fi

    log_info "Extracting parse failures from systemd logs"
    log_info "Service: $service"
    log_info "Output: $output_dir"
    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"

    # Check if journalctl is available
    if ! command -v journalctl &> /dev/null; then
        log_error "journalctl not found. This script requires systemd."
        exit 1
    fi

    # Build the journalctl invocation as an argument array. The user-supplied
    # service name and dates are passed as single arguments and never go
    # through `eval`, so quotes or shell metacharacters in them cannot
    # inject commands.
    local -a journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)

    if [[ -n "$since_date" ]]; then
        journal_cmd+=(--since "$since_date")
    fi

    if [[ -n "$until_date" ]]; then
        journal_cmd+=(--until "$until_date")
    fi

    # Shell-quoted rendering of the command, for display only.
    local journal_display
    printf -v journal_display '%q ' "${journal_cmd[@]}"
    log_info "Running: ${journal_display}| grep '\\[PARSE_FAIL\\]'"

    if [[ "$dry_run" == true ]]; then
        log_info "[DRY RUN] Would extract to: $output_dir/parse-failures.txt"

        log_info "Checking for matching log entries..."
        # grep -c always prints a count (0 when nothing matches) but exits
        # non-zero in that case; `|| true` keeps errexit/pipefail quiet
        # without appending a second "0" to the captured output.
        local sample_count
        sample_count=$("${journal_cmd[@]}" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || true)
        sample_count="${sample_count:-0}"
        log_info "Found $sample_count matching log entries"

        if [[ "$sample_count" -eq 0 ]]; then
            log_warn "No [PARSE_FAIL] entries found in logs."
            log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
            log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
        fi

        exit 0
    fi

    # Create output directory
    mkdir -p "$output_dir"

    local output_file="$output_dir/parse-failures.txt"
    # Number of "#" header lines written in front of the data rows.
    local -r header_lines=4

    log_info "Extracting log entries..."

    # Raw journal lines containing the marker. journalctl errors are
    # deliberately ignored: an unreadable or empty journal is reported as
    # "no entries" below.
    local raw_lines
    raw_lines=$("${journal_cmd[@]}" 2>/dev/null | grep '\[PARSE_FAIL\]' || true)

    # Write header (shared by the empty and the non-empty result)
    {
        echo "# Parse failures extracted from $service"
        echo "# Time range: ${since_date:-beginning} to ${until_date:-now}"
        echo "# Extracted: $(date -Iseconds)"
        echo "# Format: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason"
    } > "$output_file"

    if [[ -z "$raw_lines" ]]; then
        log_warn "No [PARSE_FAIL] entries found in logs."
        log_warn ""
        log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
        log_warn "The structured log format required by this script:"
        log_warn ""
        log_warn " [PARSE_FAIL] kind=30618 event_id=abc123 reason=\"...\" repo=myrepo npub=npub1..."
        log_warn ""
        log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
        log_warn ""

        # Mark the header-only file as intentionally empty
        {
            echo "#"
            echo "# NOTE: No [PARSE_FAIL] entries found."
            echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
        } >> "$output_file"

        log_info "Created empty output file: $output_file"
        exit 0
    fi

    # Parse each line and append the resulting rows to the output file.
    local count=0
    local line parsed
    while IFS= read -r line; do
        parsed=$(parse_log_line "$line")
        if [[ -n "$parsed" ]]; then
            printf '%s\n' "$parsed"
            # Not ((count++)): that returns status 1 while count is 0, which
            # under `set -e` aborts the script on the very first entry.
            count=$((count + 1))
        fi
    done <<< "$raw_lines" >> "$output_file"

    # Summary
    echo ""
    log_info "=== Extraction Summary ==="
    log_info "Service: $service"
    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
    log_success "Extracted $count parse failure entries"
    echo ""
    log_info "Output file: $output_file"

    if [[ $count -gt 0 ]]; then
        echo ""
        log_info "Sample entries (first 5):"
        # awk splits on every tab, so empty optional fields (repo/npub) keep
        # their column; `read` with a tab IFS would collapse them. Reading
        # the file directly also avoids a SIGPIPE failure under pipefail.
        awk -F'\t' -v skip="$header_lines" \
            'NR > skip && NR <= skip + 5 { printf " kind=%s repo=%s reason=\"%s\"\n", $3, $1, $5 }' \
            "$output_file"

        # Breakdown by kind
        echo ""
        log_info "Breakdown by event kind:"
        local cnt kind
        awk -F'\t' -v skip="$header_lines" 'NR > skip { print $3 }' "$output_file" \
            | sort | uniq -c | sort -rn \
            | while read -r cnt kind; do
                echo " kind $kind: $cnt failures"
            done
    fi
}
| 327 | |||
| 328 | main "$@" | ||
diff --git a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh new file mode 100755 index 0000000..38b2ca3 --- /dev/null +++ b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh | |||
| @@ -0,0 +1,346 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | # | ||
| 3 | # 31-extract-purgatory-expiry.sh - Extract purgatory expiry events from systemd logs | ||
| 4 | # | ||
| 5 | # PHASE 4b of the ngit-relay to ngit-grasp migration analysis pipeline. | ||
| 6 | # Extracts structured [PURGATORY_EXPIRED] log entries from journalctl. | ||
| 7 | # | ||
| 8 | # USAGE: | ||
| 9 | # ./31-extract-purgatory-expiry.sh <service-name> <output-dir> [options] | ||
| 10 | # | ||
| 11 | # EXAMPLES: | ||
| 12 | # # Extract from ngit-grasp service (last 30 days, default) | ||
| 13 | # ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs | ||
| 14 | # | ||
| 15 | # # Extract with custom time range | ||
| 16 | # ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-01" | ||
| 17 | # | ||
| 18 | # # Extract from specific time window | ||
| 19 | # ./31-extract-purgatory-expiry.sh ngit-grasp.service output/logs --since "2026-01-15" --until "2026-01-22" | ||
| 20 | # | ||
| 21 | # OPTIONS: | ||
| 22 | # --since <date> Start date for log extraction (default: 30 days ago) | ||
| 23 | # --until <date> End date for log extraction (default: now) | ||
| 24 | # --dry-run Show what would be extracted without writing files | ||
| 25 | # | ||
| 26 | # OUTPUT: | ||
| 27 | # <output-dir>/purgatory-expired.txt | ||
| 28 | # | ||
| 29 | # OUTPUT FORMAT (TSV): | ||
| 30 | # repo<TAB>npub<TAB>timestamp<TAB>reason | ||
| 31 | # | ||
| 32 | # EXPECTED LOG FORMAT: | ||
| 33 | # The script looks for structured log entries in this format: | ||
| 34 | # | ||
| 35 | # 2026-01-22T10:30:45+0000 myhost ngit-grasp[1234]: [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="clone URL unreachable after 7 days" | ||
| 36 | # | ||
| 37 | # Required fields: repo, npub | ||
| 38 | # Optional fields: reason (explains why purgatory expired) | ||
| 39 | # | ||
| 40 | # BACKGROUND: | ||
| 41 | # "Purgatory" is the state where ngit-grasp has received an announcement event | ||
| 42 | # but cannot yet sync the git data (e.g., clone URL unreachable, git server down). | ||
| 43 | # After a configurable timeout (default 7 days), the repository is marked as | ||
| 44 | # expired and removed from purgatory. | ||
| 45 | # | ||
| 46 | # Purgatory expiry during migration analysis indicates repositories that: | ||
| 47 | # - Had valid announcements on the production relay | ||
| 48 | # - Could not be synced to the archive relay | ||
| 49 | # - May need manual intervention or investigation | ||
| 50 | # | ||
| 51 | # DEPENDENCY: | ||
| 52 | # This script requires logging improvements in ngit-grasp to emit structured | ||
| 53 | # [PURGATORY_EXPIRED] log entries. Until those are implemented, this script | ||
| 54 | # will find no matching entries (which is handled gracefully). | ||
| 55 | # | ||
| 56 | # See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section) | ||
| 57 | # | ||
| 58 | # Expected Rust logging code: | ||
| 59 | # tracing::warn!( | ||
| 60 | # target: "migration", | ||
| 61 | # "[PURGATORY_EXPIRED] repo={} npub={} reason=\"{}\"", | ||
| 62 | # identifier, npub, reason | ||
| 63 | # ); | ||
| 64 | # | ||
| 65 | # PREREQUISITES: | ||
| 66 | # - journalctl (systemd) | ||
| 67 | # - grep, awk (standard Unix tools) | ||
| 68 | # - Access to systemd journal (may require sudo or journal group membership) | ||
| 69 | # | ||
| 70 | # RUNTIME: Depends on log volume, typically < 30 seconds | ||
| 71 | # | ||
| 72 | # SEE ALSO: | ||
| 73 | # docs/how-to/migrate-ngit-relay-to-ngit-grasp.md - Full migration guide | ||
| 74 | # 30-extract-parse-failures.sh - Companion script for parse failure logs | ||
| 75 | # | ||
| 76 | |||
# Strict mode: abort on errors, on use of unset variables, and on a failure
# anywhere in a pipeline.
set -euo pipefail

# ANSI colors used by the log_* helpers. Those helpers write to stderr, so
# the terminal check is made on fd 2: checking stdout would drop the colors
# whenever stdout is piped, and would write raw escape codes into a file
# whenever only stderr is redirected.
if [[ -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    NC='\033[0m'
else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
fi
| 93 | |||
# Shared formatter for the log_* helpers.
#   $1  color escape sequence (may be empty)
#   $2  tag shown in brackets, e.g. INFO
#   $@  message words, joined with spaces
# Writes "<color>[TAG]<reset> message" to stderr. Only the color codes are
# escape-interpreted (%b); the message itself is printed verbatim (%s), so
# backslashes in service names, dates or paths are never turned into
# newlines or tabs the way `echo -e` would.
_log() {
    local color="$1" tag="$2"
    shift 2
    printf '%b[%s]%b %s\n' "$color" "$tag" "${NC:-}" "$*" >&2
}

# Informational progress message (stderr).
log_info() {
    _log "${BLUE:-}" "INFO" "$@"
}

# Success message (stderr).
log_success() {
    _log "${GREEN:-}" "OK" "$@"
}

# Warning message (stderr).
log_warn() {
    _log "${YELLOW:-}" "WARN" "$@"
}

# Error message (stderr); does not exit by itself.
log_error() {
    _log "${RED:-}" "ERROR" "$@"
}
| 109 | |||
# Print the command-line help on stdout and terminate the script with
# exit status 1. Never returns to the caller.
usage() {
    cat <<EOF
Usage: $0 <service-name> <output-dir> [options]

Arguments:
 service-name Systemd service name (e.g., ngit-grasp.service)
 output-dir Directory to store extracted log data

Options:
 --since <date> Start date (default: 30 days ago)
 --until <date> End date (default: now)
 --dry-run Show what would be extracted without writing

Examples:
 $0 ngit-grasp.service output/logs
 $0 ngit-grasp.service output/logs --since '2026-01-01'
 $0 ngit-grasp.service output/logs --since '2026-01-15' --until '2026-01-22'

Expected log format:
 [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason="..."
EOF
    exit 1
}
| 131 | |||
# Parse a single journal line containing [PURGATORY_EXPIRED] into one TSV
# record.
#
# Input:
#   $1  raw log line
# Output (stdout):
#   repo<TAB>npub<TAB>timestamp<TAB>reason
#   Nothing is printed when a required field (repo, npub) is missing.
#   timestamp is the leading YYYY-MM-DDTHH:MM:SS of the line (the UTC offset
#   is dropped) and is empty when the line does not start with one; reason
#   is optional and may be empty.
# Returns: always 0, so callers running under `set -e` are not aborted by
#   unparseable lines.
#
# Matching is done with bash regexes instead of `grep -oP`: only the first
# occurrence of each key is used (grep -o printed every occurrence, which
# produced multi-line fields), and no subprocess is spawned per field.
parse_log_line() {
    local line="$1"
    local timestamp="" repo="" npub="" reason=""

    # Patterns are kept in variables so the shell does not re-interpret
    # their quoting. Every key must start the payload or follow whitespace.
    local re_timestamp='^([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2})'
    local re_reason='(^|[[:space:]])reason="([^"]*)"'
    local re_repo='(^|[[:space:]])repo=([^[:space:]]+)'
    local re_npub='(^|[[:space:]])npub=([^[:space:]]+)'

    # The ISO timestamp is taken from the very start of the full line.
    if [[ "$line" =~ $re_timestamp ]]; then
        timestamp="${BASH_REMATCH[1]}"
    fi

    # Ignore the journal prefix (timestamp, host, unit) in front of the
    # marker; a line without the marker is parsed as a whole.
    local payload="${line#*'[PURGATORY_EXPIRED]'}"

    # Take the quoted reason out first: free text such as
    # reason="mirror repo=x gone" must not be mistaken for real fields.
    if [[ "$payload" =~ $re_reason ]]; then
        reason="${BASH_REMATCH[2]}"
        payload="${payload/"${BASH_REMATCH[0]}"/ }"
        # A literal tab inside the reason would add a TSV column.
        reason="${reason//$'\t'/ }"
    fi

    if [[ "$payload" =~ $re_repo ]]; then
        repo="${BASH_REMATCH[2]}"
    fi

    if [[ "$payload" =~ $re_npub ]]; then
        npub="${BASH_REMATCH[2]}"
    fi

    # Only emit a record when all required fields are present.
    if [[ -n "$repo" && -n "$npub" ]]; then
        printf '%s\t%s\t%s\t%s\n' "$repo" "$npub" "$timestamp" "$reason"
    fi
}
| 159 | |||
# Main entry point: extract [PURGATORY_EXPIRED] journal entries for a systemd
# unit and write them as TSV.
#
# Arguments:
#   $1              service name (".service" is appended when missing)
#   $2              output directory (created when it does not exist)
#   --since <date>  start of the time range (default: 30 days ago)
#   --until <date>  end of the time range (default: now)
#   --dry-run       only count matching entries; write nothing
# Output:
#   <output-dir>/purgatory-expired.txt: four "#" header lines followed by
#   one row per entry (repo, npub, timestamp, reason). When nothing matches,
#   the file holds only the header plus an explanatory note.
# Exit status:
#   0 on success (including "no entries found"); 1 on usage errors or when
#   journalctl is not installed.
main() {
    if [[ $# -lt 2 ]]; then
        usage
    fi

    local service="$1"
    local output_dir="$2"
    shift 2

    # Default time range: last 30 days. GNU date syntax is tried first, then
    # BSD; if neither works the lower bound is left open.
    local since_date
    since_date=$(date -d "30 days ago" "+%Y-%m-%d" 2>/dev/null || date -v-30d "+%Y-%m-%d" 2>/dev/null || echo "")
    local until_date=""
    local dry_run=false

    # Parse options
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --since)
                # Under `set -u` a missing value would otherwise abort with
                # an unhelpful "unbound variable" error.
                if [[ $# -lt 2 ]]; then
                    log_error "Option --since requires a value"
                    usage
                fi
                since_date="$2"
                shift 2
                ;;
            --until)
                if [[ $# -lt 2 ]]; then
                    log_error "Option --until requires a value"
                    usage
                fi
                until_date="$2"
                shift 2
                ;;
            --dry-run)
                dry_run=true
                shift
                ;;
            *)
                log_error "Unknown option: $1"
                usage
                ;;
        esac
    done

    # Normalize the unit name
    if [[ ! "$service" =~ \.service$ ]]; then
        service="${service}.service"
    fi

    log_info "Extracting purgatory expiry events from systemd logs"
    log_info "Service: $service"
    log_info "Output: $output_dir"
    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"

    # Check if journalctl is available
    if ! command -v journalctl &> /dev/null; then
        log_error "journalctl not found. This script requires systemd."
        exit 1
    fi

    # Build the journalctl invocation as an argument array. The user-supplied
    # service name and dates are passed as single arguments and never go
    # through `eval`, so quotes or shell metacharacters in them cannot
    # inject commands.
    local -a journal_cmd=(journalctl -u "$service" --no-pager -o short-iso)

    if [[ -n "$since_date" ]]; then
        journal_cmd+=(--since "$since_date")
    fi

    if [[ -n "$until_date" ]]; then
        journal_cmd+=(--until "$until_date")
    fi

    # Shell-quoted rendering of the command, for display only.
    local journal_display
    printf -v journal_display '%q ' "${journal_cmd[@]}"
    log_info "Running: ${journal_display}| grep '\\[PURGATORY_EXPIRED\\]'"

    if [[ "$dry_run" == true ]]; then
        log_info "[DRY RUN] Would extract to: $output_dir/purgatory-expired.txt"

        log_info "Checking for matching log entries..."
        # grep -c always prints a count (0 when nothing matches) but exits
        # non-zero in that case; `|| true` keeps errexit/pipefail quiet
        # without appending a second "0" to the captured output.
        local sample_count
        sample_count=$("${journal_cmd[@]}" 2>/dev/null | grep -c '\[PURGATORY_EXPIRED\]' || true)
        sample_count="${sample_count:-0}"
        log_info "Found $sample_count matching log entries"

        if [[ "$sample_count" -eq 0 ]]; then
            log_warn "No [PURGATORY_EXPIRED] entries found in logs."
            log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
            log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
        fi

        exit 0
    fi

    # Create output directory
    mkdir -p "$output_dir"

    local output_file="$output_dir/purgatory-expired.txt"
    # Number of "#" header lines written in front of the data rows.
    local -r header_lines=4

    log_info "Extracting log entries..."

    # Raw journal lines containing the marker. journalctl errors are
    # deliberately ignored: an unreadable or empty journal is reported as
    # "no entries" below.
    local raw_lines
    raw_lines=$("${journal_cmd[@]}" 2>/dev/null | grep '\[PURGATORY_EXPIRED\]' || true)

    # Write header (shared by the empty and the non-empty result)
    {
        echo "# Purgatory expiry events extracted from $service"
        echo "# Time range: ${since_date:-beginning} to ${until_date:-now}"
        echo "# Extracted: $(date -Iseconds)"
        echo "# Format: repo<TAB>npub<TAB>timestamp<TAB>reason"
    } > "$output_file"

    if [[ -z "$raw_lines" ]]; then
        log_warn "No [PURGATORY_EXPIRED] entries found in logs."
        log_warn ""
        log_warn "This is expected if ngit-grasp logging improvements are not yet deployed."
        log_warn "The structured log format required by this script:"
        log_warn ""
        log_warn " [PURGATORY_EXPIRED] repo=myrepo npub=npub1... reason=\"...\""
        log_warn ""
        log_warn "See: docs/how-to/migrate-ngit-relay-to-ngit-grasp.md (Dependencies section)"
        log_warn ""

        # Mark the header-only file as intentionally empty
        {
            echo "#"
            echo "# NOTE: No [PURGATORY_EXPIRED] entries found."
            echo "# This is expected if ngit-grasp logging improvements are not yet deployed."
        } >> "$output_file"

        log_info "Created empty output file: $output_file"
        exit 0
    fi

    # Parse each line and append the resulting rows to the output file.
    local count=0
    local line parsed
    while IFS= read -r line; do
        parsed=$(parse_log_line "$line")
        if [[ -n "$parsed" ]]; then
            printf '%s\n' "$parsed"
            # Not ((count++)): that returns status 1 while count is 0, which
            # under `set -e` aborts the script on the very first entry.
            count=$((count + 1))
        fi
    done <<< "$raw_lines" >> "$output_file"

    # Summary
    echo ""
    log_info "=== Extraction Summary ==="
    log_info "Service: $service"
    log_info "Time range: ${since_date:-beginning} to ${until_date:-now}"
    log_success "Extracted $count purgatory expiry entries"
    echo ""
    log_info "Output file: $output_file"

    if [[ $count -gt 0 ]]; then
        echo ""
        log_info "Sample entries (first 5):"
        # awk splits on every tab, so an empty timestamp keeps its column;
        # `read` with a tab IFS would collapse it. Reading the file directly
        # also avoids a SIGPIPE failure under pipefail. npub is shortened to
        # its first 20 characters for display.
        awk -F'\t' -v skip="$header_lines" \
            'NR > skip && NR <= skip + 5 { printf " repo=%s npub=%s... timestamp=%s\n", $1, substr($2, 1, 20), $3 }' \
            "$output_file"

        # Show unique repos affected
        echo ""
        local unique_repos
        unique_repos=$(awk -F'\t' -v skip="$header_lines" 'NR > skip { print $1 }' "$output_file" | sort -u | wc -l)
        # Some wc implementations pad the count with spaces.
        unique_repos="${unique_repos//[[:space:]]/}"
        log_info "Unique repositories affected: $unique_repos"

        echo ""
        log_info "Repositories with purgatory expiry:"
        # `awk 'NR <= 10'` reads its whole input, unlike `head -10`, so the
        # upstream sort cannot be killed by SIGPIPE and trip pipefail.
        local cnt repo
        awk -F'\t' -v skip="$header_lines" 'NR > skip { print $1 }' "$output_file" \
            | sort | uniq -c | sort -rn | awk 'NR <= 10' \
            | while read -r cnt repo; do
                echo " $repo: $cnt expiry events"
            done

        # The top-10 list is cut off; say how many repositories were omitted.
        if [[ $unique_repos -gt 10 ]]; then
            echo " ... and $((unique_repos - 10)) more repositories"
        fi
    fi
}
| 345 | |||
| 346 | main "$@" | ||