diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-26 07:19:19 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-27 20:38:09 +0000 |
| commit | ef50f4bd9775e86011d8d636df13422c0dcec287 (patch) | |
| tree | 068396117f44e59318a4ffff6535547e7020c6f7 /docs/how-to/migration-scripts/30-extract-parse-failures.sh | |
| parent | 0e00db4decfa779c26c6c7648b2badcc5704e6f8 (diff) | |
Fix parse failure output to show repo|npub instead of event_id|kind
Phase 4 (30-extract-parse-failures.sh) now enriches parse failures with
repo name and npub by looking up event_id in announcements.json. This is
critical because 'Invalid announcement' rejections only log event_id and
kind, not the repo name or npub.
Phase 5 (40-classify-actions.sh) was also fixed to extract columns 4 and 5
(repo|npub) instead of columns 1 and 2 (event_id|kind) from parse-failures.txt.
Without this fix, action-required.txt showed unusable output like:
000014b2... | 30617 | parse failure logged | fix event format...
Now it correctly shows:
scripts | npub1hs5244... | parse failure logged | fix event format...
The enrichment uses jq to build a lookup table from announcements.json and
optionally uses 'nak' to convert hex pubkeys to npub format.
Diffstat (limited to 'docs/how-to/migration-scripts/30-extract-parse-failures.sh')
| -rwxr-xr-x | docs/how-to/migration-scripts/30-extract-parse-failures.sh | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh index f821834..f86e9f8 100755 --- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh +++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh | |||
| @@ -211,6 +211,140 @@ parse_write_policy_rejection_line() { | |||
| 211 | # the same event to be counted twice. Write policy logs contain the same | 211 | # the same event to be counted twice. Write policy logs contain the same |
| 212 | # events, so we don't lose any data by only extracting from that source. | 212 | # events, so we don't lose any data by only extracting from that source. |
| 213 | 213 | ||
# Enrich parse failures with repo/npub by looking up event_id in announcements.json.
# This is critical because "Invalid announcement" rejections only log event_id and kind,
# not the repo name or npub. Without enrichment, Phase 5 shows event_id|kind instead
# of repo|npub in action-required.txt, making the output unusable.
#
# Arguments:
#   $1 - parse failures file to enrich (modified in place); tab-separated columns:
#        event_id, kind, reason, repo, npub. Lines starting with '#' are headers.
#   $2 - analysis root directory containing prod/raw/announcements.json
#
# Returns:
#   Always 0. Enrichment is best-effort: if the announcements file, jq, or a
#   temporary file is unavailable, a warning is logged and $1 is left untouched.
#
# The function:
#   1. Builds a lookup table from announcements.json: event_id -> repo|npub
#      (npub is bech32-encoded via 'nak' when available, otherwise the hex pubkey)
#   2. For each parse failure with an empty repo or npub, looks up the event_id
#   3. Populates the empty repo/npub columns from the lookup
enrich_with_repo_npub() {
    local parse_failures_file="$1"
    local analysis_root="$2"

    local prod_announcements="$analysis_root/prod/raw/announcements.json"

    # Validate required file exists
    if [[ ! -f "$prod_announcements" ]]; then
        log_warn "Production announcements file not found: $prod_announcements"
        log_warn "Skipping enrichment - repo/npub columns will remain empty"
        return 0
    fi

    # Check if jq is available
    if ! command -v jq &> /dev/null; then
        log_warn "jq not found - cannot enrich parse failures with repo/npub"
        log_warn "Install jq or run without --analysis-root"
        return 0
    fi

    log_info "Enriching parse failures with repo/npub from announcements..."

    # All scratch files come from mktemp (no predictable sibling names).
    local lookup_raw="" lookup_file="" enriched_file=""
    if ! lookup_raw=$(mktemp) || ! lookup_file=$(mktemp) || ! enriched_file=$(mktemp); then
        log_warn "Could not create temporary files - skipping enrichment"
        # ${var:+...} expands to nothing for the files that were never created.
        rm -f -- ${lookup_raw:+"$lookup_raw"} ${lookup_file:+"$lookup_file"} ${enriched_file:+"$enriched_file"}
        return 0
    fi

    # Step 1: Build lookup table from announcements.json
    # Raw format: event_id<TAB>repo (d-tag)<TAB>pubkey (hex)
    log_info "  Building event_id -> repo/npub lookup table..."

    jq -r 'select(.kind == 30617) |
        .id as $id |
        .pubkey as $pubkey |
        ((.tags[] | select(.[0] == "d") | .[1]) // "") as $dtag |
        "\($id)\t\($dtag)\t\($pubkey)"' "$prod_announcements" > "$lookup_raw" 2>/dev/null || {
        log_warn "Failed to parse production announcements JSON"
        rm -f "$lookup_raw" "$lookup_file" "$enriched_file"
        return 0
    }

    # Convert hex pubkeys to npub format when a bech32-capable tool is installed;
    # otherwise the hex pubkey is kept as-is.
    local can_convert_npub=false
    if command -v nak &> /dev/null; then
        can_convert_npub=true
        log_info "  Using 'nak' for pubkey->npub conversion"
    fi

    # Split each row manually instead of `IFS=$'\t' read a b c`: tab is an IFS
    # whitespace character, so consecutive tabs collapse and an empty d-tag
    # would shift the pubkey into the repo column.
    local tab=$'\t'
    local line rest event_id repo pubkey_hex npub
    while IFS= read -r line; do
        event_id="${line%%"$tab"*}"
        rest="${line#*"$tab"}"
        repo="${rest%%"$tab"*}"
        pubkey_hex="${rest#*"$tab"}"

        # Reset on every iteration: a stale value from the previous row would
        # otherwise be written for every row once the nak branch is skipped.
        npub=""
        if [[ "$can_convert_npub" == true && -n "$pubkey_hex" ]]; then
            # stdin is redirected so nak cannot consume the loop's input file.
            npub=$(nak encode npub "$pubkey_hex" 2>/dev/null < /dev/null || echo "")
        fi

        # Fall back to the hex pubkey if conversion was skipped or failed
        printf '%s\t%s\t%s\n' "$event_id" "$repo" "${npub:-$pubkey_hex}"
    done < "$lookup_raw" > "$lookup_file"

    rm -f "$lookup_raw"

    local lookup_count
    lookup_count=$(wc -l < "$lookup_file")
    lookup_count="${lookup_count//[^0-9]/}"
    log_info "  Built lookup table with $lookup_count announcements"

    # Step 2: Enrich parse failures
    # Header lines go first. grep exits 1 when there are none; that is not an error.
    grep '^#' "$parse_failures_file" > "$enriched_file" || true

    # Single-pass join in awk: -F'\t' keeps empty columns intact and the lookup
    # is a hash probe rather than one grep per failure. The first lookup row for
    # an event_id wins. Data rows are appended to the enriched file; the two
    # counters are reported on stdout.
    local counts
    counts=$(ENRICH_OUT="$enriched_file" awk -F'\t' -v OFS='\t' '
        FILENAME == ARGV[1] {
            if (!($1 in repo_of)) {
                repo_of[$1] = $2
                npub_of[$1] = $3
            }
            next
        }
        /^#/ || $0 == "" { next }    # headers already copied; skip blank lines
        {
            total++
            if (($4 == "" || $5 == "") && ($1 in repo_of)) {
                if ($4 == "") $4 = repo_of[$1]
                if ($5 == "") $5 = npub_of[$1]
                enriched++
            }
            $5 = $5                  # force at least five columns in the output row
            print >> ENVIRON["ENRICH_OUT"]
        }
        END { printf "%d %d\n", enriched, total }
    ' "$lookup_file" "$parse_failures_file") || {
        log_warn "Failed to enrich parse failures - leaving file unchanged"
        rm -f "$lookup_file" "$enriched_file"
        return 0
    }

    local enriched_count=0 total_count=0
    read -r enriched_count total_count <<< "$counts"

    # Replace original with enriched version
    mv "$enriched_file" "$parse_failures_file"

    # Cleanup
    rm -f "$lookup_file"

    log_info "  Enriched $enriched_count of $total_count parse failures with repo/npub"
    log_success "Enrichment complete"
}
| 347 | |||
| 214 | # Filter parse failures to only those for missing announcements | 348 | # Filter parse failures to only those for missing announcements |
| 215 | # This is used when --analysis-root is provided to scope results to the migration | 349 | # This is used when --analysis-root is provided to scope results to the migration |
| 216 | # | 350 | # |
| @@ -630,6 +764,13 @@ main() { | |||
| 630 | grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file" | 764 | grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file" |
| 631 | mv "$deduped_file" "$output_file" | 765 | mv "$deduped_file" "$output_file" |
| 632 | 766 | ||
| 767 | # Enrich with repo/npub from announcements.json if analysis root provided | ||
| 768 | # This is critical for usability - without it, action-required.txt shows | ||
| 769 | # event_id|kind instead of repo|npub, making parse failures unidentifiable | ||
| 770 | if [[ -n "$analysis_root" ]]; then | ||
| 771 | enrich_with_repo_npub "$output_file" "$analysis_root" | ||
| 772 | fi | ||
| 773 | |||
| 633 | # Filter to missing announcements only if analysis root provided | 774 | # Filter to missing announcements only if analysis root provided |
| 634 | if [[ -n "$analysis_root" ]]; then | 775 | if [[ -n "$analysis_root" ]]; then |
| 635 | filter_to_missing_announcements "$output_file" "$analysis_root" | 776 | filter_to_missing_announcements "$output_file" "$analysis_root" |