upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x docs/how-to/migration-scripts/30-extract-parse-failures.sh 141
-rwxr-xr-x docs/how-to/migration-scripts/40-classify-actions.sh 6
2 files changed, 145 insertions, 2 deletions
diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh
index f821834..f86e9f8 100755
--- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh
+++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh
@@ -211,6 +211,140 @@ parse_write_policy_rejection_line() {
211# the same event to be counted twice. Write policy logs contain the same 211# the same event to be counted twice. Write policy logs contain the same
212# events, so we don't lose any data by only extracting from that source. 212# events, so we don't lose any data by only extracting from that source.
213 213
# Enrich parse failures with repo/npub by looking up event_id in announcements.json
#
# "Invalid announcement" rejections only log event_id and kind, not the repo
# name or npub. Without enrichment, Phase 5 shows event_id|kind instead of
# repo|npub in action-required.txt, making the output unusable.
#
# Arguments:
#   $1 - parse failures file to enrich (modified in place). Expected layout:
#        event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub, '#' lines are headers
#   $2 - analysis root directory containing prod/raw/announcements.json
#
# Behaviour:
#   1. Builds a lookup table from the kind-30617 events in announcements.json:
#      event_id -> repo (d-tag) and npub (hex pubkey when 'nak' is unavailable
#      or fails to encode it)
#   2. For each parse failure with an empty repo or npub column, looks up the
#      event_id and fills in only the columns that are empty
#   3. Rewrites the file with header lines first, then the data rows
#
# Returns:
#   Always 0. Enrichment is best-effort: if the announcements file, jq, or a
#   temporary directory is unavailable, a warning is logged and the parse
#   failures file is left untouched.
enrich_with_repo_npub() {
    local parse_failures_file="$1"
    local analysis_root="$2"

    local prod_announcements="$analysis_root/prod/raw/announcements.json"

    # Validate required file exists
    if [[ ! -f "$prod_announcements" ]]; then
        log_warn "Production announcements file not found: $prod_announcements"
        log_warn "Skipping enrichment - repo/npub columns will remain empty"
        return 0
    fi

    # Check if jq is available
    if ! command -v jq &> /dev/null; then
        log_warn "jq not found - cannot enrich parse failures with repo/npub"
        log_warn "Install jq or run without --analysis-root"
        return 0
    fi

    log_info "Enriching parse failures with repo/npub from announcements..."

    # All scratch files live in one private directory so that every exit path
    # can clean up with a single rm.
    local work_dir
    work_dir=$(mktemp -d) || {
        log_warn "Failed to create temporary directory - skipping enrichment"
        return 0
    }
    local raw_file="$work_dir/lookup.raw"
    local lookup_file="$work_dir/lookup.tsv"
    local enriched_file="$work_dir/enriched.tsv"

    # Step 1: Build lookup table from announcements.json
    # Output format: event_id<TAB>repo<TAB>pubkey(hex)
    log_info " Building event_id -> repo/npub lookup table..."

    if ! jq -r 'select(.kind == 30617) |
        .id as $id |
        .pubkey as $pubkey |
        ((.tags[] | select(.[0] == "d") | .[1]) // "") as $dtag |
        "\($id)\t\($dtag)\t\($pubkey)"' "$prod_announcements" > "$raw_file" 2>/dev/null; then
        log_warn "Failed to parse production announcements JSON"
        rm -rf -- "${work_dir:?}"
        return 0
    fi

    # Convert hex pubkeys to npub format when a bech32 encoder is available;
    # otherwise keep the hex pubkey (Phase 5 can still match on it).
    if command -v nak &> /dev/null; then
        log_info " Using 'nak' for pubkey->npub conversion"

        local line event_id rest repo pubkey_hex npub
        # Lines are split with parameter expansion rather than
        # `IFS=$'\t' read`: tab is IFS whitespace, so read would collapse an
        # empty d-tag column and shift the pubkey into the repo field.
        while IFS= read -r line || [[ -n "$line" ]]; do
            # Skip anything that does not have the three expected columns
            [[ "$line" == *$'\t'*$'\t'* ]] || continue
            event_id=${line%%$'\t'*}
            rest=${line#*$'\t'}
            repo=${rest%$'\t'*}
            pubkey_hex=${rest##*$'\t'}

            # Reset on every row: `local` inside a loop would not clear the
            # value left over from the previous iteration.
            npub=""
            if [[ -n "$pubkey_hex" ]]; then
                # stdin is detached so the encoder cannot consume the loop input
                npub=$(nak encode npub "$pubkey_hex" 2>/dev/null < /dev/null) || npub=""
            fi
            # Fall back to hex pubkey if conversion failed
            [[ -n "$npub" ]] || npub="$pubkey_hex"

            printf '%s\t%s\t%s\n' "$event_id" "$repo" "$npub"
        done < "$raw_file" > "$lookup_file"
    else
        mv -- "$raw_file" "$lookup_file"
    fi

    local lookup_count
    lookup_count=$(wc -l < "$lookup_file")
    lookup_count="${lookup_count//[^0-9]/}"
    log_info " Built lookup table with $lookup_count announcements"

    # Step 2: Enrich parse failures

    # Copy header lines (grep exits 1 when there are none; that is not an error)
    grep '^#' "$parse_failures_file" > "$enriched_file" || true

    # Join data rows against the lookup table in a single awk pass. With a
    # one-character FS awk keeps empty columns, and the hash lookup is an exact
    # string match on event_id. The first lookup entry for an event_id wins.
    # Data rows are appended to the enriched file; the counters go to stdout.
    local counts
    if ! counts=$(awk -F'\t' -v OFS='\t' -v out="$enriched_file" '
        FILENAME == ARGV[1] {
            if (!($1 in repo_of)) {
                repo_of[$1] = $2
                npub_of[$1] = $3
            }
            next
        }
        /^#/ { next }
        {
            total++
            if (NF < 5) $5 = ""    # pad short rows to five columns
            if (($4 == "" || $5 == "") && ($1 in repo_of)) {
                if ($4 == "") $4 = repo_of[$1]
                if ($5 == "") $5 = npub_of[$1]
                enriched++
            }
            print >> out
        }
        END { printf "%d %d\n", enriched, total }
    ' "$lookup_file" "$parse_failures_file"); then
        log_warn "Failed to enrich parse failures - leaving file unchanged"
        rm -rf -- "${work_dir:?}"
        return 0
    fi

    local enriched_count=0
    local total_count=0
    read -r enriched_count total_count <<< "$counts"

    # Replace original with enriched version
    mv -- "$enriched_file" "$parse_failures_file"

    # Cleanup
    rm -rf -- "${work_dir:?}"

    log_info " Enriched $enriched_count of $total_count parse failures with repo/npub"
    log_success "Enrichment complete"
}
347
214# Filter parse failures to only those for missing announcements 348# Filter parse failures to only those for missing announcements
215# This is used when --analysis-root is provided to scope results to the migration 349# This is used when --analysis-root is provided to scope results to the migration
216# 350#
@@ -630,6 +764,13 @@ main() {
630 grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file" 764 grep -v '^#' "$output_file" | sort -t$'\t' -k1,1 -u >> "$deduped_file"
631 mv "$deduped_file" "$output_file" 765 mv "$deduped_file" "$output_file"
632 766
767 # Enrich with repo/npub from announcements.json if analysis root provided
768 # This is critical for usability - without it, action-required.txt shows
769 # event_id|kind instead of repo|npub, making parse failures unidentifiable
770 if [[ -n "$analysis_root" ]]; then
771 enrich_with_repo_npub "$output_file" "$analysis_root"
772 fi
773
633 # Filter to missing announcements only if analysis root provided 774 # Filter to missing announcements only if analysis root provided
634 if [[ -n "$analysis_root" ]]; then 775 if [[ -n "$analysis_root" ]]; then
635 filter_to_missing_announcements "$output_file" "$analysis_root" 776 filter_to_missing_announcements "$output_file" "$analysis_root"
diff --git a/docs/how-to/migration-scripts/40-classify-actions.sh b/docs/how-to/migration-scripts/40-classify-actions.sh
index 1706e47..53c0f9d 100755
--- a/docs/how-to/migration-scripts/40-classify-actions.sh
+++ b/docs/how-to/migration-scripts/40-classify-actions.sh
@@ -329,9 +329,11 @@ main() {
329 # ========================================================================= 329 # =========================================================================
330 log_info "Parsing log-based categories..." 330 log_info "Parsing log-based categories..."
331 331
332 # Parse failures: repo<TAB>npub<TAB>kind<TAB>event_id<TAB>reason 332 # Parse failures: event_id<TAB>kind<TAB>reason<TAB>repo<TAB>npub
333 # Note: repo and npub are in columns 4 and 5 (enriched by Phase 4 from announcements.json)
334 # Some entries may have empty repo/npub if the event_id wasn't found in announcements
333 if [[ -f "$logs_dir/parse-failures.txt" ]] && file_has_content "$logs_dir/parse-failures.txt"; then 335 if [[ -f "$logs_dir/parse-failures.txt" ]] && file_has_content "$logs_dir/parse-failures.txt"; then
334 grep -v '^#' "$logs_dir/parse-failures.txt" | awk -F'\t' '{print $1 "|" $2}' | sort -u > "$tmp_dir/parse_failures.txt" 336 grep -v '^#' "$logs_dir/parse-failures.txt" | awk -F'\t' '{print $4 "|" $5}' | sort -u > "$tmp_dir/parse_failures.txt"
335 log_info "Found $(wc -l < "$tmp_dir/parse_failures.txt" | tr -d ' ') parse failure entries" 337 log_info "Found $(wc -l < "$tmp_dir/parse_failures.txt" | tr -d ' ') parse failure entries"
336 else 338 else
337 touch "$tmp_dir/parse_failures.txt" 339 touch "$tmp_dir/parse_failures.txt"