From 2b21b807bdf6c0bab548ffceb5c41eee0902890c Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 16:12:03 +0000 Subject: Prevent Phase 4 from using wrong service (ngit-relay vs ngit-grasp) Add validation to ensure Phase 4 scripts use ngit-grasp service (with structured logging) instead of ngit-relay service. Changes: - Add validate-service.sh helper for reusable service validation - Add validation to run-migration-analysis.sh before Phase 4 - Add validation to 30-extract-parse-failures.sh - Add validation to 31-extract-purgatory-expiry.sh - Update migration guide with clear warnings about service selection - Expand troubleshooting for 'Phase 4 finds no logs' issue - Emphasize lesson learned in relay.ngit.dev notes This prevents the issue where Phase 4 was run against ngit-relay.service and found no parse failures because structured logging only exists in ngit-grasp services. --- .../migration-scripts/30-extract-parse-failures.sh | 34 ++++- .../31-extract-purgatory-expiry.sh | 34 ++++- .../migration-scripts/run-migration-analysis.sh | 28 ++++ docs/how-to/migration-scripts/validate-service.sh | 150 +++++++++++++++++++++ 4 files changed, 244 insertions(+), 2 deletions(-) create mode 100755 docs/how-to/migration-scripts/validate-service.sh (limited to 'docs/how-to/migration-scripts') diff --git a/docs/how-to/migration-scripts/30-extract-parse-failures.sh b/docs/how-to/migration-scripts/30-extract-parse-failures.sh index bc2049a..410fcbc 100755 --- a/docs/how-to/migration-scripts/30-extract-parse-failures.sh +++ b/docs/how-to/migration-scripts/30-extract-parse-failures.sh @@ -65,6 +65,14 @@ set -euo pipefail +# Get script directory for sourcing helpers +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Source the service validation helper +if [[ -f "$SCRIPT_DIR/validate-service.sh" ]]; then + source "$SCRIPT_DIR/validate-service.sh" +fi + # Colors for output (disabled if not a terminal) if [[ -t 1 ]]; then RED='\033[0;31m' @@ -188,11 +196,35 @@ main() { esac done - # Validate service name + # Validate service name format if [[ ! "$service" =~ \.service$ ]]; then service="${service}.service" fi + # Validate service is appropriate for structured logging + # This prevents the common mistake of using ngit-relay instead of ngit-grasp + if type validate_service_for_structured_logging &>/dev/null; then + # Use non-interactive mode if not a terminal, skip log check (we'll do our own) + local interactive="true" + [[ ! -t 0 ]] && interactive="false" + + if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then + log_error "Service validation failed. Use an ngit-grasp service for structured logging." + exit 1 + fi + else + # Fallback validation if helper not available + if [[ "$service" == *"ngit-relay"* ]]; then + log_error "Service name appears to be ngit-relay: $service" + log_error "Structured logging ([PARSE_FAIL]) only exists in ngit-grasp services." + log_error "Please use the ngit-grasp archive service instead." + log_error "" + log_error "To find the correct service:" + log_error " systemctl list-units 'ngit-grasp*' --all" + exit 1 + fi + fi + log_info "Extracting parse failures from systemd logs" log_info "Service: $service" log_info "Output: $output_dir" diff --git a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh index 8cadad9..a20780e 100755 --- a/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh +++ b/docs/how-to/migration-scripts/31-extract-purgatory-expiry.sh @@ -76,6 +76,14 @@ set -euo pipefail +# Get script directory for sourcing helpers +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Source the service validation helper +if [[ -f "$SCRIPT_DIR/validate-service.sh" ]]; then + source "$SCRIPT_DIR/validate-service.sh" +fi + # Colors for output (disabled if not a terminal) if [[ -t 1 ]]; then RED='\033[0;31m' @@ -195,11 +203,35 @@ main() { esac done - # Validate service name + # Validate service name format if [[ ! "$service" =~ \.service$ ]]; then service="${service}.service" fi + # Validate service is appropriate for structured logging + # This prevents the common mistake of using ngit-relay instead of ngit-grasp + if type validate_service_for_structured_logging &>/dev/null; then + # Use non-interactive mode if not a terminal, skip log check (we'll do our own) + local interactive="true" + [[ ! -t 0 ]] && interactive="false" + + if ! validate_service_for_structured_logging "$service" "false" "$interactive"; then + log_error "Service validation failed. Use an ngit-grasp service for structured logging." + exit 1 + fi + else + # Fallback validation if helper not available + if [[ "$service" == *"ngit-relay"* ]]; then + log_error "Service name appears to be ngit-relay: $service" + log_error "Structured logging ([PURGATORY_EXPIRED]) only exists in ngit-grasp services." + log_error "Please use the ngit-grasp archive service instead." + log_error "" + log_error "To find the correct service:" + log_error " systemctl list-units 'ngit-grasp*' --all" + exit 1 + fi + fi + log_info "Extracting purgatory expiry events from systemd logs" log_info "Service: $service" log_info "Output: $output_dir" diff --git a/docs/how-to/migration-scripts/run-migration-analysis.sh b/docs/how-to/migration-scripts/run-migration-analysis.sh index 65d9d17..b2ca142 100755 --- a/docs/how-to/migration-scripts/run-migration-analysis.sh +++ b/docs/how-to/migration-scripts/run-migration-analysis.sh @@ -548,6 +548,34 @@ run_phase_4() { return 0 fi + # Validate service name before running Phase 4 + # Structured logging only exists in ngit-grasp, not ngit-relay + if [[ "$SERVICE_NAME" == *"ngit-relay"* ]]; then + log_error "SERVICE_NAME appears to be ngit-relay: $SERVICE_NAME" + log_error "" + log_error "Phase 4 requires an ngit-grasp service with structured logging." + log_error "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists" + log_error "in ngit-grasp services, NOT in ngit-relay services." + log_error "" + log_error "Please update --service to use the ngit-grasp archive service." + log_error "" + log_error "To find the correct service name:" + log_error " systemctl list-units 'ngit-grasp*' --all" + log_error "" + log_error "Common ngit-grasp service names:" + log_error " - ngit-grasp.service" + log_error " - ngit-grasp-relay-ngit-dev.service (NixOS multi-instance)" + log_error " - ngit-grasp-archive.service" + return 1 + fi + + # Warn if service name doesn't look like ngit-grasp + if [[ "$SERVICE_NAME" != *"ngit-grasp"* && "$SERVICE_NAME" != *"grasp"* ]]; then + log_warn "SERVICE_NAME doesn't contain 'ngit-grasp': $SERVICE_NAME" + log_warn "Structured logging only exists in ngit-grasp services." + log_warn "If this is not an ngit-grasp service, Phase 4 will find no logs." + fi + local cmds=() cmds+=("'$SCRIPT_DIR/30-extract-parse-failures.sh' '$SERVICE_NAME' '$OUTPUT_DIR/logs'") diff --git a/docs/how-to/migration-scripts/validate-service.sh b/docs/how-to/migration-scripts/validate-service.sh new file mode 100755 index 0000000..2525a3f --- /dev/null +++ b/docs/how-to/migration-scripts/validate-service.sh @@ -0,0 +1,150 @@ +#!/usr/bin/env bash +# +# validate-service.sh - Validate service name for structured logging +# +# This helper script validates that a service name is appropriate for +# Phase 4 log extraction. Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) +# only exists in ngit-grasp services, NOT in ngit-relay services. +# +# USAGE: +# Source this script and call the validation function: +# +# source validate-service.sh +# validate_service_for_structured_logging "$SERVICE_NAME" || exit 1 +# +# BACKGROUND: +# Phase 4 of the migration analysis extracts structured log entries from +# journald. These log entries only exist in ngit-grasp services. If you +# accidentally specify an ngit-relay service, Phase 4 will find no logs +# and produce empty results. +# +# This validation prevents that common mistake by: +# 1. Checking if the service name contains "ngit-relay" (error) +# 2. Warning if the service name doesn't contain "ngit-grasp" +# 3. Optionally checking if structured logs actually exist +# +# SEE ALSO: +# docs/how-to/migrate-to-ngit-grasp.md - Full migration guide +# 30-extract-parse-failures.sh - Uses this validation +# 31-extract-purgatory-expiry.sh - Uses this validation +# + +# Colors for output (disabled if not a terminal) +if [[ -t 1 ]]; then + _VS_RED='\033[0;31m' + _VS_YELLOW='\033[0;33m' + _VS_NC='\033[0m' +else + _VS_RED='' + _VS_YELLOW='' + _VS_NC='' +fi + +# Validates that the service name is appropriate for structured logging +# +# Arguments: +# $1 - service_name: The systemd service name to validate +# $2 - check_logs: Whether to check if logs actually exist (default: "true") +# $3 - interactive: Whether to prompt for confirmation (default: "true") +# +# Returns: +# 0 - Service is valid for structured logging +# 1 - Service is invalid or user declined to continue +# +# Example: +# validate_service_for_structured_logging "ngit-grasp.service" || exit 1 +# validate_service_for_structured_logging "ngit-grasp.service" "false" # Skip log check +# validate_service_for_structured_logging "ngit-grasp.service" "true" "false" # Non-interactive +# +validate_service_for_structured_logging() { + local service_name="$1" + local check_logs="${2:-true}" + local interactive="${3:-true}" + + # Check if service name looks like ngit-relay (ERROR - wrong service type) + if [[ "$service_name" == *"ngit-relay"* ]]; then + echo -e "${_VS_RED}ERROR: Service name appears to be ngit-relay: $service_name${_VS_NC}" >&2 + echo "" >&2 + echo "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists in" >&2 + echo "ngit-grasp services, NOT in ngit-relay services." >&2 + echo "" >&2 + echo "Please use the ngit-grasp archive service instead." >&2 + echo "" >&2 + echo "To find the correct service name:" >&2 + echo " systemctl list-units 'ngit-grasp*' --all" >&2 + echo "" >&2 + echo "Common ngit-grasp service names:" >&2 + echo " - ngit-grasp.service" >&2 + echo " - ngit-grasp-relay-ngit-dev.service (NixOS multi-instance)" >&2 + echo " - ngit-grasp-archive.service" >&2 + return 1 + fi + + # Check if service name looks like ngit-grasp (WARNING if not) + if [[ "$service_name" != *"ngit-grasp"* && "$service_name" != *"grasp"* ]]; then + echo -e "${_VS_YELLOW}WARNING: Service name doesn't contain 'ngit-grasp': $service_name${_VS_NC}" >&2 + echo "" >&2 + echo "Structured logging ([PARSE_FAIL], [PURGATORY_EXPIRED]) only exists in" >&2 + echo "ngit-grasp services." >&2 + echo "" >&2 + + if [[ "$interactive" == "true" ]]; then + read -p "Continue anyway? (y/N) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + return 1 + fi + else + echo "Non-interactive mode: proceeding despite warning" >&2 + fi + fi + + # Optionally check if structured logs actually exist + if [[ "$check_logs" == "true" ]]; then + # Check if journalctl is available + if ! command -v journalctl &> /dev/null; then + echo -e "${_VS_YELLOW}WARNING: journalctl not available, cannot verify logs exist${_VS_NC}" >&2 + return 0 + fi + + # Check for structured log entries + local has_parse_fail has_purgatory + has_parse_fail=$(journalctl -u "$service_name" --since "7 days ago" 2>/dev/null | grep -c '\[PARSE_FAIL\]' || echo "0") + has_purgatory=$(journalctl -u "$service_name" --since "7 days ago" 2>/dev/null | grep -c '\[PURGATORY_EXPIRED\]' || echo "0") + + # Strip any non-numeric characters (grep -c can have trailing whitespace) + has_parse_fail="${has_parse_fail//[^0-9]/}" + has_purgatory="${has_purgatory//[^0-9]/}" + has_parse_fail="${has_parse_fail:-0}" + has_purgatory="${has_purgatory:-0}" + + if [[ "$has_parse_fail" -eq 0 && "$has_purgatory" -eq 0 ]]; then + echo -e "${_VS_YELLOW}WARNING: No structured logs found in $service_name (last 7 days)${_VS_NC}" >&2 + echo "" >&2 + echo "This may indicate:" >&2 + echo " 1. Wrong service (should be ngit-grasp archive service, not ngit-relay)" >&2 + echo " 2. Structured logging not yet deployed to this ngit-grasp instance" >&2 + echo " 3. No parse failures or purgatory expiry events in the time window" >&2 + echo "" >&2 + echo "To verify you have the right service:" >&2 + echo " systemctl list-units 'ngit-grasp*' --all" >&2 + echo " journalctl -u | grep -E '\\[PARSE_FAIL\\]|\\[PURGATORY_EXPIRED\\]' | head -5" >&2 + echo "" >&2 + + if [[ "$interactive" == "true" ]]; then + read -p "Continue anyway? (y/N) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + return 1 + fi + else + echo "Non-interactive mode: proceeding despite warning" >&2 + fi + fi + fi + + return 0 +} + +# Export the function so it can be used after sourcing +export -f validate_service_for_structured_logging -- cgit v1.2.3