mirror of
https://github.com/community-scripts/ProxmoxVE.git
synced 2026-02-18 00:17:43 +01:00
fix: send telemetry BEFORE log collection in signal handlers
- Swap ensure_log_on_host/post_update_to_api order in on_interrupt, on_terminate, api_exit_script, and inline SIGHUP/SIGINT/SIGTERM traps - For signal exits (>128): send telemetry immediately, then best-effort log collection - Add 2>/dev/null || true to all I/O in signal handlers to prevent SIGPIPE - Fix on_exit: exit_code=0 now reports 'done' instead of 'failed 1' - Root cause: pct pull hangs on dying containers blocked telemetry updates, leaving 595+ records stuck in 'installing' daily
This commit is contained in:
@@ -5552,6 +5552,8 @@ ensure_log_on_host() {
|
||||
# - Exit trap handler for reporting to API telemetry
|
||||
# - Captures exit code and reports to PocketBase using centralized error descriptions
|
||||
# - Uses explain_exit_code() from api.func for consistent error messages
|
||||
# - For signal exits (>128): sends telemetry FIRST before log collection
|
||||
# to prevent pct pull hangs from blocking status updates
|
||||
# - For non-zero exit codes: posts "failed" status
|
||||
# - For zero exit codes where post_update_to_api was never called:
|
||||
# catches orphaned "installing" records (e.g., script exited cleanly
|
||||
@@ -5560,8 +5562,15 @@ ensure_log_on_host() {
|
||||
api_exit_script() {
|
||||
local exit_code=$?
|
||||
if [ $exit_code -ne 0 ]; then
|
||||
ensure_log_on_host
|
||||
post_update_to_api "failed" "$exit_code"
|
||||
if [ $exit_code -gt 128 ]; then
|
||||
# Signal exit: send telemetry IMMEDIATELY (container may be dying)
|
||||
post_update_to_api "failed" "$exit_code" 2>/dev/null || true
|
||||
ensure_log_on_host 2>/dev/null || true
|
||||
else
|
||||
# Normal error: collect logs first for better error details
|
||||
ensure_log_on_host 2>/dev/null || true
|
||||
post_update_to_api "failed" "$exit_code"
|
||||
fi
|
||||
elif [[ "${POST_TO_API_DONE:-}" == "true" && "${POST_UPDATE_DONE:-}" != "true" ]]; then
|
||||
# Script exited with 0 but never sent a completion status
|
||||
# exit_code=0 is never an error — report as success
|
||||
@@ -5572,7 +5581,7 @@ api_exit_script() {
|
||||
if command -v pveversion >/dev/null 2>&1; then
|
||||
trap 'api_exit_script' EXIT
|
||||
fi
|
||||
trap 'local _ec=$?; if [[ $_ec -ne 0 ]]; then ensure_log_on_host; post_update_to_api "failed" "$_ec"; fi' ERR
|
||||
trap 'ensure_log_on_host; post_update_to_api "failed" "129"; exit 129' SIGHUP
|
||||
trap 'ensure_log_on_host; post_update_to_api "failed" "130"; exit 130' SIGINT
|
||||
trap 'ensure_log_on_host; post_update_to_api "failed" "143"; exit 143' SIGTERM
|
||||
trap 'local _ec=$?; if [[ $_ec -ne 0 ]]; then ensure_log_on_host 2>/dev/null || true; post_update_to_api "failed" "$_ec"; fi' ERR
|
||||
trap 'post_update_to_api "failed" "129" 2>/dev/null || true; ensure_log_on_host 2>/dev/null || true; exit 129' SIGHUP
|
||||
trap 'post_update_to_api "failed" "130" 2>/dev/null || true; ensure_log_on_host 2>/dev/null || true; exit 130' SIGINT
|
||||
trap 'post_update_to_api "failed" "143" 2>/dev/null || true; ensure_log_on_host 2>/dev/null || true; exit 143' SIGTERM
|
||||
|
||||
@@ -329,6 +329,8 @@ error_handler() {
|
||||
# - Cleans up lock files if lockfile variable is set
|
||||
# - Exits with captured exit code
|
||||
# - Always runs on script termination (success or failure)
|
||||
# - For signal exits (>128): sends telemetry FIRST before log collection
|
||||
# to prevent pct pull hangs from blocking status updates
|
||||
# ------------------------------------------------------------------------------
|
||||
on_exit() {
|
||||
local exit_code=$?
|
||||
@@ -337,14 +339,24 @@ on_exit() {
|
||||
# post_to_api was called ("installing" sent) but post_update_to_api was never called
|
||||
if [[ "${POST_TO_API_DONE:-}" == "true" && "${POST_UPDATE_DONE:-}" != "true" ]]; then
|
||||
if declare -f post_update_to_api >/dev/null 2>&1; then
|
||||
# Ensure log is accessible on host before reporting
|
||||
if declare -f ensure_log_on_host >/dev/null 2>&1; then
|
||||
ensure_log_on_host
|
||||
fi
|
||||
if [[ $exit_code -ne 0 ]]; then
|
||||
post_update_to_api "failed" "$exit_code"
|
||||
if [[ $exit_code -gt 128 ]]; then
|
||||
# Signal exit: send telemetry IMMEDIATELY (container may be dying, pct pull could hang)
|
||||
post_update_to_api "failed" "$exit_code" 2>/dev/null || true
|
||||
# Then try log collection (non-critical, best-effort)
|
||||
if declare -f ensure_log_on_host >/dev/null 2>&1; then
|
||||
ensure_log_on_host 2>/dev/null || true
|
||||
fi
|
||||
else
|
||||
post_update_to_api "failed" "1"
|
||||
# Normal exit: collect logs first for better error details
|
||||
if declare -f ensure_log_on_host >/dev/null 2>&1; then
|
||||
ensure_log_on_host 2>/dev/null || true
|
||||
fi
|
||||
if [[ $exit_code -ne 0 ]]; then
|
||||
post_update_to_api "failed" "$exit_code"
|
||||
else
|
||||
# exit_code=0 is never an error — report as success
|
||||
post_update_to_api "done" "0"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
@@ -356,22 +368,26 @@ on_exit() {
|
||||
# on_interrupt()
|
||||
#
|
||||
# - SIGINT (Ctrl+C) trap handler
|
||||
# - Reports to telemetry FIRST (time-critical: container may be dying)
|
||||
# - Displays "Interrupted by user" message
|
||||
# - Exits with code 130 (128 + SIGINT=2)
|
||||
# - Output redirected to /dev/null fallback to prevent SIGPIPE on closed terminals
|
||||
# ------------------------------------------------------------------------------
|
||||
on_interrupt() {
|
||||
# Ensure log is accessible on host before reporting
|
||||
if declare -f ensure_log_on_host >/dev/null 2>&1; then
|
||||
ensure_log_on_host
|
||||
fi
|
||||
# Report interruption to telemetry API (prevents stuck "installing" records)
|
||||
# CRITICAL: Send telemetry FIRST before any cleanup or output
|
||||
# If ensure_log_on_host hangs (e.g. pct pull on dying container),
|
||||
# the status update would never be sent, leaving records stuck in "installing"
|
||||
if declare -f post_update_to_api >/dev/null 2>&1; then
|
||||
post_update_to_api "failed" "130"
|
||||
post_update_to_api "failed" "130" 2>/dev/null || true
|
||||
fi
|
||||
# Best-effort log collection (non-critical after telemetry is sent)
|
||||
if declare -f ensure_log_on_host >/dev/null 2>&1; then
|
||||
ensure_log_on_host 2>/dev/null || true
|
||||
fi
|
||||
if declare -f msg_error >/dev/null 2>&1; then
|
||||
msg_error "Interrupted by user (SIGINT)"
|
||||
msg_error "Interrupted by user (SIGINT)" 2>/dev/null || true
|
||||
else
|
||||
echo -e "\n${RD}Interrupted by user (SIGINT)${CL}"
|
||||
echo -e "\n${RD}Interrupted by user (SIGINT)${CL}" 2>/dev/null || true
|
||||
fi
|
||||
exit 130
|
||||
}
|
||||
@@ -380,23 +396,27 @@ on_interrupt() {
|
||||
# on_terminate()
|
||||
#
|
||||
# - SIGTERM trap handler
|
||||
# - Reports to telemetry FIRST (time-critical: process being killed)
|
||||
# - Displays "Terminated by signal" message
|
||||
# - Exits with code 143 (128 + SIGTERM=15)
|
||||
# - Triggered by external process termination
|
||||
# - Output redirected to /dev/null fallback to prevent SIGPIPE on closed terminals
|
||||
# ------------------------------------------------------------------------------
|
||||
on_terminate() {
|
||||
# Ensure log is accessible on host before reporting
|
||||
if declare -f ensure_log_on_host >/dev/null 2>&1; then
|
||||
ensure_log_on_host
|
||||
fi
|
||||
# Report termination to telemetry API (prevents stuck "installing" records)
|
||||
# CRITICAL: Send telemetry FIRST before any cleanup or output
|
||||
# Same rationale as on_interrupt: ensure status gets reported even if
|
||||
# ensure_log_on_host hangs or terminal is already closed
|
||||
if declare -f post_update_to_api >/dev/null 2>&1; then
|
||||
post_update_to_api "failed" "143"
|
||||
post_update_to_api "failed" "143" 2>/dev/null || true
|
||||
fi
|
||||
# Best-effort log collection (non-critical after telemetry is sent)
|
||||
if declare -f ensure_log_on_host >/dev/null 2>&1; then
|
||||
ensure_log_on_host 2>/dev/null || true
|
||||
fi
|
||||
if declare -f msg_error >/dev/null 2>&1; then
|
||||
msg_error "Terminated by signal (SIGTERM)"
|
||||
msg_error "Terminated by signal (SIGTERM)" 2>/dev/null || true
|
||||
else
|
||||
echo -e "\n${RD}Terminated by signal (SIGTERM)${CL}"
|
||||
echo -e "\n${RD}Terminated by signal (SIGTERM)${CL}" 2>/dev/null || true
|
||||
fi
|
||||
exit 143
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user