Files
Pulse/scripts/pulse-sensor-cleanup.sh
2025-11-24 22:54:58 +00:00

287 lines
12 KiB
Bash
Executable File

#!/bin/bash
# pulse-sensor-cleanup.sh - Complete Pulse footprint removal when nodes are removed
# Removes: SSH keys, proxy service, binaries, API tokens, and LXC bind mounts
# This script is triggered by systemd path unit when cleanup-request.json is created
# Strict mode: abort on unhandled command failures (-e), on use of unset
# variables (-u), and treat a pipeline as failed if any stage fails (pipefail).
set -euo pipefail
# Configuration
# Directory that holds the cleanup request file and the lock file.
WORK_DIR="/var/lib/pulse-sensor-proxy"
# Request file read by this script; a run without it exits immediately.
CLEANUP_REQUEST="${WORK_DIR}/cleanup-request.json"
# Lock file used with flock so that cleanup runs do not overlap.
LOCKFILE="${WORK_DIR}/cleanup.lock"
# Tag passed to logger(1) for every syslog message this script emits.
LOG_TAG="pulse-sensor-cleanup"
# Installer script; invoked with --uninstall during a full localhost cleanup.
INSTALLER_PATH="/opt/pulse/sensor-proxy/install-sensor-proxy.sh"
# Logging functions
# Log an informational message to syslog (tagged $LOG_TAG) and to stdout.
# Globals:   LOG_TAG (read)
# Arguments: $1 - message text
# Outputs:   "[INFO] <message>" on stdout
log_info() {
  # Syslog delivery is best-effort: under `set -e` a failing logger (for
  # example, no syslog socket) must not abort the cleanup itself.
  logger -t "$LOG_TAG" -p user.info "$1" || true
  echo "[INFO] $1"
}
# Log a warning to syslog (tagged $LOG_TAG) and to stdout.
# Globals:   LOG_TAG (read)
# Arguments: $1 - message text
# Outputs:   "[WARN] <message>" on stdout
log_warn() {
  # Syslog delivery is best-effort: under `set -e` a failing logger (for
  # example, no syslog socket) must not abort the cleanup itself.
  logger -t "$LOG_TAG" -p user.warning "$1" || true
  echo "[WARN] $1"
}
# Log an error to syslog (tagged $LOG_TAG) and to stderr.
# Globals:   LOG_TAG (read)
# Arguments: $1 - message text
# Outputs:   "[ERROR] <message>" on stderr (nothing on stdout)
log_error() {
  # Syslog delivery is best-effort: under `set -e` a failing logger (for
  # example, no syslog socket) must not abort the script before the caller
  # gets to handle the error it is reporting.
  logger -t "$LOG_TAG" -p user.err "$1" || true
  echo "[ERROR] $1" >&2
}
# Serialize cleanup runs: open the lock file on fd 200 and try to take an
# exclusive flock without blocking. If the lock is already held, another run
# is in progress and this one exits quietly with success.
exec 200>"$LOCKFILE"
flock -n 200 || {
  log_info "Another cleanup instance is running, exiting"
  exit 0
}
# Nothing to do unless a request file is present; its absence is not an error.
[[ -f "$CLEANUP_REQUEST" ]] || {
  log_info "No cleanup request found at $CLEANUP_REQUEST"
  exit 0
}
log_info "Processing cleanup request from $CLEANUP_REQUEST"
# Extract the first string value stored under a key in a JSON document.
# This is a lightweight text match, not a JSON parser: it tolerates optional
# whitespace around the colon (compact and pretty-printed output both work)
# but does not understand escaped quotes inside the value.
# Arguments: $1 - JSON text
#            $2 - key name
# Outputs:   the value on stdout; nothing if the key is absent
# Returns:   always 0, so a missing key never trips `set -e`/pipefail
json_string_field() {
  local json=$1 key=$2
  printf '%s\n' "$json" \
    | grep -o "\"${key}\"[[:space:]]*:[[:space:]]*\"[^\"]*\"" \
    | head -n 1 \
    | cut -d'"' -f4 \
    || true
}
# Read and parse the cleanup request.
# HOST must be extracted reliably: an empty HOST is later interpreted as
# "clean up all cluster nodes", so a formatting difference in the request
# must not silently widen the scope of the cleanup.
CLEANUP_DATA=$(cat "$CLEANUP_REQUEST")
HOST=$(json_string_field "$CLEANUP_DATA" "host")
REQUESTED_AT=$(json_string_field "$CLEANUP_DATA" "requestedAt")
log_info "Cleanup requested at: ${REQUESTED_AT:-unknown}"
# Claim the request by renaming it to <request>.processing. The rename keeps
# the original path from re-triggering processing, while the renamed file is
# only deleted at the very end of a successful run. If the rename fails the
# request is assumed to be gone already and the run ends successfully.
PROCESSING_FILE="${CLEANUP_REQUEST}.processing"
if ! mv "$CLEANUP_REQUEST" "$PROCESSING_FILE" 2>/dev/null; then
  log_warn "Failed to rename cleanup request file, may have been processed by another instance"
  exit 0
fi
# If no specific host was provided, clean up all known nodes
if [[ -z "$HOST" ]]; then
log_info "No specific host provided - cleaning up all cluster nodes"
# Discover cluster nodes.
# `pvecm status` output is scanned for rows that carry a hex node id followed
# by an IPv4 address (qdevice rows are dropped); every IPv4-looking field on
# such a row is collected. An empty list means there is nothing to iterate
# over and only this machine is cleaned.
CLUSTER_NODES=""
if command -v pvecm >/dev/null 2>&1; then
  CLUSTER_NODES=$(pvecm status 2>/dev/null | grep -vEi "qdevice" | awk '/0x[0-9a-f]+.*[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/ {for(i=1;i<=NF;i++) if($i ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) print $i}' || true)
  if [[ -z "$CLUSTER_NODES" ]]; then
    # Standalone node - clean up localhost
    log_info "Standalone node detected - cleaning up localhost"
  fi
else
  log_warn "pvecm command not available - cleaning up localhost only"
fi

if [[ -n "$CLUSTER_NODES" ]]; then
  # Command executed on each node: delete both pulse-managed-key and
  # pulse-proxy-key entries from root's authorized_keys.
  strip_keys_cmd="sed -i -e '/# pulse-managed-key\$/d' -e '/# pulse-proxy-key\$/d' /root/.ssh/authorized_keys"
  # Intentionally unquoted: the list is whitespace-separated IPv4 addresses.
  for node_ip in $CLUSTER_NODES; do
    log_info "Cleaning up SSH keys on node $node_ip"
    # A failure on one node is logged and does not stop the loop.
    if ! ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 root@"$node_ip" \
      "$strip_keys_cmd" 2>&1 \
      | logger -t "$LOG_TAG" -p user.info; then
      log_warn "Failed to clean up SSH keys on $node_ip"
    fi
  done
  log_info "Cluster cleanup completed"
else
  # No cluster members found (or pvecm missing): edit the local file directly.
  if ! sed -i -e '/# pulse-managed-key$/d' -e '/# pulse-proxy-key$/d' /root/.ssh/authorized_keys 2>&1 \
    | logger -t "$LOG_TAG" -p user.info; then
    log_warn "Failed to clean up SSH keys on localhost"
  fi
fi
else
# Reduce a host reference to a bare hostname or IP address.
# Strips an optional http:// or https:// scheme, any /path, and a :port.
# IPv6 literals written in [brackets] are returned without the brackets.
# Arguments: $1 - host reference, e.g. https://pve1.example.com:8006
# Outputs:   bare host on stdout
host_from_url() {
  local h=$1
  h=${h#http://}
  h=${h#https://}
  h=${h%%/*} # drop path / trailing slash
  if [[ "$h" == "["* ]]; then
    # Bracketed IPv6 literal: keep what is between the brackets.
    h=${h#"["}
    h=${h%%"]"*}
  else
    h=${h%%:*} # drop :port
  fi
  printf '%s\n' "$h"
}

log_info "Cleaning up specific host: $HOST"
# Extract hostname/IP from host URL
HOST_CLEAN=$(host_from_url "$HOST")
# Check if this is localhost (by IP, hostname, or FQDN)
LOCAL_IPS=$(hostname -I 2>/dev/null || echo "")
LOCAL_HOSTNAME=$(hostname 2>/dev/null || echo "")
LOCAL_FQDN=$(hostname -f 2>/dev/null || echo "")
IS_LOCAL=false
# Check against all local IPs (intentionally unquoted: space-separated list)
for local_ip in $LOCAL_IPS; do
  if [[ "$HOST_CLEAN" == "$local_ip" ]]; then
    IS_LOCAL=true
    break
  fi
done
# Check against loopback names/addresses
case "$HOST_CLEAN" in
  127.0.0.1 | ::1 | localhost) IS_LOCAL=true ;;
esac
# Check against hostname and FQDN. The non-empty guard matters: if both the
# normalised host and a failed `hostname` lookup are empty they would compare
# equal and wrongly select the destructive full local cleanup.
if [[ -n "$HOST_CLEAN" ]] &&
  [[ "$HOST_CLEAN" == "$LOCAL_HOSTNAME" || "$HOST_CLEAN" == "$LOCAL_FQDN" ]]; then
  IS_LOCAL=true
fi
if [[ "$IS_LOCAL" == true ]]; then
log_info "Performing full cleanup on localhost"
# 1. Remove SSH keys
# Deletes every authorized_keys line ending in the pulse-managed-key or
# pulse-proxy-key marker comment. sed's output goes to syslog; a failure is
# reported as a warning and the cleanup continues.
log_info "Removing SSH keys from authorized_keys"
if ! sed -i -e '/# pulse-managed-key$/d' -e '/# pulse-proxy-key$/d' /root/.ssh/authorized_keys 2>&1 \
  | logger -t "$LOG_TAG" -p user.info; then
  log_warn "Failed to clean up SSH keys"
fi
# Print the "tokenid" of every token object in the JSON read from stdin.
# Accepts either a bare list of objects or an object wrapping that list under
# "data". Prints nothing (and still returns 0) when the input is not valid
# JSON or python3 cannot be run.
extract_token_ids() {
  python3 -c '
import json
import sys

try:
    data = json.load(sys.stdin)
except Exception:
    sys.exit(0)
if isinstance(data, dict):
    data = data.get("data")
if isinstance(data, list):
    for item in data:
        if isinstance(item, dict) and "tokenid" in item:
            print(item["tokenid"])
' 2>/dev/null || true
}

# 2. Delete API tokens and user
log_info "Removing Proxmox API tokens and pulse-monitor user"
if command -v pveum >/dev/null 2>&1; then
  TOKEN_IDS=""
  # Both JSON strategies need python3 to parse the listing.
  if command -v python3 >/dev/null 2>&1; then
    # Preferred: pveum's own JSON output.
    if TOKEN_JSON=$(pveum user token list pulse-monitor@pam --output-format json 2>/dev/null); then
      TOKEN_IDS=$(printf '%s' "$TOKEN_JSON" | extract_token_ids)
    fi
    # Fall back to the pvesh API client if pveum JSON yielded nothing. JSON
    # must be requested explicitly; otherwise the output is not parseable.
    if [[ -z "$TOKEN_IDS" ]] && command -v pvesh >/dev/null 2>&1; then
      if TOKEN_JSON=$(pvesh get /access/users/pulse-monitor@pam/token --output-format json 2>/dev/null); then
        TOKEN_IDS=$(printf '%s' "$TOKEN_JSON" | extract_token_ids)
      fi
    fi
  fi
  # Last resort: scrape the human-readable table. Only rows whose first
  # column starts with "pulse" are taken; box-drawing and empty lines are
  # dropped.
  # NOTE(review): rows that begin with a table border character will not
  # match the awk pattern — confirm against real pveum output.
  if [[ -z "$TOKEN_IDS" ]]; then
    TOKEN_IDS=$(pveum user token list pulse-monitor@pam 2>/dev/null | \
      awk 'NR>1 && /^[[:space:]]*pulse/ {print $1}' | grep -v '^[│┌└╞─]' | grep -v '^$' || true)
  fi
  if [[ -n "$TOKEN_IDS" ]]; then
    # Intentionally unquoted: one token id per whitespace-separated word.
    for token_id in $TOKEN_IDS; do
      log_info "Deleting API token: $token_id"
      if ! pveum user token remove pulse-monitor@pam "${token_id}" 2>&1 \
        | logger -t "$LOG_TAG" -p user.info; then
        log_warn "Failed to delete token $token_id"
      fi
    done
  else
    log_info "No API tokens found for pulse-monitor@pam"
  fi
  # Remove the pulse-monitor user; any failure is reported as "not found".
  log_info "Removing pulse-monitor@pam user"
  if ! pveum user delete pulse-monitor@pam 2>&1 \
    | logger -t "$LOG_TAG" -p user.info; then
    log_warn "pulse-monitor@pam user not found or already removed"
  fi
else
  log_warn "pveum command not available, skipping API token cleanup"
fi
# Print the mount-point keys (mp0, mp1, ...) whose definition mentions
# pulse-sensor-proxy in the live part of an LXC config file.
# Scanning stops at the first "[section]" header: entries recorded in later
# sections (e.g. snapshots) must not be used, because `pct set -delete mpN`
# acts on the live configuration and the same key may be an unrelated mount
# there.
# Arguments: $1 - path to the container config file
# Outputs:   one key per line, without the trailing colon
pulse_mount_keys() {
  awk '
    /^\[/ { exit }
    /^mp[0-9]+:/ && /pulse-sensor-proxy/ { sub(/:.*/, ""); print }
  ' "$1" 2>/dev/null || true
}

# 3. Remove LXC bind mounts
log_info "Removing LXC bind mounts from container configs"
if command -v pct >/dev/null 2>&1; then
  # First column of `pct list` (header row skipped) is the container id.
  for ctid in $(pct list 2>/dev/null | awk 'NR>1 {print $1}' || true); do
    CONF_FILE="/etc/pve/lxc/${ctid}.conf"
    [[ -f "$CONF_FILE" ]] || continue
    # Remove each pulse-sensor-proxy mount point through pct rather than by
    # editing the config file directly.
    for mp_num in $(pulse_mount_keys "$CONF_FILE"); do
      log_info "Removing ${mp_num} (pulse-sensor-proxy) from container $ctid"
      if pct set "$ctid" -delete "${mp_num}" 2>&1 | logger -t "$LOG_TAG" -p user.info; then
        log_info "Successfully removed ${mp_num} from container $ctid"
      else
        log_warn "Failed to remove ${mp_num} from container $ctid"
      fi
    done
  done
fi
# 4. Uninstall proxy service and remove binaries via isolated transient unit
log_info "Starting full uninstallation (service, binaries, configs)"
if [[ -x "$INSTALLER_PATH" ]]; then
  # The uninstaller's output is appended to a log file under this directory.
  # Create it up front: if it is missing the redirection fails and the
  # installer is never started.
  mkdir -p /var/log/pulse/sensor-proxy 2>/dev/null || true
  # Use systemd-run to create isolated transient unit that won't be killed
  # when we stop pulse-sensor-proxy.service
  if command -v systemd-run >/dev/null 2>&1; then
    # Use UUID for unique unit name (prevents same-second collisions);
    # falls back to a nanosecond timestamp if the kernel UUID source is
    # unreadable.
    UNINSTALL_UUID=$(cat /proc/sys/kernel/random/uuid 2>/dev/null || date +%s%N)
    UNINSTALL_UNIT="pulse-uninstall-${UNINSTALL_UUID}"
    log_info "Spawning isolated uninstaller unit: $UNINSTALL_UNIT"
    # Capture systemd-run's status via `||`: with `set -e` and pipefail a bare
    # failing pipeline would terminate the script before `$?` could be read,
    # so the failure would never be logged. PIPESTATUS[0] is systemd-run's
    # own status, so a failing logger alone is not treated as an uninstall
    # failure.
    UNINSTALL_EXIT=0
    systemd-run \
      --unit="${UNINSTALL_UNIT}" \
      --property="Type=oneshot" \
      --property="Conflicts=pulse-sensor-proxy.service" \
      --collect \
      --wait \
      --quiet \
      -- bash -c "$INSTALLER_PATH --uninstall --purge --quiet >> /var/log/pulse/sensor-proxy/uninstall.log 2>&1" \
      2>&1 | logger -t "$LOG_TAG" -p user.info || UNINSTALL_EXIT=${PIPESTATUS[0]}
    if [[ $UNINSTALL_EXIT -eq 0 ]]; then
      log_info "Uninstaller completed successfully"
    else
      log_error "Uninstaller failed with exit code $UNINSTALL_EXIT"
      exit 1
    fi
  else
    log_warn "systemd-run not available, attempting direct uninstall (may fail)"
    # NOTE(review): unlike the systemd-run path this call omits --purge —
    # confirm whether that difference is intended.
    bash "$INSTALLER_PATH" --uninstall --quiet >> /var/log/pulse/sensor-proxy/uninstall.log 2>&1 || \
      log_error "Uninstaller failed - manual cleanup may be required"
  fi
else
  log_warn "Installer not found at $INSTALLER_PATH, cannot run uninstaller"
  log_info "Manual cleanup required: systemctl stop pulse-sensor-proxy && systemctl disable pulse-sensor-proxy"
fi
log_info "Localhost cleanup initiated (uninstaller running in background)"
else
log_info "Cleaning up remote host: $HOST_CLEAN"
# Try to use proxy's SSH key first (for standalone nodes), fall back to default.
# Options are kept in an array so each one reaches ssh as its own argument.
PROXY_KEY="/var/lib/pulse-sensor-proxy/ssh/id_ed25519"
SSH_OPTS=(-o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5)
if [[ -f "$PROXY_KEY" ]]; then
  log_info "Using proxy SSH key for cleanup"
  SSH_OPTS+=(-i "$PROXY_KEY")
fi
# Remove both pulse-managed-key and pulse-proxy-key entries from remote host.
# The remote command prints SUCCESS only if sed succeeded. `|| true` keeps a
# non-zero ssh status from aborting the script under `set -e`, so the output
# can be inspected and a proper error logged below.
CLEANUP_OUTPUT=$(ssh "${SSH_OPTS[@]}" root@"$HOST_CLEAN" \
  "sed -i -e '/# pulse-managed-key\$/d' -e '/# pulse-proxy-key\$/d' /root/.ssh/authorized_keys && echo 'SUCCESS'" 2>&1) || true
if [[ "$CLEANUP_OUTPUT" == *SUCCESS* ]]; then
  log_info "Successfully cleaned up SSH keys on $HOST_CLEAN"
elif [[ "$CLEANUP_OUTPUT" =~ cpu_thermal|coretemp|k10temp ]]; then
  # Sensor chip names in the reply mean the key is bound to a forced command
  # (common on standalone nodes), so our sed command was never executed.
  log_warn "Cannot cleanup standalone node $HOST_CLEAN (forced command prevents cleanup)"
  log_info "Standalone node keys are read-only (sensors -j) - low security risk"
  log_info "Manual cleanup: ssh root@$HOST_CLEAN \"sed -i '/# pulse-proxy-key\$/d' /root/.ssh/authorized_keys\""
else
  log_error "Failed to clean up SSH keys on $HOST_CLEAN: $CLEANUP_OUTPUT"
  exit 1
fi
fi
fi
# Every step finished without a fatal error: delete the claimed request file
# so it is not left behind, then report success.
rm -f -- "$PROCESSING_FILE"
log_info "Cleanup completed successfully"
exit 0