#!/bin/bash
# pulse-sensor-cleanup.sh - Complete Pulse footprint removal when nodes are removed
# Removes: SSH keys, proxy service, binaries, API tokens, and LXC bind mounts
# This script is triggered by a systemd path unit when cleanup-request.json is created

set -euo pipefail

# Configuration
WORK_DIR="/var/lib/pulse-sensor-proxy"
CLEANUP_REQUEST="${WORK_DIR}/cleanup-request.json"
LOCKFILE="${WORK_DIR}/cleanup.lock"
LOG_TAG="pulse-sensor-cleanup"
INSTALLER_PATH="/opt/pulse/sensor-proxy/install-sensor-proxy.sh"
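
# Illustrative sketch of the path unit that triggers this script (the unit names
# and exact layout shipped by the Pulse installer may differ):
#   [Path]
#   PathExists=/var/lib/pulse-sensor-proxy/cleanup-request.json
#   Unit=pulse-sensor-cleanup.service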

# Logging functions
log_info() {
    logger -t "$LOG_TAG" -p user.info "$1"
    echo "[INFO] $1"
}

log_warn() {
    logger -t "$LOG_TAG" -p user.warning "$1"
    echo "[WARN] $1"
}

log_error() {
    logger -t "$LOG_TAG" -p user.err "$1"
    echo "[ERROR] $1" >&2
}

# Acquire exclusive lock to prevent concurrent cleanup runs
exec 200>"$LOCKFILE"
if ! flock -n 200; then
    log_info "Another cleanup instance is running, exiting"
    exit 0
fi
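
# Note: file descriptor 200 stays open for the lifetime of the script, so the
# flock is released automatically when the process exits (normally or not).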

# Check if cleanup request file exists
if [[ ! -f "$CLEANUP_REQUEST" ]]; then
    log_info "No cleanup request found at $CLEANUP_REQUEST"
    exit 0
fi
log_info "Processing cleanup request from $CLEANUP_REQUEST"
|
|
|
|
# Read and parse the cleanup request
|
|
CLEANUP_DATA=$(cat "$CLEANUP_REQUEST")
|
|
HOST=$(echo "$CLEANUP_DATA" | grep -o '"host":"[^"]*"' | cut -d'"' -f4 || echo "")
|
|
REQUESTED_AT=$(echo "$CLEANUP_DATA" | grep -o '"requestedAt":"[^"]*"' | cut -d'"' -f4 || echo "")
|
|
|
|
log_info "Cleanup requested at: ${REQUESTED_AT:-unknown}"
|
|
|
|
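
# Example request payload (illustrative values; field names match the parsing above):
#   {"host":"https://pve1.example.com:8006","requestedAt":"2025-01-01T12:00:00Z"}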

# Rename request file to .processing to prevent re-triggering while allowing retry on failure
PROCESSING_FILE="${CLEANUP_REQUEST}.processing"
mv "$CLEANUP_REQUEST" "$PROCESSING_FILE" 2>/dev/null || {
    log_warn "Failed to rename cleanup request file, may have been processed by another instance"
    exit 0
}
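
# The authorized_keys entries removed below are identified by a trailing marker
# comment; illustrative examples (key type and material are assumptions):
#   ssh-ed25519 AAAA... pulse@node # pulse-managed-key
#   ssh-ed25519 AAAA... pulse@node # pulse-proxy-key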

# If no specific host was provided, clean up all known nodes
if [[ -z "$HOST" ]]; then
    log_info "No specific host provided - cleaning up all cluster nodes"

    # Discover cluster nodes
    if command -v pvecm >/dev/null 2>&1; then
        CLUSTER_NODES=$(pvecm status 2>/dev/null | grep -vEi "qdevice" | awk '/0x[0-9a-f]+.*[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/ {for(i=1;i<=NF;i++) if($i ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) print $i}' || true)

        if [[ -n "$CLUSTER_NODES" ]]; then
            for node_ip in $CLUSTER_NODES; do
                log_info "Cleaning up SSH keys on node $node_ip"

                # Remove both pulse-managed-key and pulse-proxy-key entries
                ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 root@"$node_ip" \
                    "sed -i -e '/# pulse-managed-key\$/d' -e '/# pulse-proxy-key\$/d' /root/.ssh/authorized_keys" 2>&1 | \
                    logger -t "$LOG_TAG" -p user.info || \
                    log_warn "Failed to clean up SSH keys on $node_ip"
            done
            log_info "Cluster cleanup completed"
        else
            # Standalone node - clean up localhost
            log_info "Standalone node detected - cleaning up localhost"
            sed -i -e '/# pulse-managed-key$/d' -e '/# pulse-proxy-key$/d' /root/.ssh/authorized_keys 2>&1 | \
                logger -t "$LOG_TAG" -p user.info || \
                log_warn "Failed to clean up SSH keys on localhost"
        fi
    else
        log_warn "pvecm command not available - cleaning up localhost only"
        sed -i -e '/# pulse-managed-key$/d' -e '/# pulse-proxy-key$/d' /root/.ssh/authorized_keys 2>&1 | \
            logger -t "$LOG_TAG" -p user.info || \
            log_warn "Failed to clean up SSH keys on localhost"
    fi
else
    log_info "Cleaning up specific host: $HOST"

    # Extract hostname/IP from the host URL (strip scheme and port)
    HOST_CLEAN=$(echo "$HOST" | sed -e 's|^https\?://||' -e 's|:.*$||')
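    # e.g. "https://pve1.example.com:8006" -> "pve1.example.com" (illustrative value)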

    # Check if this is localhost (by IP, hostname, or FQDN)
    LOCAL_IPS=$(hostname -I 2>/dev/null || echo "")
    LOCAL_HOSTNAME=$(hostname 2>/dev/null || echo "")
    LOCAL_FQDN=$(hostname -f 2>/dev/null || echo "")
    IS_LOCAL=false

    # Check against all local IPs
    for local_ip in $LOCAL_IPS; do
        if [[ "$HOST_CLEAN" == "$local_ip" ]]; then
            IS_LOCAL=true
            break
        fi
    done

    # Check against loopback names, the hostname, and the FQDN
    if [[ "$HOST_CLEAN" == "127.0.0.1" || "$HOST_CLEAN" == "localhost" || \
          "$HOST_CLEAN" == "$LOCAL_HOSTNAME" || "$HOST_CLEAN" == "$LOCAL_FQDN" ]]; then
        IS_LOCAL=true
    fi
if [[ "$IS_LOCAL" == true ]]; then
|
|
log_info "Performing full cleanup on localhost"
|
|
|
|
# 1. Remove SSH keys
|
|
log_info "Removing SSH keys from authorized_keys"
|
|
sed -i -e '/# pulse-managed-key$/d' -e '/# pulse-proxy-key$/d' /root/.ssh/authorized_keys 2>&1 | \
|
|
logger -t "$LOG_TAG" -p user.info || \
|
|
log_warn "Failed to clean up SSH keys"
|
|
|
|

        # 2. Delete API tokens and user
        log_info "Removing Proxmox API tokens and pulse-monitor user"
        if command -v pveum >/dev/null 2>&1; then
            # Try JSON output first (pveum with --output-format json)
            TOKEN_IDS=""
            if command -v python3 >/dev/null 2>&1; then
                # Try pveum with JSON output
                if TOKEN_JSON=$(pveum user token list pulse-monitor@pam --output-format json 2>/dev/null); then
                    TOKEN_IDS=$(echo "$TOKEN_JSON" | python3 -c '
import sys, json
try:
    data = json.load(sys.stdin)
    if isinstance(data, list):
        for item in data:
            if "tokenid" in item:
                print(item["tokenid"])
except: pass
' || true)
                fi
            fi

            # Fall back to pvesh JSON API if pveum JSON did not work
            if [[ -z "$TOKEN_IDS" ]] && command -v pvesh >/dev/null 2>&1; then
                if TOKEN_JSON=$(pvesh get /access/users/pulse-monitor@pam/token 2>/dev/null); then
                    TOKEN_IDS=$(echo "$TOKEN_JSON" | python3 -c '
import sys, json
try:
    data = json.load(sys.stdin)
    if isinstance(data, dict) and "data" in data:
        for item in data["data"]:
            if "tokenid" in item:
                print(item["tokenid"])
except: pass
' 2>/dev/null || true)
                fi
            fi

            # Last resort: parse table output with better filtering
            if [[ -z "$TOKEN_IDS" ]]; then
                TOKEN_IDS=$(pveum user token list pulse-monitor@pam 2>/dev/null | \
                    awk 'NR>1 && /^[[:space:]]*pulse/ {print $1}' | grep -v '^[│┌└╞─]' | grep -v '^$' || true)
            fi

            if [[ -n "$TOKEN_IDS" ]]; then
                for token_id in $TOKEN_IDS; do
                    log_info "Deleting API token: $token_id"
                    pveum user token remove pulse-monitor@pam "${token_id}" 2>&1 | \
                        logger -t "$LOG_TAG" -p user.info || \
                        log_warn "Failed to delete token $token_id"
                done
            else
                log_info "No API tokens found for pulse-monitor@pam"
            fi

            # Remove the pulse-monitor user
            log_info "Removing pulse-monitor@pam user"
            pveum user delete pulse-monitor@pam 2>&1 | \
                logger -t "$LOG_TAG" -p user.info || \
                log_warn "pulse-monitor@pam user not found or already removed"
        else
            log_warn "pveum command not available, skipping API token cleanup"
        fi
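
        # For reference, the JSON form parsed above looks roughly like this
        # (illustrative token id; other fields omitted):
        #   [{"tokenid":"pulse-abc123","privsep":1}]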

        # 3. Remove LXC bind mounts
        log_info "Removing LXC bind mounts from container configs"
        if command -v pct >/dev/null 2>&1; then
            for ctid in $(pct list 2>/dev/null | awk 'NR>1 {print $1}' || true); do
                CONF_FILE="/etc/pve/lxc/${ctid}.conf"
                if [[ -f "$CONF_FILE" ]]; then
                    # Find pulse-sensor-proxy mount points and remove them using pct
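                    # A matching entry looks roughly like this (illustrative; actual
                    # source and target paths depend on the installer):
                    #   mp0: /var/lib/pulse-sensor-proxy,mp=/var/lib/pulse-sensor-proxy,ro=1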
                    for mp_key in $(grep -o "^mp[0-9]\+:" "$CONF_FILE" | grep -f <(grep "pulse-sensor-proxy" "$CONF_FILE" | grep -o "^mp[0-9]\+:") || true); do
                        mp_num="${mp_key%:}"
                        log_info "Removing ${mp_num} (pulse-sensor-proxy) from container $ctid"
                        if pct set "$ctid" -delete "${mp_num}" 2>&1 | logger -t "$LOG_TAG" -p user.info; then
                            log_info "Successfully removed ${mp_num} from container $ctid"
                        else
                            log_warn "Failed to remove ${mp_num} from container $ctid"
                        fi
                    done
                fi
            done
        fi
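
        # Illustrative follow-up check (not run automatically):
        #   grep -l pulse-sensor-proxy /etc/pve/lxc/*.conf || echo "no pulse bind mounts left"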

        # 4. Uninstall proxy service and remove binaries via isolated transient unit
        log_info "Starting full uninstallation (service, binaries, configs)"
        if [[ -x "$INSTALLER_PATH" ]]; then
            # Use systemd-run to create isolated transient unit that won't be killed
            # when we stop pulse-sensor-proxy.service
            if command -v systemd-run >/dev/null 2>&1; then
                # Use UUID for unique unit name (prevents same-second collisions)
                UNINSTALL_UUID=$(cat /proc/sys/kernel/random/uuid 2>/dev/null || date +%s%N)
                UNINSTALL_UNIT="pulse-uninstall-${UNINSTALL_UUID}"
                log_info "Spawning isolated uninstaller unit: $UNINSTALL_UNIT"
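
                # Flag notes: Conflicts= makes systemd stop pulse-sensor-proxy.service
                # when this unit starts, --collect garbage-collects the transient unit
                # afterwards, and --wait blocks until the uninstaller has finished.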
                if systemd-run \
                    --unit="${UNINSTALL_UNIT}" \
                    --property="Type=oneshot" \
                    --property="Conflicts=pulse-sensor-proxy.service" \
                    --collect \
                    --wait \
                    --quiet \
                    -- bash -c "$INSTALLER_PATH --uninstall --purge --quiet >> /var/log/pulse/sensor-proxy/uninstall.log 2>&1" \
                    2>&1 | logger -t "$LOG_TAG" -p user.info; then
                    log_info "Uninstaller completed successfully"
                else
                    # Under pipefail, $? reflects systemd-run's failure (logger normally
                    # succeeds); running the pipeline as an if-condition keeps set -e
                    # from exiting before this error handling runs.
                    UNINSTALL_EXIT=$?
                    log_error "Uninstaller failed with exit code $UNINSTALL_EXIT"
                    exit 1
                fi
            else
                log_warn "systemd-run not available, attempting direct uninstall (may fail)"
                bash "$INSTALLER_PATH" --uninstall --quiet >> /var/log/pulse/sensor-proxy/uninstall.log 2>&1 || \
                    log_error "Uninstaller failed - manual cleanup may be required"
            fi
        else
            log_warn "Installer not found at $INSTALLER_PATH, cannot run uninstaller"
            log_info "Manual cleanup required: systemctl stop pulse-sensor-proxy && systemctl disable pulse-sensor-proxy"
        fi
log_info "Localhost cleanup initiated (uninstaller running in background)"
|
|
    else
        log_info "Cleaning up remote host: $HOST_CLEAN"

        # Try to use the proxy's SSH key first (for standalone nodes), fall back to default
        PROXY_KEY="/var/lib/pulse-sensor-proxy/ssh/id_ed25519"
        SSH_CMD="ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5"

        if [[ -f "$PROXY_KEY" ]]; then
            log_info "Using proxy SSH key for cleanup"
            SSH_CMD="$SSH_CMD -i $PROXY_KEY"
        fi

        # Remove both pulse-managed-key and pulse-proxy-key entries from the remote host.
        # "|| true" keeps set -e from aborting on ssh failure; errors are detected by
        # inspecting the captured output below.
        CLEANUP_OUTPUT=$($SSH_CMD root@"$HOST_CLEAN" \
            "sed -i -e '/# pulse-managed-key\$/d' -e '/# pulse-proxy-key\$/d' /root/.ssh/authorized_keys && echo 'SUCCESS'" 2>&1 || true)

        if echo "$CLEANUP_OUTPUT" | grep -q "SUCCESS"; then
            log_info "Successfully cleaned up SSH keys on $HOST_CLEAN"
        else
            # Check if this is a standalone node with forced commands (common case)
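            # On standalone nodes the proxy key is typically installed with a forced
            # command that only runs "sensors -j", so the remote sed never executes and
            # the sensors JSON (coretemp/k10temp/...) comes back instead, which is what
            # the check below looks for. Illustrative authorized_keys entry (options and
            # key material are assumptions):
            #   command="sensors -j" ssh-ed25519 AAAA... # pulse-proxy-key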
if echo "$CLEANUP_OUTPUT" | grep -q "cpu_thermal\|coretemp\|k10temp"; then
|
|
log_warn "Cannot cleanup standalone node $HOST_CLEAN (forced command prevents cleanup)"
|
|
log_info "Standalone node keys are read-only (sensors -j) - low security risk"
|
|
log_info "Manual cleanup: ssh root@$HOST_CLEAN \"sed -i '/# pulse-proxy-key\$/d' /root/.ssh/authorized_keys\""
|
|
else
|
|
log_error "Failed to clean up SSH keys on $HOST_CLEAN: $CLEANUP_OUTPUT"
|
|
exit 1
|
|
fi
|
|
fi
|
|
fi
|
|
fi

# Remove processing file on success
rm -f "$PROCESSING_FILE"

log_info "Cleanup completed successfully"
exit 0