fix: Preserve alert acknowledgement across transient clears

When a powered-off VM is backed up by Proxmox, the VM status briefly
changes (e.g., to "running" during backup). This caused the powered-off
alert to be cleared, deleting the ackState record. When the backup
completed and the alert was recreated, it appeared as a new unacknowledged
alert, generating a new notification.

The fix keeps the ackState entry when an alert is removed, so that
preserveAlertState can restore the acknowledgement when the same alert
reappears. During Cleanup(), an ackState entry is deleted only if its alert
is no longer active and the entry's recorded time is more than 1 hour old.

Related to #937
This commit is contained in:
rcourtman
2025-12-28 10:24:04 +00:00
parent fb8ba32b97
commit 3830e701b4

View File

@@ -6159,7 +6159,9 @@ func (m *Manager) preserveAlertState(alertID string, updated *Alert) {
// removeActiveAlertNoLock removes alertID from m.activeAlerts.
//
// It performs no locking itself; the "NoLock" suffix suggests the caller is
// expected to hold the manager's lock already (assumption from the name —
// confirm against callers).
//
// The matching m.ackState entry is deliberately left untouched so that an
// acknowledgement survives a transient clear of the alert.
func (m *Manager) removeActiveAlertNoLock(alertID string) {
	delete(m.activeAlerts, alertID)
	// NOTE: Don't delete ackState here - preserve it so if the same alert
	// reappears (e.g., powered-off VM during backup), the acknowledgement
	// is restored via preserveAlertState. ackState is cleaned up in Cleanup().
}
// GetActiveAlerts returns all active alerts
@@ -7945,6 +7947,17 @@ func (m *Manager) Cleanup(maxAge time.Duration) {
}
}
// Clean up stale ackState entries for alerts that no longer exist
// Keep ackState for 1 hour to handle transient alert clears (e.g., backups)
ackStateTTL := 1 * time.Hour
for id, record := range m.ackState {
if _, alertExists := m.activeAlerts[id]; !alertExists {
if now.Sub(record.time) > ackStateTTL {
delete(m.ackState, id)
}
}
}
// Clean up recent alerts older than suppression window
suppressionWindow := time.Duration(m.config.SuppressionWindow) * time.Minute
if suppressionWindow == 0 {