Fix: alert resolution now records incident timeline during quiet hours

- Fixed early return in handleAlertResolved that skipped incident recording
  (and the AI resolved-alert callback) when quiet hours suppressed recovery
  notifications
- Added Host Agent alert delay configuration (backend + UI)
- Host Agents now have dedicated time threshold settings like other resource types

Related to #1179
This commit is contained in:
rcourtman
2026-02-03 12:49:41 +00:00
parent 174ac481c8
commit 71f80c8a99
6 changed files with 35 additions and 20 deletions

View File

@@ -279,7 +279,7 @@ interface ThresholdsTableProps {
factoryHostDefaults?: Record<string, number | undefined>;
factoryDockerDefaults?: Record<string, number | undefined>;
factoryStorageDefault?: number;
timeThresholds: () => { guest: number; node: number; storage: number; pbs: number };
timeThresholds: () => { guest: number; node: number; storage: number; pbs: number; host: number };
metricTimeThresholds: () => Record<string, Record<string, number>>;
setMetricTimeThresholds: (
value:
@@ -2085,7 +2085,7 @@ export function ThresholdsTable(props: ThresholdsTableProps) {
};
const updateMetricDelay = (
typeKey: 'guest' | 'node' | 'storage' | 'pbs',
typeKey: 'guest' | 'node' | 'storage' | 'pbs' | 'host',
metricKey: string,
value: number | null,
) => {
@@ -3401,6 +3401,10 @@ export function ThresholdsTable(props: ThresholdsTableProps) {
onToggleGlobalDisableOffline={() =>
props.setDisableAllHostsOffline(!props.disableAllHostsOffline())
}
showDelayColumn={true}
globalDelaySeconds={props.timeThresholds().host}
metricDelaySeconds={props.metricTimeThresholds().host ?? {}}
onMetricDelayChange={(metric, value) => updateMetricDelay('host', metric, value)}
factoryDefaults={props.factoryHostDefaults}
onResetDefaults={props.resetHostDefaults}
/>

View File

@@ -154,7 +154,7 @@ const baseProps = () => ({
criticalSizeGiB: 0,
} as SnapshotAlertConfig,
resetSnapshotDefaults: vi.fn(),
timeThresholds: () => ({ guest: 5, node: 5, storage: 5, pbs: 5 }),
timeThresholds: () => ({ guest: 5, node: 5, storage: 5, pbs: 5, host: 5 }),
metricTimeThresholds: () => ({}),
setMetricTimeThresholds: vi.fn(),
activeAlerts: {},

View File

@@ -1001,6 +1001,7 @@ export function Alerts() {
node: DEFAULT_DELAY_SECONDS,
storage: DEFAULT_DELAY_SECONDS,
pbs: DEFAULT_DELAY_SECONDS,
host: DEFAULT_DELAY_SECONDS,
});
setMetricTimeThresholds({});
setScheduleQuietHours(createDefaultQuietHours());
@@ -1136,6 +1137,7 @@ export function Alerts() {
node: config.timeThresholds.node ?? DEFAULT_DELAY_SECONDS,
storage: config.timeThresholds.storage ?? DEFAULT_DELAY_SECONDS,
pbs: config.timeThresholds.pbs ?? DEFAULT_DELAY_SECONDS,
host: config.timeThresholds.host ?? DEFAULT_DELAY_SECONDS,
});
} else {
const fallback = config.timeThreshold && config.timeThreshold > 0 ? config.timeThreshold : DEFAULT_DELAY_SECONDS;
@@ -1144,6 +1146,7 @@ export function Alerts() {
node: fallback,
storage: fallback,
pbs: fallback,
host: fallback,
});
}
if (config.metricTimeThresholds) {
@@ -1615,6 +1618,7 @@ export function Alerts() {
node: DEFAULT_DELAY_SECONDS,
storage: DEFAULT_DELAY_SECONDS,
pbs: DEFAULT_DELAY_SECONDS,
host: DEFAULT_DELAY_SECONDS,
});
const [metricTimeThresholds, setMetricTimeThresholds] =
createSignal<Record<string, Record<string, number>>>({});
@@ -2907,7 +2911,7 @@ interface ThresholdsTabProps {
guestTagWhitelist: () => string[];
guestTagBlacklist: () => string[];
storageDefault: () => number;
timeThresholds: () => { guest: number; node: number; storage: number; pbs: number };
timeThresholds: () => { guest: number; node: number; storage: number; pbs: number; host: number };
metricTimeThresholds: () => Record<string, Record<string, number>>;
overrides: () => Override[];
rawOverridesConfig: () => Record<string, RawOverrideConfig>;

View File

@@ -145,6 +145,7 @@ export interface AlertConfig {
node?: number;
storage?: number;
pbs?: number;
host?: number;
};
metricTimeThresholds?: Record<string, Record<string, number>>;
aggregation?: {

View File

@@ -1475,6 +1475,7 @@ func normalizeTimeThresholds(config *AlertConfig) {
ensureDelay("node")
ensureDelay("storage")
ensureDelay("pbs")
ensureDelay("host")
if delay, ok := config.TimeThresholds["all"]; ok && delay < 0 {
config.TimeThresholds["all"] = defaultDelaySeconds
}

View File

@@ -8649,29 +8649,17 @@ func (m *Monitor) handleAlertResolved(alertID string) {
if m.wsHub != nil {
m.wsHub.BroadcastAlertResolved(alertID)
}
if m.notificationMgr != nil {
m.notificationMgr.CancelAlert(alertID)
if m.notificationMgr.GetNotifyOnResolve() {
if resolved := m.alertManager.GetResolvedAlert(alertID); resolved != nil {
resolvedAlert = resolved
// Check if recovery notification should be suppressed during quiet hours
if m.alertManager.ShouldSuppressResolvedNotification(resolved.Alert) {
return
}
go m.notificationMgr.SendResolvedAlert(resolved)
}
}
}
// Always record incident timeline, regardless of notification suppression.
// This ensures we have a complete history even during quiet hours.
if m.incidentStore != nil {
if resolvedAlert == nil {
resolvedAlert = m.alertManager.GetResolvedAlert(alertID)
}
resolvedAlert = m.alertManager.GetResolvedAlert(alertID)
if resolvedAlert != nil && resolvedAlert.Alert != nil {
m.incidentStore.RecordAlertResolved(resolvedAlert.Alert, resolvedAlert.ResolvedTime)
}
}
// Always trigger AI callback, regardless of notification suppression.
if m.alertResolvedAICallback != nil {
if resolvedAlert == nil {
resolvedAlert = m.alertManager.GetResolvedAlert(alertID)
@@ -8680,6 +8668,23 @@ func (m *Monitor) handleAlertResolved(alertID string) {
go m.alertResolvedAICallback(resolvedAlert.Alert)
}
}
// Handle notifications (may be suppressed by quiet hours)
if m.notificationMgr != nil {
m.notificationMgr.CancelAlert(alertID)
if m.notificationMgr.GetNotifyOnResolve() {
if resolvedAlert == nil {
resolvedAlert = m.alertManager.GetResolvedAlert(alertID)
}
if resolvedAlert != nil {
// Check if recovery notification should be suppressed during quiet hours
if m.alertManager.ShouldSuppressResolvedNotification(resolvedAlert.Alert) {
return
}
go m.notificationMgr.SendResolvedAlert(resolvedAlert)
}
}
}
}
func (m *Monitor) handleAlertAcknowledged(alert *alerts.Alert, user string) {