diff --git a/frontend-modern/src/stores/websocket.ts b/frontend-modern/src/stores/websocket.ts
index 1d613d530..efbb51251 100644
--- a/frontend-modern/src/stores/websocket.ts
+++ b/frontend-modern/src/stores/websocket.ts
@@ -99,6 +99,11 @@ export function createWebSocketStore(url: string) {
   let consecutiveEmptyHostUpdates = 0;
   let hasReceivedNonEmptyHosts = false;
+  // Track consecutive empty Kubernetes clusters payloads (same protection as dockerHosts/hosts)
+  // This prevents clusters from disappearing when transient empty arrays are received.
+  let consecutiveEmptyK8sUpdates = 0;
+  let hasReceivedNonEmptyK8sClusters = false;
+
   const mergeDockerHostRevocations = (incomingHosts: DockerHost[]) => {
     if (!Array.isArray(incomingHosts) || incomingHosts.length === 0) {
       return incomingHosts;
     }
@@ -311,6 +316,8 @@ export function createWebSocketStore(url: string) {
       hasReceivedNonEmptyDockerHosts = false;
       consecutiveEmptyHostUpdates = 0;
       hasReceivedNonEmptyHosts = false;
+      consecutiveEmptyK8sUpdates = 0;
+      hasReceivedNonEmptyK8sClusters = false;
 
       // Start heartbeat to keep connection alive
       if (heartbeatInterval) {
@@ -597,11 +604,53 @@ export function createWebSocketStore(url: string) {
             : [];
           setState('removedDockerHosts', reconcile(removed, { key: 'id' }));
         }
+        // Process Kubernetes clusters with transient empty payload protection
+        // (same logic as dockerHosts/hosts to prevent UI flapping)
         if (message.data.kubernetesClusters !== undefined) {
-          const clusters = Array.isArray(message.data.kubernetesClusters)
-            ? (message.data.kubernetesClusters as KubernetesCluster[])
-            : [];
-          setState('kubernetesClusters', reconcile(clusters, { key: 'id' }));
+          if (Array.isArray(message.data.kubernetesClusters)) {
+            const incomingClusters = message.data.kubernetesClusters as KubernetesCluster[];
+            if (incomingClusters.length === 0) {
+              consecutiveEmptyK8sUpdates += 1;
+
+              // Check if all existing clusters are stale (>60s since lastSeen)
+              // If so, they're probably really gone - apply the empty update immediately
+              const now = Date.now();
+              const staleThresholdMs = 60_000; // 60 seconds
+              const existingClusters = state.kubernetesClusters || [];
+              const allStale = existingClusters.length === 0 || existingClusters.every(
+                (c) => !c.lastSeen || (now - c.lastSeen) > staleThresholdMs
+              );
+
+              const shouldApply =
+                !hasReceivedNonEmptyK8sClusters ||
+                allStale ||
+                consecutiveEmptyK8sUpdates >= 3 ||
+                message.type === WEBSOCKET.MESSAGE_TYPES.INITIAL_STATE;
+
+              if (shouldApply) {
+                logger.debug('[WebSocket] Updating kubernetesClusters', {
+                  count: incomingClusters.length,
+                  reason: allStale ? 'allStale' : 'threshold',
+                });
+                setState('kubernetesClusters', reconcile(incomingClusters, { key: 'id' }));
+              } else {
+                logger.debug('[WebSocket] Skipping transient empty kubernetesClusters payload', {
+                  streak: consecutiveEmptyK8sUpdates,
+                });
+              }
+            } else {
+              consecutiveEmptyK8sUpdates = 0;
+              hasReceivedNonEmptyK8sClusters = true;
+              logger.debug('[WebSocket] Updating kubernetesClusters', {
+                count: incomingClusters.length,
+              });
+              setState('kubernetesClusters', reconcile(incomingClusters, { key: 'id' }));
+            }
+          } else {
+            logger.warn('[WebSocket] Received non-array kubernetesClusters payload', {
+              type: typeof message.data.kubernetesClusters,
+            });
+          }
         }
         if (message.data.removedKubernetesClusters !== undefined) {
           const removed = Array.isArray(message.data.removedKubernetesClusters)
diff --git a/internal/hostmetrics/zfs.go b/internal/hostmetrics/zfs.go
index bb57b419b..7062b190f 100644
--- a/internal/hostmetrics/zfs.go
+++ b/internal/hostmetrics/zfs.go
@@ -74,22 +74,26 @@ func disksFromZpoolStats(
 		// For RAIDZ/mirror pools, zpool SIZE is raw capacity (sum of all disks),
 		// but users expect usable capacity (accounting for parity/redundancy).
 		// The dataset's Total (from statfs) gives usable capacity.
-		// Use dataset stats when available and smaller than zpool size. (issue #1052)
+		// Similarly, zpool ALLOC includes parity overhead, but dataset Used gives
+		// actual data usage. Use dataset stats when available and smaller than
+		// zpool size. (issue #1052)
 		totalBytes := stat.Size
+		usedBytes := stat.Alloc
 		freeBytes := stat.Free
 		if ds.Total > 0 && ds.Total < stat.Size {
 			totalBytes = ds.Total
+			usedBytes = ds.Used
 			freeBytes = ds.Free
 		}
 
-		usage := clampPercent(calculatePercent(totalBytes, stat.Alloc))
+		usage := clampPercent(calculatePercent(totalBytes, usedBytes))
 		disks = append(disks, agentshost.Disk{
 			Device:     pool,
 			Mountpoint: mp,
 			Filesystem: "zfs",
 			Type:       "zfs",
 			TotalBytes: int64(totalBytes),
-			UsedBytes:  int64(stat.Alloc),
+			UsedBytes:  int64(usedBytes),
 			FreeBytes:  int64(freeBytes),
 			Usage:      usage,
 		})
diff --git a/internal/hostmetrics/zfs_test.go b/internal/hostmetrics/zfs_test.go
index 56b6cd2c7..f5b722db2 100644
--- a/internal/hostmetrics/zfs_test.go
+++ b/internal/hostmetrics/zfs_test.go
@@ -53,15 +53,17 @@ func TestSummarizeZFSPoolsRAIDZCapacity(t *testing.T) {
 	// Simulate a RAIDZ1 pool with 3 disks:
 	// - Raw SIZE from zpool list: 43.6 TB (sum of all disks)
 	// - Usable capacity from statfs: 29 TB (after RAIDZ1 parity overhead)
+	// - zpool ALLOC: 7 GB (includes parity data)
+	// - zfs USED: 4.6 GB (actual user data)
 	queryZpoolStats = func(ctx context.Context, pools []string) (map[string]zpoolStats, error) {
 		return map[string]zpoolStats{
-			"Main": {Size: 43600000000000, Alloc: 962000000, Free: 43599038000000},
+			"Main": {Size: 43600000000000, Alloc: 7000000000, Free: 43593000000000},
 		}, nil
 	}
 
-	// Dataset stats from statfs reflect usable capacity (29 TB)
+	// Dataset stats from statfs reflect usable capacity (29 TB) and actual data usage (4.6 GB)
 	datasets := []zfsDatasetUsage{
-		{Pool: "Main", Dataset: "Main", Mountpoint: "/mnt/Main", Total: 29000000000000, Used: 962000000, Free: 28999038000000},
+		{Pool: "Main", Dataset: "Main", Mountpoint: "/mnt/Main", Total: 29000000000000, Used: 4600000000, Free: 28995400000000},
 	}
 
 	disks := summarizeZFSPools(context.Background(), datasets)
@@ -80,20 +82,20 @@ func TestSummarizeZFSPoolsRAIDZCapacity(t *testing.T) {
 		t.Errorf("expected TotalBytes %d (usable capacity), got %d (might be using raw capacity)", expectedTotal, main.TotalBytes)
 	}
 
-	// Used should come from zpool stats (accurate allocation)
-	expectedUsed := int64(962000000)
+	// Used should come from dataset stats (4.6 GB actual data), not zpool alloc (7 GB with parity)
+	expectedUsed := int64(4600000000)
 	if main.UsedBytes != expectedUsed {
-		t.Errorf("expected UsedBytes %d, got %d", expectedUsed, main.UsedBytes)
+		t.Errorf("expected UsedBytes %d (dataset used), got %d (might be using zpool alloc which includes parity)", expectedUsed, main.UsedBytes)
 	}
 
 	// Free should use dataset stats when we're using dataset Total
-	expectedFree := int64(28999038000000)
+	expectedFree := int64(28995400000000)
 	if main.FreeBytes != expectedFree {
 		t.Errorf("expected FreeBytes %d, got %d", expectedFree, main.FreeBytes)
 	}
 
-	// Usage should be calculated against usable capacity
-	// 962000000 / 29000000000000 * 100 ≈ 0.003%
+	// Usage should be calculated against usable capacity with actual used data
+	// 4600000000 / 29000000000000 * 100 ≈ 0.016%
 	if main.Usage > 0.1 {
 		t.Errorf("expected usage ~0%%, got %.2f%% (might be calculated against wrong total)", main.Usage)
 	}
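
For illustration only (not part of the patch): a minimal Go sketch of the capacity math the test above asserts, using the same assumed RAIDZ1 figures. The constant names (zpoolSize, zpoolAlloc, dsTotal, dsUsed) are hypothetical stand-ins, not identifiers from the codebase. The point is that pairing the dataset's usable Total with zpool ALLOC mixes parity-inflated usage into the percentage, while pairing it with the dataset's Used keeps numerator and denominator consistent.

package main

import "fmt"

func main() {
	// Assumed figures, mirroring the test case above (RAIDZ1, 3 disks).
	const (
		zpoolSize  = 43_600_000_000_000.0 // zpool SIZE: raw capacity of all disks (43.6 TB)
		zpoolAlloc = 7_000_000_000.0      // zpool ALLOC: written blocks including parity (7 GB)
		dsTotal    = 29_000_000_000_000.0 // statfs Total: usable capacity after parity (29 TB)
		dsUsed     = 4_600_000_000.0      // statfs Used: actual user data (4.6 GB)
	)

	fmt.Printf("raw vs usable capacity: %.1f TB vs %.1f TB\n", zpoolSize/1e12, dsTotal/1e12)

	// Previous behaviour: usable total paired with parity-inflated alloc (reports 7 GB used).
	fmt.Printf("mixed usage:      %.4f%%\n", zpoolAlloc/dsTotal*100)
	// Patched behaviour: usable total paired with dataset used (reports 4.6 GB used).
	fmt.Printf("consistent usage: %.4f%%\n", dsUsed/dsTotal*100)
}

Running this prints roughly 0.024% for the mixed pairing versus 0.016% for the consistent one, matching the ≈0.016% the updated test comment expects.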