fix: Deduplicate disks by device+total to prevent Synology storage overcounting. Related to #953

Synology NAS creates multiple shared folders (e.g., /volume1/docker, /volume1/photos)
that are all mount points on the same underlying BTRFS volume. Each mount reports the
same 16TB total, so Pulse showed 64TB+ instead of 16TB.

The fix tracks device+total combinations and only counts each unique pair once.
When duplicates are found, the shallowest mountpoint (e.g., /volume1) is preferred.

Added a unit test to verify the deduplication works correctly.
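
The core of the change is small enough to sketch on its own. The snippet below is a standalone illustration of the device+total keying and the shallowest-mountpoint preference; the disk struct and dedupeDisks helper are simplified stand-ins for this example only, not the actual agentshost.Disk type or the patched collectDisks function shown in the diffs below.

package main

import "fmt"

// disk is a simplified stand-in for the agent's disk record (illustrative only).
type disk struct {
	Device     string
	Mountpoint string
	Total      uint64
}

// dedupeDisks keeps one entry per device+total pair, preferring the
// shallowest (shortest) mountpoint when the same pair appears again.
func dedupeDisks(in []disk) []disk {
	byKey := make(map[string]int, len(in)) // "device:total" -> index into out
	out := make([]disk, 0, len(in))
	for _, d := range in {
		key := fmt.Sprintf("%s:%d", d.Device, d.Total)
		if i, ok := byKey[key]; ok {
			// Same underlying volume already recorded; keep the shallower path.
			if len(d.Mountpoint) < len(out[i].Mountpoint) {
				out[i] = d
			}
			continue
		}
		byKey[key] = len(out)
		out = append(out, d)
	}
	return out
}

func main() {
	disks := dedupeDisks([]disk{
		{"/dev/vg1000/lv", "/volume1/docker", 16 << 40},
		{"/dev/vg1000/lv", "/volume1", 16 << 40},
		{"/dev/vg1000/lv", "/volume1/photos", 16 << 40},
		{"/dev/sdb1", "/mnt/backup", 4 << 40},
	})
	// Keeps /volume1 once plus /mnt/backup: 20TB total instead of 52TB.
	for _, d := range disks {
		fmt.Printf("%s %s %d\n", d.Device, d.Mountpoint, d.Total)
	}
}

Keying on total bytes as well as the device name avoids collapsing genuinely distinct filesystems that happen to share a device string.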
rcourtman
2025-12-29 14:03:32 +00:00
parent fd1f94babf
commit 4ce1d551e4
4 changed files with 437 additions and 19 deletions

View File

@@ -22,6 +22,9 @@ func TestIsMockResource(t *testing.T) {
if !IsMockResource("node/pve1", "", "") {
t.Fatal("expected mock resource to be detected for pattern match")
}
if IsMockResource("", "", "") {
t.Fatal("expected empty resource values to be treated as non-mock")
}
if IsMockResource("node/prod1", "production", "node1") {
t.Fatal("expected non-mock resource to return false")
}
@@ -71,7 +74,7 @@ func TestGenerateDemoAIResponse(t *testing.T) {
{"backup", "pbs backup status", "Backup Status Review"},
{"cpu", "cpu load is high", "CPU/Performance Analysis"},
{"hello", "hello there", "Pulse AI Assistant"},
{"default", "tell me something", "This Demo Shows"},
{"default", "status report", "This Demo Shows"},
}
for _, tt := range tests {

View File

@@ -1,9 +1,14 @@
package ai
import (
"context"
"errors"
"strings"
"testing"
"time"
"github.com/rcourtman/pulse-go-rewrite/internal/config"
"github.com/rcourtman/pulse-go-rewrite/internal/ai/providers"
"github.com/rcourtman/pulse-go-rewrite/internal/models"
)
@@ -237,6 +242,22 @@ func TestKubernetesPodReason(t *testing.T) {
},
contains: []string{"containers=app"},
},
{
name: "pod with message and multiple containers",
pod: models.KubernetesPod{
Phase: "Failed",
Message: strings.Repeat("x", maxKubernetesMessageLength+10),
Restarts: 3,
Containers: []models.KubernetesPodContainer{
{Name: "ok", Ready: true, State: "running"},
{Name: "one", Ready: false, State: "waiting"},
{Name: "two", Ready: false, State: "terminated"},
{Name: "three", Ready: false, State: "waiting"},
{Name: "four", Ready: false, State: "waiting"},
},
},
contains: []string{"message=", "containers=", "restarts=3"},
},
}
for _, tt := range tests {
@@ -315,6 +336,16 @@ func TestIsKubernetesDeploymentHealthy(t *testing.T) {
},
expected: false,
},
{
name: "not all updated",
deployment: models.KubernetesDeployment{
DesiredReplicas: 3,
ReadyReplicas: 3,
AvailableReplicas: 3,
UpdatedReplicas: 2,
},
expected: false,
},
}
for _, tt := range tests {
@@ -465,6 +496,277 @@ func TestFormatPodRestarts(t *testing.T) {
t.Errorf("Expected 2 lines (limit), got %d", len(result))
}
})
t.Run("ties sort by name", func(t *testing.T) {
issues := []podIssue{
{namespace: "ns", name: "b-pod", restarts: 3},
{namespace: "ns", name: "a-pod", restarts: 3},
}
result := formatPodRestarts(issues, 10)
if len(result) != 2 {
t.Fatalf("Expected 2 lines, got %d", len(result))
}
if !strings.Contains(result[0], "a-pod") {
t.Fatalf("Expected tie-breaker to sort by name, got %q", result[0])
}
})
}
// ========================================
// findKubernetesCluster tests
// ========================================
func TestFindKubernetesCluster(t *testing.T) {
clusters := []models.KubernetesCluster{
{ID: "cluster-1"},
{ID: "cluster-2"},
}
cluster, ok := findKubernetesCluster(clusters, "cluster-2")
if !ok || cluster.ID != "cluster-2" {
t.Fatalf("expected to find cluster-2, got ok=%t id=%s", ok, cluster.ID)
}
if _, ok := findKubernetesCluster(clusters, "missing"); ok {
t.Fatal("expected missing cluster to return ok=false")
}
}
// ========================================
// kubernetesClusterDisplayName tests
// ========================================
func TestKubernetesClusterDisplayName(t *testing.T) {
cluster := models.KubernetesCluster{
ID: "id",
Name: "name",
DisplayName: "display",
CustomDisplayName: "custom",
}
if got := kubernetesClusterDisplayName(cluster); got != "custom" {
t.Fatalf("expected custom display name, got %s", got)
}
cluster.CustomDisplayName = ""
if got := kubernetesClusterDisplayName(cluster); got != "display" {
t.Fatalf("expected display name, got %s", got)
}
cluster.DisplayName = ""
if got := kubernetesClusterDisplayName(cluster); got != "name" {
t.Fatalf("expected name, got %s", got)
}
cluster.Name = ""
if got := kubernetesClusterDisplayName(cluster); got != "id" {
t.Fatalf("expected id, got %s", got)
}
}
// ========================================
// summarizeKubernetesNodes tests
// ========================================
func TestSummarizeKubernetesNodes(t *testing.T) {
nodes := []models.KubernetesNode{
{Name: "node-1", Ready: true},
{Name: "node-2", Ready: false, Unschedulable: true},
}
summary, issues := summarizeKubernetesNodes(nodes)
if !strings.Contains(summary, "2 total, 1 ready, 1 not ready, 1 unschedulable") {
t.Fatalf("unexpected summary: %s", summary)
}
if len(issues) != 1 || !strings.Contains(issues[0], "node-2") {
t.Fatalf("unexpected issues: %+v", issues)
}
}
func TestSummarizeKubernetesNodes_Truncates(t *testing.T) {
nodes := make([]models.KubernetesNode, maxKubernetesNodeIssues+1)
for i := range nodes {
nodes[i] = models.KubernetesNode{Name: "node-" + string(rune('a'+i)), Ready: false}
}
_, issues := summarizeKubernetesNodes(nodes)
if len(issues) != maxKubernetesNodeIssues+1 {
t.Fatalf("expected truncated issues list, got %d", len(issues))
}
if !strings.Contains(issues[len(issues)-1], "and 1 more") {
t.Fatalf("expected truncation message, got %s", issues[len(issues)-1])
}
}
// ========================================
// summarizeKubernetesPods tests
// ========================================
func TestSummarizeKubernetesPods(t *testing.T) {
pods := []models.KubernetesPod{
{
Name: "ok",
Namespace: "default",
Phase: "Running",
Containers: []models.KubernetesPodContainer{
{Name: "app", Ready: true, State: "running"},
},
},
{
Name: "bad",
Namespace: "default",
Phase: "Running",
Restarts: 2,
Containers: []models.KubernetesPodContainer{
{Name: "app", Ready: false, State: "waiting", Reason: "CrashLoopBackOff"},
},
},
{Name: "pending", Namespace: "default", Phase: "Pending"},
{Name: "failed", Namespace: "default", Phase: "Failed", Reason: "Error", Message: "failed", Restarts: 1},
{Name: "unknown", Namespace: "default", Phase: ""},
{Name: "succeeded", Namespace: "default", Phase: "Succeeded"},
}
summary, issues, restarts := summarizeKubernetesPods(pods)
if !strings.Contains(summary, "6 total") || !strings.Contains(summary, "2 running") || !strings.Contains(summary, "1 pending") ||
!strings.Contains(summary, "1 failed") || !strings.Contains(summary, "1 succeeded") || !strings.Contains(summary, "1 unknown") {
t.Fatalf("unexpected summary: %s", summary)
}
if len(issues) == 0 {
t.Fatalf("expected pod issues, got none")
}
if len(restarts) != 2 {
t.Fatalf("expected restart leaders, got %d", len(restarts))
}
}
// ========================================
// summarizeKubernetesDeployments tests
// ========================================
func TestSummarizeKubernetesDeployments(t *testing.T) {
deployments := []models.KubernetesDeployment{
{Namespace: "default", Name: "ok", DesiredReplicas: 2, ReadyReplicas: 2, UpdatedReplicas: 2, AvailableReplicas: 2},
{Namespace: "default", Name: "bad", DesiredReplicas: 2, ReadyReplicas: 1, UpdatedReplicas: 1, AvailableReplicas: 1},
}
summary, issues := summarizeKubernetesDeployments(deployments)
if !strings.Contains(summary, "2 total, 1 healthy, 1 unhealthy") {
t.Fatalf("unexpected summary: %s", summary)
}
if len(issues) != 1 || !strings.Contains(issues[0], "default/bad") {
t.Fatalf("unexpected issues: %+v", issues)
}
}
func TestSummarizeKubernetesDeployments_Truncates(t *testing.T) {
deployments := make([]models.KubernetesDeployment, maxKubernetesDeploymentIssues+1)
for i := range deployments {
deployments[i] = models.KubernetesDeployment{
Namespace: "ns",
Name: "dep-" + string(rune('a'+i)),
DesiredReplicas: 2,
ReadyReplicas: 0,
UpdatedReplicas: 0,
AvailableReplicas: 0,
}
}
_, issues := summarizeKubernetesDeployments(deployments)
if len(issues) != maxKubernetesDeploymentIssues+1 {
t.Fatalf("expected truncated issues list, got %d", len(issues))
}
if !strings.Contains(issues[len(issues)-1], "and 1 more") {
t.Fatalf("expected truncation message, got %s", issues[len(issues)-1])
}
}
// ========================================
// buildKubernetesClusterContext tests
// ========================================
func TestBuildKubernetesClusterContext(t *testing.T) {
now := time.Now().Add(-5 * time.Minute)
cluster := models.KubernetesCluster{
ID: "cluster-1",
Name: "prod",
Status: "healthy",
Version: "1.27",
Server: "https://kube.local",
Context: "prod",
AgentVersion: "v1",
IntervalSeconds: 60,
LastSeen: now,
PendingUninstall: true,
Nodes: []models.KubernetesNode{{Name: "node-1", Ready: false, Unschedulable: true}},
Pods: []models.KubernetesPod{
{Name: "pod-1", Namespace: "default", Phase: "Pending"},
{Name: "pod-2", Namespace: "default", Phase: "Running", Restarts: 2},
},
Deployments: []models.KubernetesDeployment{{Namespace: "default", Name: "dep", DesiredReplicas: 1}},
}
ctx := buildKubernetesClusterContext(cluster)
if !strings.Contains(ctx, "Cluster Summary") || !strings.Contains(ctx, "Pending uninstall: true") {
t.Fatalf("unexpected context: %s", ctx)
}
if !strings.Contains(ctx, "Unhealthy Nodes") || !strings.Contains(ctx, "Unhealthy Pods") {
t.Fatalf("expected unhealthy sections, got: %s", ctx)
}
if !strings.Contains(ctx, "Pods With Restarts") {
t.Fatalf("expected pod restarts section, got: %s", ctx)
}
if !strings.Contains(ctx, "Deployments Not Fully Available") {
t.Fatalf("expected deployment issues section, got: %s", ctx)
}
}
// ========================================
// AnalyzeKubernetesCluster tests
// ========================================
func TestAnalyzeKubernetesCluster_Errors(t *testing.T) {
svc := &Service{}
if _, err := svc.AnalyzeKubernetesCluster(context.Background(), ""); err == nil {
t.Fatal("expected error for missing cluster ID")
}
if _, err := svc.AnalyzeKubernetesCluster(context.Background(), "cluster-1"); !errors.Is(err, ErrKubernetesStateUnavailable) {
t.Fatalf("expected ErrKubernetesStateUnavailable, got %v", err)
}
svc.SetStateProvider(&mockStateProvider{})
if _, err := svc.AnalyzeKubernetesCluster(context.Background(), "cluster-1"); !errors.Is(err, ErrKubernetesClusterNotFound) {
t.Fatalf("expected ErrKubernetesClusterNotFound, got %v", err)
}
}
func TestAnalyzeKubernetesCluster_Success(t *testing.T) {
tmp := t.TempDir()
persistence := config.NewConfigPersistence(tmp)
svc := NewService(persistence, nil)
svc.cfg = &config.AIConfig{Enabled: true, Model: "anthropic:test-model"}
svc.provider = &mockProvider{
chatFunc: func(ctx context.Context, req providers.ChatRequest) (*providers.ChatResponse, error) {
return &providers.ChatResponse{Content: "ok"}, nil
},
}
cluster := models.KubernetesCluster{
ID: "cluster-1",
Name: "prod",
}
svc.SetStateProvider(&mockStateProvider{
state: models.StateSnapshot{
KubernetesClusters: []models.KubernetesCluster{cluster},
},
})
resp, err := svc.AnalyzeKubernetesCluster(context.Background(), "cluster-1")
if err != nil {
t.Fatalf("AnalyzeKubernetesCluster failed: %v", err)
}
if resp == nil || resp.Content != "ok" {
t.Fatalf("unexpected response: %+v", resp)
}
}

View File

@@ -115,6 +115,11 @@ func collectDisks(ctx context.Context, diskExclude []string) []agentshost.Disk {
seen := make(map[string]struct{}, len(partitions))
zfsDatasets := make([]zfsDatasetUsage, 0)
// Track device+total combinations to deduplicate shared folders (Synology, BTRFS bind mounts).
// Key: "device:total_bytes", Value: mountpoint we already recorded.
// This prevents counting the same underlying volume multiple times. Related to #953.
deviceTotals := make(map[string]string, len(partitions))
for _, part := range partitions {
if part.Mountpoint == "" {
continue
@@ -166,6 +171,26 @@ func collectDisks(ctx context.Context, diskExclude []string) []agentshost.Disk {
continue
}
// Deduplicate by device + total bytes (issue #953).
// Synology NAS and similar systems create multiple "shared folders" as bind mounts
// or BTRFS subvolumes that all report the same device and total capacity.
// Only count each unique device+total combination once.
deviceKey := fmt.Sprintf("%s:%d", part.Device, usage.Total)
if existingMount, exists := deviceTotals[deviceKey]; exists {
// Prefer shorter/shallower mountpoints (e.g., /volume1 over /volume1/docker)
if len(part.Mountpoint) >= len(existingMount) {
continue
}
// This mountpoint is shallower - remove the old entry and use this one
for i := len(disks) - 1; i >= 0; i-- {
if disks[i].Mountpoint == existingMount {
disks = append(disks[:i], disks[i+1:]...)
break
}
}
}
deviceTotals[deviceKey] = part.Mountpoint
disks = append(disks, agentshost.Disk{
Device: part.Device,
Mountpoint: part.Mountpoint,

View File

@@ -1,25 +1,113 @@
package hostmetrics
import (
"context"
"encoding/json"
"testing"
godisk "github.com/shirou/gopsutil/v4/disk"
)
func TestCollectDiskIO(t *testing.T) {
ctx := context.Background()
snapshot, err := Collect(ctx, nil)
if err != nil {
t.Fatalf("Collect failed: %v", err)
}
t.Logf("DiskIO count: %d", len(snapshot.DiskIO))
if len(snapshot.DiskIO) == 0 {
t.Error("Expected disk IO data but got none")
}
data, _ := json.MarshalIndent(snapshot.DiskIO, "", " ")
t.Logf("DiskIO data:\n%s", string(data))
}
// TestCollectDisks_DeviceDeduplication verifies that multiple mount points
// sharing the same device and total bytes are deduplicated (issue #953).
// This prevents Synology NAS and similar systems from over-counting storage.
func TestCollectDisks_DeviceDeduplication(t *testing.T) {
// Save original function and restore after test
origPartitions := diskPartitions
origUsage := diskUsage
defer func() {
diskPartitions = origPartitions
diskUsage = origUsage
}()
// Simulate Synology-like scenario: multiple shared folders on same volume
diskPartitions = func(ctx context.Context, all bool) ([]godisk.PartitionStat, error) {
return []godisk.PartitionStat{
{Device: "/dev/vg1000/lv", Mountpoint: "/volume1", Fstype: "btrfs"},
{Device: "/dev/vg1000/lv", Mountpoint: "/volume1/docker", Fstype: "btrfs"},
{Device: "/dev/vg1000/lv", Mountpoint: "/volume1/photos", Fstype: "btrfs"},
{Device: "/dev/vg1000/lv", Mountpoint: "/volume1/music", Fstype: "btrfs"},
{Device: "/dev/sdb1", Mountpoint: "/mnt/backup", Fstype: "ext4"}, // Different device
}, nil
}
// All volume1 mounts report the same 16TB total
const volume1Total = uint64(16 * 1024 * 1024 * 1024 * 1024) // 16 TB
const backupTotal = uint64(4 * 1024 * 1024 * 1024 * 1024) // 4 TB
diskUsage = func(ctx context.Context, path string) (*godisk.UsageStat, error) {
if path == "/mnt/backup" {
return &godisk.UsageStat{
Total: backupTotal,
Used: backupTotal / 2,
Free: backupTotal / 2,
UsedPercent: 50.0,
}, nil
}
// All volume1 paths report the same totals
return &godisk.UsageStat{
Total: volume1Total,
Used: volume1Total / 4,
Free: volume1Total * 3 / 4,
UsedPercent: 25.0,
}, nil
}
ctx := context.Background()
disks := collectDisks(ctx, nil)
// Should have exactly 2 disks: /volume1 (deduplicated) and /mnt/backup
if len(disks) != 2 {
t.Errorf("Expected 2 disks after deduplication, got %d", len(disks))
for _, d := range disks {
t.Logf(" - %s (%s): %d bytes", d.Mountpoint, d.Device, d.TotalBytes)
}
}
// Verify the shallowest mountpoint (/volume1) was kept
foundVolume1 := false
foundBackup := false
for _, d := range disks {
if d.Mountpoint == "/volume1" {
foundVolume1 = true
}
if d.Mountpoint == "/mnt/backup" {
foundBackup = true
}
}
if !foundVolume1 {
t.Error("Expected /volume1 to be included (shallowest path)")
}
if !foundBackup {
t.Error("Expected /mnt/backup to be included (different device)")
}
// Calculate total - should be 16TB + 4TB = 20TB, not 64TB + 4TB = 68TB
var total int64
for _, d := range disks {
total += d.TotalBytes
}
expectedTotal := int64(volume1Total + backupTotal)
if total != expectedTotal {
t.Errorf("Total storage should be %d bytes, got %d bytes", expectedTotal, total)
}
}