From dd9bd65a2ed81ab691cfa4eaaa8533c113e29b7a Mon Sep 17 00:00:00 2001 From: rcourtman Date: Mon, 13 Oct 2025 09:57:14 +0000 Subject: [PATCH] fix: Add hasCPU/hasNVMe flags to prevent false 'no CPU sensor' errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses #101 v4.23.0 introduced a regression where systems with only NVMe temperatures (no CPU sensor) would display "No CPU sensor" in the UI. This was caused by the Available flag being set to true when NVMe temps existed, even without CPU data, triggering the error message in the frontend. Backend changes: - Add HasCPU and HasNVMe boolean fields to Temperature model - Extend CPU sensor detection to support more chip types: zenpower, k8temp, acpitz, it87 (case-insensitive matching) - HasCPU is set based on CPU chip detection (coretemp, k10temp, etc.), not value thresholds - This prevents false negatives when sensors report 0°C during resets - CPU temperature values now accepted even when 0 (checked with !IsNaN instead of > 0) - extractTempInput returns NaN instead of 0 when no data found - Available flag means "any temperature data exists" for backward compatibility - Update mock generator to properly set the new flags - Add unit tests for NVMe-only and 0°C scenarios to prevent regression - Removed amd_energy from CPU chip list (power sensor, not temperature) Frontend changes: - Add hasCPU and hasNVMe optional fields to Temperature interface - Update NodeSummaryTable to check hasCPU flag with fallback to available for backward compatibility with older API responses - Update NodeCard temperature display logic with same fallback pattern - Systems with only NVMe temps now show "-" instead of error message - Fallback ensures UI works with both old and new API responses Testing: - All unit tests pass including NVMe-only and 0°C test cases - Fix prevents false "no CPU sensor" errors when sensors temporarily report 0°C - Fix eliminates false "no CPU sensor" errors for NVMe-only systems --- .../src/components/Dashboard/NodeCard.tsx | 19 ++-- .../components/shared/NodeSummaryTable.tsx | 10 +- frontend-modern/src/types/api.ts | 4 +- internal/mock/generator.go | 4 + internal/models/models.go | 4 +- internal/monitoring/temperature.go | 34 ++++-- internal/monitoring/temperature_test.go | 106 +++++++++++++++++- 7 files changed, 146 insertions(+), 35 deletions(-) diff --git a/frontend-modern/src/components/Dashboard/NodeCard.tsx b/frontend-modern/src/components/Dashboard/NodeCard.tsx index 61ad8a87d..42ff2c0fc 100644 --- a/frontend-modern/src/components/Dashboard/NodeCard.tsx +++ b/frontend-modern/src/components/Dashboard/NodeCard.tsx @@ -125,7 +125,8 @@ const NodeCard: Component = (props) => { return value !== null ? Math.round(value) : null; }); const temperatureTooltip = createMemo(() => { - if (!props.node.temperature?.available) { + const hasCPU = props.node.temperature?.hasCPU ?? props.node.temperature?.available; + if (!hasCPU) { return ''; } const value = cpuTemperatureValue(); @@ -236,18 +237,12 @@ const NodeCard: Component = (props) => { ↑{formatUptime(props.node.uptime)} - 🌡-- - - ) : ( - ⚡{normalizedLoad()} - ) + ⚡{normalizedLoad()} } > = (props) => { when={ online && isPVE && - node!.temperature?.available && - cpuTemperatureValue !== null + cpuTemperatureValue !== null && + (node!.temperature?.hasCPU ?? node!.temperature?.available) } fallback={ - - {online && isPVE && node!.temperature?.available - ? 'No CPU sensor' - : '-'} - + - } > {(() => { diff --git a/frontend-modern/src/types/api.ts b/frontend-modern/src/types/api.ts index 0818118d0..bc31954af 100644 --- a/frontend-modern/src/types/api.ts +++ b/frontend-modern/src/types/api.ts @@ -491,7 +491,9 @@ export interface Temperature { cpuMax?: number; // Highest core temperature cores?: CoreTemp[]; // Individual core temperatures nvme?: NVMeTemp[]; // NVMe drive temperatures - available: boolean; // Whether temperature data is available + available: boolean; // Whether any temperature data is available + hasCPU?: boolean; // Whether CPU temperature data is available + hasNVMe?: boolean; // Whether NVMe temperature data is available lastUpdate: string; // When this data was collected } diff --git a/internal/mock/generator.go b/internal/mock/generator.go index 6a87c0c72..724dc29a7 100644 --- a/internal/mock/generator.go +++ b/internal/mock/generator.go @@ -491,6 +491,8 @@ func generateNodeTemperature(cores int) *models.Temperature { if rand.Float64() < 0.3 { return &models.Temperature{ Available: false, + HasCPU: false, + HasNVMe: false, } } @@ -537,6 +539,8 @@ func generateNodeTemperature(cores int) *models.Temperature { Cores: coreTemps, NVMe: nvmeTemps, Available: true, + HasCPU: true, + HasNVMe: len(nvmeTemps) > 0, LastUpdate: time.Now(), } } diff --git a/internal/models/models.go b/internal/models/models.go index 22fe28b10..4684e6902 100644 --- a/internal/models/models.go +++ b/internal/models/models.go @@ -549,7 +549,9 @@ type Temperature struct { CPUMax float64 `json:"cpuMax,omitempty"` // Highest core temperature Cores []CoreTemp `json:"cores,omitempty"` // Individual core temperatures NVMe []NVMeTemp `json:"nvme,omitempty"` // NVMe drive temperatures - Available bool `json:"available"` // Whether temperature data is available + Available bool `json:"available"` // Whether any temperature data is available + HasCPU bool `json:"hasCPU"` // Whether CPU temperature data is available + HasNVMe bool `json:"hasNVMe"` // Whether NVMe temperature data is available LastUpdate time.Time `json:"lastUpdate"` // When this data was collected } diff --git a/internal/monitoring/temperature.go b/internal/monitoring/temperature.go index 6a6616d98..5c3b8177c 100644 --- a/internal/monitoring/temperature.go +++ b/internal/monitoring/temperature.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "math" "os/exec" "strconv" "strings" @@ -153,6 +154,8 @@ func (tc *TemperatureCollector) parseSensorsJSON(jsonStr string) (*models.Temper NVMe: []models.NVMeTemp{}, } + foundCPUChip := false + // Parse each sensor chip for chipName, chipData := range sensorsData { chipMap, ok := chipData.(map[string]interface{}) @@ -160,8 +163,15 @@ func (tc *TemperatureCollector) parseSensorsJSON(jsonStr string) (*models.Temper continue } - // Handle CPU temperature sensors (coretemp, k10temp, etc.) - if strings.Contains(chipName, "coretemp") || strings.Contains(chipName, "k10temp") { + // Handle CPU temperature sensors + chipLower := strings.ToLower(chipName) + if strings.Contains(chipLower, "coretemp") || + strings.Contains(chipLower, "k10temp") || + strings.Contains(chipLower, "zenpower") || + strings.Contains(chipLower, "k8temp") || + strings.Contains(chipLower, "acpitz") || + strings.Contains(chipLower, "it87") { + foundCPUChip = true tc.parseCPUTemps(chipMap, temp) } @@ -180,11 +190,13 @@ func (tc *TemperatureCollector) parseSensorsJSON(jsonStr string) (*models.Temper } } - if temp.CPUPackage > 0 || temp.CPUMax > 0 || len(temp.Cores) > 0 || len(temp.NVMe) > 0 { - temp.Available = true - } else { - temp.Available = false - } + // Set individual sensor type flags based on chip presence, not value thresholds + // This prevents false negatives when sensors report 0°C during resets or temporarily + temp.HasCPU = foundCPUChip + temp.HasNVMe = len(temp.NVMe) > 0 + + // Available means any temperature data exists (backward compatibility) + temp.Available = temp.HasCPU || temp.HasNVMe return temp, nil } @@ -199,7 +211,7 @@ func (tc *TemperatureCollector) parseCPUTemps(chipMap map[string]interface{}, te // Look for Package id (Intel) or Tdie (AMD) if strings.Contains(sensorName, "Package id") || strings.Contains(sensorName, "Tdie") { - if tempVal := extractTempInput(sensorMap); tempVal > 0 { + if tempVal := extractTempInput(sensorMap); !math.IsNaN(tempVal) { temp.CPUPackage = tempVal } } @@ -207,7 +219,7 @@ func (tc *TemperatureCollector) parseCPUTemps(chipMap map[string]interface{}, te // Look for individual cores if strings.HasPrefix(sensorName, "Core ") { coreNum := extractCoreNumber(sensorName) - if tempVal := extractTempInput(sensorMap); tempVal > 0 { + if tempVal := extractTempInput(sensorMap); !math.IsNaN(tempVal) { temp.Cores = append(temp.Cores, models.CoreTemp{ Core: coreNum, Temp: tempVal, @@ -233,7 +245,7 @@ func (tc *TemperatureCollector) parseNVMeTemps(chipName string, chipMap map[stri // Look for Composite temperature (main NVMe temp) if strings.Contains(sensorName, "Composite") || strings.Contains(sensorName, "Sensor 1") { - if tempVal := extractTempInput(sensorMap); tempVal > 0 { + if tempVal := extractTempInput(sensorMap); !math.IsNaN(tempVal) && tempVal > 0 { temp.NVMe = append(temp.NVMe, models.NVMeTemp{ Device: device, Temp: tempVal, @@ -261,7 +273,7 @@ func extractTempInput(sensorMap map[string]interface{}) float64 { } } } - return 0 + return math.NaN() } // extractCoreNumber extracts the core number from a sensor name like "Core 0" diff --git a/internal/monitoring/temperature_test.go b/internal/monitoring/temperature_test.go index 49cde0c86..1aa9070c2 100644 --- a/internal/monitoring/temperature_test.go +++ b/internal/monitoring/temperature_test.go @@ -5,9 +5,10 @@ import "testing" func TestParseSensorsJSON_NoTemperatureData(t *testing.T) { collector := &TemperatureCollector{} + // Test with a chip that doesn't match any known CPU or NVMe patterns jsonStr := `{ - "acpitz-acpi-0": { - "Adapter": "ACPI interface", + "unknown-sensor-0": { + "Adapter": "Unknown interface", "temp1": { "temp1_label": "temp1" } @@ -22,7 +23,13 @@ func TestParseSensorsJSON_NoTemperatureData(t *testing.T) { t.Fatalf("expected temperature struct, got nil") } if temp.Available { - t.Fatalf("expected temperature to be unavailable when no readings are present") + t.Fatalf("expected temperature to be unavailable when no CPU or NVMe chips are detected") + } + if temp.HasCPU { + t.Fatalf("expected HasCPU to be false when no CPU chip detected") + } + if temp.HasNVMe { + t.Fatalf("expected HasNVMe to be false when no NVMe chip detected") } } @@ -65,4 +72,97 @@ func TestParseSensorsJSON_WithCpuAndNvmeData(t *testing.T) { if temp.NVMe[0].Temp != 38.75 { t.Fatalf("expected NVMe temperature 38.75, got %.2f", temp.NVMe[0].Temp) } + if !temp.HasCPU { + t.Fatalf("expected HasCPU to be true when CPU data present") + } + if !temp.HasNVMe { + t.Fatalf("expected HasNVMe to be true when NVMe data present") + } +} + +// TestParseSensorsJSON_NVMeOnly tests that NVMe-only systems don't show "No CPU sensor" +func TestParseSensorsJSON_NVMeOnly(t *testing.T) { + collector := &TemperatureCollector{} + + jsonStr := `{ + "nvme-pci-0400": { + "Composite": {"temp1_input": 42.5} + }, + "nvme-pci-0500": { + "Composite": {"temp1_input": 38.0} + } + }` + + temp, err := collector.parseSensorsJSON(jsonStr) + if err != nil { + t.Fatalf("unexpected error parsing sensors output: %v", err) + } + if temp == nil { + t.Fatalf("expected temperature struct, got nil") + } + // available should be true (any temperature data exists) + if !temp.Available { + t.Fatalf("expected temperature to be available when NVMe readings are present") + } + // hasCPU should be false (no CPU temperature data) + if temp.HasCPU { + t.Fatalf("expected HasCPU to be false when only NVMe data present") + } + // hasNVMe should be true + if !temp.HasNVMe { + t.Fatalf("expected HasNVMe to be true when NVMe data present") + } + // Verify NVMe data was parsed correctly + if len(temp.NVMe) != 2 { + t.Fatalf("expected two NVMe temperatures, got %d", len(temp.NVMe)) + } + // Check that both expected temperatures are present (order may vary) + foundTemps := make(map[float64]bool) + for _, nvme := range temp.NVMe { + foundTemps[nvme.Temp] = true + } + if !foundTemps[42.5] { + t.Fatalf("expected to find NVMe temperature 42.5") + } + if !foundTemps[38.0] { + t.Fatalf("expected to find NVMe temperature 38.0") + } +} + +// TestParseSensorsJSON_ZeroTemperature tests that HasCPU is true even when sensor reports 0°C +func TestParseSensorsJSON_ZeroTemperature(t *testing.T) { + collector := &TemperatureCollector{} + + jsonStr := `{ + "coretemp-isa-0000": { + "Package id 0": {"temp1_input": 0.0}, + "Core 0": {"temp2_input": 0.0} + } + }` + + temp, err := collector.parseSensorsJSON(jsonStr) + if err != nil { + t.Fatalf("unexpected error parsing sensors output: %v", err) + } + if temp == nil { + t.Fatalf("expected temperature struct, got nil") + } + // hasCPU should be true because coretemp chip was detected, even though values are 0 + if !temp.HasCPU { + t.Fatalf("expected HasCPU to be true when CPU chip is detected (even with 0°C readings)") + } + // available should be true because we have a CPU sensor + if !temp.Available { + t.Fatalf("expected temperature to be available when CPU chip is detected") + } + // Values should be accepted (not filtered out) + if temp.CPUPackage != 0.0 { + t.Fatalf("expected CPUPackage to be 0.0, got %.2f", temp.CPUPackage) + } + if len(temp.Cores) != 1 { + t.Fatalf("expected one core temperature, got %d", len(temp.Cores)) + } + if temp.Cores[0].Temp != 0.0 { + t.Fatalf("expected core temperature to be 0.0, got %.2f", temp.Cores[0].Temp) + } }