From 3a4fc044ea29976a47ce4b64207b5a362ed1111b Mon Sep 17 00:00:00 2001
From: rcourtman
- sudo systemctl restart pulse-backend
+ sudo systemctl restart pulse-hot-dev
diff --git a/internal/monitoring/monitor.go b/internal/monitoring/monitor.go
index 526abb72c..9a0d25b48 100644
--- a/internal/monitoring/monitor.go
+++ b/internal/monitoring/monitor.go
@@ -281,6 +281,8 @@ type Monitor struct {
removedDockerHosts map[string]time.Time // Track deliberately removed Docker hosts (ID -> removal time)
dockerCommands map[string]*dockerHostCommand
dockerCommandIndex map[string]string
+ guestMetadataMu    sync.RWMutex                       // Guards guestMetadataCache
+ guestMetadataCache map[string]guestMetadataCacheEntry // Cached guest agent metadata keyed by instance|node|vmid
}
type rrdMemCacheEntry struct {
@@ -323,8 +325,17 @@ const (
dockerMaximumHealthWindow = 10 * time.Minute
nodeRRDCacheTTL = 30 * time.Second
nodeRRDRequestTimeout = 2 * time.Second
+ guestMetadataCacheTTL = 5 * time.Minute
)
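+// guestMetadataCacheEntry is one cached snapshot of guest agent metadata
+// (IP addresses, network interfaces, and OS info) for a single guest.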
+type guestMetadataCacheEntry struct {
+ ipAddresses []string
+ networkInterfaces []models.GuestNetworkInterface
+ osName string
+ osVersion string
+ fetchedAt time.Time
+}
+
func (m *Monitor) getNodeRRDMemAvailable(ctx context.Context, client PVEClientInterface, nodeName string) (uint64, error) {
if client == nil || nodeName == "" {
return 0, fmt.Errorf("invalid arguments for RRD lookup")
@@ -813,14 +824,33 @@ func sortContent(content string) string {
return strings.Join(parts, ",")
}
-func fetchGuestAgentMetadata(ctx context.Context, client PVEClientInterface, instanceName, nodeName, vmName string, vmid int, vmStatus *proxmox.VMStatus) ([]string, []models.GuestNetworkInterface, string, string) {
- if vmStatus == nil {
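+// fetchGuestAgentMetadata returns guest agent IP addresses, network interfaces,
+// and OS details for a VM. Results are reused from guestMetadataCache while they
+// are fresher than guestMetadataCacheTTL, and the last cached values are kept as
+// a fallback when the agent calls fail.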
+func (m *Monitor) fetchGuestAgentMetadata(ctx context.Context, client PVEClientInterface, instanceName, nodeName, vmName string, vmid int, vmStatus *proxmox.VMStatus) ([]string, []models.GuestNetworkInterface, string, string) {
+ if vmStatus == nil || client == nil {
+ m.clearGuestMetadataCache(instanceName, nodeName, vmid)
return nil, nil, "", ""
}
- var ipAddresses []string
- var networkIfaces []models.GuestNetworkInterface
- var osName, osVersion string
+ if vmStatus.Agent <= 0 {
+ m.clearGuestMetadataCache(instanceName, nodeName, vmid)
+ return nil, nil, "", ""
+ }
+
+ key := guestMetadataCacheKey(instanceName, nodeName, vmid)
+ now := time.Now()
+
+ m.guestMetadataMu.RLock()
+ cached, ok := m.guestMetadataCache[key]
+ m.guestMetadataMu.RUnlock()
+
+ if ok && now.Sub(cached.fetchedAt) < guestMetadataCacheTTL {
+ return cloneStringSlice(cached.ipAddresses), cloneGuestNetworkInterfaces(cached.networkInterfaces), cached.osName, cached.osVersion
+ }
+
+ // Start with cached values as fallback in case new calls fail
+ ipAddresses := cloneStringSlice(cached.ipAddresses)
+ networkIfaces := cloneGuestNetworkInterfaces(cached.networkInterfaces)
+ osName := cached.osName
+ osVersion := cached.osVersion
ifaceCtx, cancelIface := context.WithTimeout(ctx, 5*time.Second)
interfaces, err := client.GetVMNetworkInterfaces(ifaceCtx, nodeName, vmid)
@@ -834,27 +864,86 @@ func fetchGuestAgentMetadata(ctx context.Context, client PVEClientInterface, ins
Msg("Guest agent network interfaces unavailable")
} else if len(interfaces) > 0 {
ipAddresses, networkIfaces = processGuestNetworkInterfaces(interfaces)
+ } else {
+ ipAddresses = nil
+ networkIfaces = nil
}
- if vmStatus.Agent > 0 {
- osCtx, cancelOS := context.WithTimeout(ctx, 3*time.Second)
- agentInfo, err := client.GetVMAgentInfo(osCtx, nodeName, vmid)
- cancelOS()
- if err != nil {
- log.Debug().
- Str("instance", instanceName).
- Str("vm", vmName).
- Int("vmid", vmid).
- Err(err).
- Msg("Guest agent OS info unavailable")
- } else if len(agentInfo) > 0 {
- osName, osVersion = extractGuestOSInfo(agentInfo)
- }
+ osCtx, cancelOS := context.WithTimeout(ctx, 3*time.Second)
+ agentInfo, err := client.GetVMAgentInfo(osCtx, nodeName, vmid)
+ cancelOS()
+ if err != nil {
+ log.Debug().
+ Str("instance", instanceName).
+ Str("vm", vmName).
+ Int("vmid", vmid).
+ Err(err).
+ Msg("Guest agent OS info unavailable")
+ } else if len(agentInfo) > 0 {
+ osName, osVersion = extractGuestOSInfo(agentInfo)
+ } else {
+ osName = ""
+ osVersion = ""
}
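+ // Cache defensive copies so callers mutating the returned slices cannot
+ // corrupt the stored entry.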
+ entry := guestMetadataCacheEntry{
+ ipAddresses: cloneStringSlice(ipAddresses),
+ networkInterfaces: cloneGuestNetworkInterfaces(networkIfaces),
+ osName: osName,
+ osVersion: osVersion,
+ fetchedAt: time.Now(),
+ }
+
+ m.guestMetadataMu.Lock()
+ if m.guestMetadataCache == nil {
+ m.guestMetadataCache = make(map[string]guestMetadataCacheEntry)
+ }
+ m.guestMetadataCache[key] = entry
+ m.guestMetadataMu.Unlock()
+
return ipAddresses, networkIfaces, osName, osVersion
}
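+// guestMetadataCacheKey builds the cache key for a guest from its instance,
+// node, and VMID.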
+func guestMetadataCacheKey(instanceName, nodeName string, vmid int) string {
+ return fmt.Sprintf("%s|%s|%d", instanceName, nodeName, vmid)
+}
+
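+// clearGuestMetadataCache drops any cached metadata for a guest, e.g. when its
+// status is unavailable or the guest agent is disabled.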
+func (m *Monitor) clearGuestMetadataCache(instanceName, nodeName string, vmid int) {
+ if m == nil {
+ return
+ }
+
+ key := guestMetadataCacheKey(instanceName, nodeName, vmid)
+ m.guestMetadataMu.Lock()
+ if m.guestMetadataCache != nil {
+ delete(m.guestMetadataCache, key)
+ }
+ m.guestMetadataMu.Unlock()
+}
+
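+// cloneStringSlice returns an independent copy of src, or nil when src is empty.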
+func cloneStringSlice(src []string) []string {
+ if len(src) == 0 {
+ return nil
+ }
+ dst := make([]string, len(src))
+ copy(dst, src)
+ return dst
+}
+
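+// cloneGuestNetworkInterfaces deep-copies the interface slice, including each
+// interface's Addresses slice.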
+func cloneGuestNetworkInterfaces(src []models.GuestNetworkInterface) []models.GuestNetworkInterface {
+ if len(src) == 0 {
+ return nil
+ }
+ dst := make([]models.GuestNetworkInterface, len(src))
+ for i, iface := range src {
+ dst[i] = iface
+ if len(iface.Addresses) > 0 {
+ dst[i].Addresses = cloneStringSlice(iface.Addresses)
+ }
+ }
+ return dst
+}
+
func processGuestNetworkInterfaces(raw []proxmox.VMNetworkInterface) ([]string, []models.GuestNetworkInterface) {
ipSet := make(map[string]struct{})
ipAddresses := make([]string, 0)
@@ -1194,9 +1283,10 @@ func New(cfg *config.Config) (*Monitor, error) {
nodeSnapshots: make(map[string]NodeMemorySnapshot),
guestSnapshots: make(map[string]GuestMemorySnapshot),
nodeRRDMemCache: make(map[string]rrdMemCacheEntry),
- removedDockerHosts: make(map[string]time.Time),
- dockerCommands: make(map[string]*dockerHostCommand),
- dockerCommandIndex: make(map[string]string),
+ removedDockerHosts: make(map[string]time.Time),
+ dockerCommands: make(map[string]*dockerHostCommand),
+ dockerCommandIndex: make(map[string]string),
+ guestMetadataCache: make(map[string]guestMetadataCacheEntry),
}
// Load saved configurations
@@ -3108,7 +3198,7 @@ func (m *Monitor) pollVMsAndContainersEfficient(ctx context.Context, instanceNam
}
// Gather guest metadata from the agent when available
- guestIPs, guestIfaces, guestOSName, guestOSVersion := fetchGuestAgentMetadata(ctx, client, instanceName, res.Node, res.Name, res.VMID, detailedStatus)
+ guestIPs, guestIfaces, guestOSName, guestOSVersion := m.fetchGuestAgentMetadata(ctx, client, instanceName, res.Node, res.Name, res.VMID, detailedStatus)
if len(guestIPs) > 0 {
ipAddresses = guestIPs
}
@@ -3796,7 +3886,7 @@ func (m *Monitor) pollVMsWithNodes(ctx context.Context, instanceName string, cli
memUsed = memTotal
}
- guestIPs, guestIfaces, guestOSName, guestOSVersion := fetchGuestAgentMetadata(ctx, client, instanceName, node.Node, vm.Name, vm.VMID, status)
+ guestIPs, guestIfaces, guestOSName, guestOSVersion := m.fetchGuestAgentMetadata(ctx, client, instanceName, node.Node, vm.Name, vm.VMID, status)
if len(guestIPs) > 0 {
ipAddresses = guestIPs
}
diff --git a/internal/monitoring/monitor_optimized.go b/internal/monitoring/monitor_optimized.go
index 42d922134..bd6c2aa2b 100644
--- a/internal/monitoring/monitor_optimized.go
+++ b/internal/monitoring/monitor_optimized.go
@@ -266,7 +266,7 @@ func (m *Monitor) pollVMsWithNodesOptimized(ctx context.Context, instanceName st
}
if vm.Status == "running" && vmStatus != nil {
- guestIPs, guestIfaces, guestOSName, guestOSVersion := fetchGuestAgentMetadata(ctx, client, instanceName, n.Node, vm.Name, vm.VMID, vmStatus)
+ guestIPs, guestIfaces, guestOSName, guestOSVersion := m.fetchGuestAgentMetadata(ctx, client, instanceName, n.Node, vm.Name, vm.VMID, vmStatus)
if len(guestIPs) > 0 {
ipAddresses = guestIPs
}
diff --git a/scripts/clean-mock-alerts.sh b/scripts/clean-mock-alerts.sh
index 212e372b3..68188b99b 100755
--- a/scripts/clean-mock-alerts.sh
+++ b/scripts/clean-mock-alerts.sh
@@ -35,6 +35,8 @@ echo -e "${GREEN}✓ Backup created: $BACKUP_FILE${NC}"
# Stop backend to prevent writes during cleanup
echo "Stopping backend..."
pkill -x pulse 2>/dev/null || true
+sudo systemctl stop pulse-hot-dev 2>/dev/null || true
+sudo systemctl stop pulse 2>/dev/null || true
sudo systemctl stop pulse-backend 2>/dev/null || true
sleep 2
@@ -58,4 +60,5 @@ echo -e "${GREEN}✓ Mock alerts removed successfully${NC}"
echo ""
echo "To restart the backend, run:"
echo " ./scripts/hot-dev.sh (for development)"
-echo " sudo systemctl start pulse-backend (for production)"
+echo " sudo systemctl start pulse (systemd)"
+echo " sudo systemctl start pulse-backend (legacy)"
diff --git a/scripts/dev-orchestrator.sh b/scripts/dev-orchestrator.sh
index 7c542c752..5fbe4080e 100755
--- a/scripts/dev-orchestrator.sh
+++ b/scripts/dev-orchestrator.sh
@@ -18,21 +18,56 @@ NC='\033[0m'
# STATE DETECTION
#########################################
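+# Print the first installed Pulse service unit, preferring pulse-hot-dev over
+# pulse and the legacy pulse-backend unit.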
+detect_backend_service() {
+ local services=("pulse-hot-dev" "pulse" "pulse-backend")
+ for svc in "${services[@]}"; do
+ if systemctl list-unit-files --no-legend 2>/dev/null | grep -q "^${svc}\\.service"; then
+ echo "$svc"
+ return 0
+ fi
+ done
+ echo ""
+}
+
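+# Print the first Pulse service unit that is currently active, if any.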
+detect_running_backend_service() {
+ local services=("pulse-hot-dev" "pulse" "pulse-backend")
+ for svc in "${services[@]}"; do
+ if systemctl is-active --quiet "$svc" 2>/dev/null; then
+ echo "$svc"
+ return 0
+ fi
+ done
+ echo ""
+}
+
detect_backend_state() {
local state="{}"
+ local running_service=$(detect_running_backend_service)
- # Check if pulse-backend service is running
- if systemctl is-active --quiet pulse-backend 2>/dev/null; then
- state=$(echo "$state" | jq '. + {backend_running: true, backend_type: "systemd"}')
+ if [[ -n "$running_service" ]]; then
+ local backend_type="systemd"
+ if [[ "$running_service" == "pulse-hot-dev" ]]; then
+ backend_type="hot-dev"
+ fi
+
+ state=$(echo "$state" | jq ". + {backend_running: true, backend_type: \"$backend_type\", backend_service: \"$running_service\"}")
# Check mock mode from logs (multiple possible indicators, look at last 2 minutes for reliability)
- if sudo journalctl -u pulse-backend --since "2 minutes ago" | grep -qE "(Mock mode enabled|mockEnabled=true|mock mode trackedNodes)"; then
+ if sudo journalctl -u "$running_service" --since "2 minutes ago" | grep -qE "(Mock mode enabled|mockEnabled=true|mock mode trackedNodes)"; then
state=$(echo "$state" | jq '. + {mock_mode: true}')
else
state=$(echo "$state" | jq '. + {mock_mode: false}')
fi
else
state=$(echo "$state" | jq '. + {backend_running: false}')
+ local configured_service=$(detect_backend_service)
+ if [[ -n "$configured_service" ]]; then
+ local backend_type="systemd"
+ if [[ "$configured_service" == "pulse-hot-dev" ]]; then
+ backend_type="hot-dev"
+ fi
+ state=$(echo "$state" | jq ". + {backend_service: \"$configured_service\", backend_type: \"$backend_type\"}")
+ fi
fi
# Check what's configured in mock.env.local
@@ -82,6 +117,11 @@ get_full_state() {
switch_to_mock() {
echo -e "${YELLOW}Switching to mock mode...${NC}"
+ local service=$(detect_backend_service)
+ if [[ -z "$service" ]]; then
+ echo -e "${RED}✗ No Pulse systemd service detected${NC}"
+ return 1
+ fi
# Update mock.env.local (preferred) or mock.env
if [ -f "$ROOT_DIR/mock.env.local" ]; then
@@ -93,14 +133,14 @@ switch_to_mock() {
fi
# Restart backend
- sudo systemctl restart pulse-backend
+ sudo systemctl restart "$service"
echo -e "${GREEN}✓ Backend restarted${NC}"
# Wait for backend to be ready
sleep 3
# Verify
- if sudo journalctl -u pulse-backend --since "5 seconds ago" | grep -qE "(Mock mode enabled|mockEnabled=true|mock mode trackedNodes)"; then
+ if sudo journalctl -u "$service" --since "5 seconds ago" | grep -qE "(Mock mode enabled|mockEnabled=true|mock mode trackedNodes)"; then
echo -e "${GREEN}✓ Mock mode ACTIVE${NC}"
return 0
else
@@ -111,6 +151,11 @@ switch_to_mock() {
switch_to_production() {
echo -e "${YELLOW}Switching to production mode...${NC}"
+ local service=$(detect_backend_service)
+ if [[ -z "$service" ]]; then
+ echo -e "${RED}✗ No Pulse systemd service detected${NC}"
+ return 1
+ fi
# Sync production config first
if [ -f "$ROOT_DIR/scripts/sync-production-config.sh" ]; then
@@ -128,7 +173,7 @@ switch_to_production() {
fi
# Restart backend
- sudo systemctl restart pulse-backend
+ sudo systemctl restart "$service"
echo -e "${GREEN}✓ Backend restarted${NC}"
# Wait for backend to be ready
@@ -211,7 +256,12 @@ cmd_prod() {
cmd_restart() {
echo -e "${YELLOW}Restarting backend...${NC}"
- sudo systemctl restart pulse-backend
+ local service=$(detect_backend_service)
+ if [[ -z "$service" ]]; then
+ echo -e "${RED}✗ No Pulse systemd service detected${NC}"
+ return 1
+ fi
+ sudo systemctl restart "$service"
sleep 2
echo -e "${GREEN}✓ Backend restarted${NC}"
}
diff --git a/scripts/hot-dev.sh b/scripts/hot-dev.sh
index 012182432..74a8f912c 100755
--- a/scripts/hot-dev.sh
+++ b/scripts/hot-dev.sh
@@ -75,6 +75,7 @@ kill_port() {
printf "[hot-dev] Cleaning up existing processes...\n"
+sudo systemctl stop pulse-hot-dev 2>/dev/null || true
sudo systemctl stop pulse-backend 2>/dev/null || true
sudo systemctl stop pulse 2>/dev/null || true
sudo systemctl stop pulse-frontend 2>/dev/null || true
@@ -196,7 +197,8 @@ cleanup() {
pkill -f vite 2>/dev/null || true
pkill -f "npm run dev" 2>/dev/null || true
pkill -9 -x "pulse" 2>/dev/null || true
- echo "Hot-dev stopped. To restart normal service, run: sudo systemctl start pulse-backend"
+ echo "Hot-dev stopped. To restart normal service, run: sudo systemctl start pulse"
+ echo "(Legacy installs may use: sudo systemctl start pulse-backend)"
}
trap cleanup INT TERM EXIT