From 19a67dd4f3efec85c81e422eb047edbd5f9e6c1e Mon Sep 17 00:00:00 2001 From: rcourtman Date: Wed, 28 Jan 2026 16:52:35 +0000 Subject: [PATCH] Update core infrastructure components Config: - AI configuration improvements - API tokens handling - Persistence layer updates Host Agent: - Command execution improvements - Better test coverage Infrastructure Discovery: - Service improvements - Enhanced test coverage Models: - State snapshot updates - Model improvements Monitoring: - Polling improvements - Guest config handling - Storage config support WebSocket: - Hub tenant test updates Service Discovery: - New service discovery module --- internal/config/ai.go | 34 +- internal/config/ai_additional_test.go | 31 +- internal/config/api_tokens.go | 16 + internal/config/persistence.go | 19 +- internal/hostagent/commands.go | 44 +- internal/hostagent/commands_execute_test.go | 10 +- internal/hostagent/commands_test.go | 108 + internal/infradiscovery/service.go | 26 +- internal/infradiscovery/service_test.go | 8 +- internal/models/models.go | 1 + internal/models/state_snapshot.go | 216 ++ internal/monitoring/guest_config.go | 89 + internal/monitoring/monitor_polling.go | 42 + internal/monitoring/storage_config.go | 89 + internal/servicediscovery/commands.go | 526 +++++ internal/servicediscovery/commands_test.go | 81 + internal/servicediscovery/deep_scanner.go | 475 +++++ .../servicediscovery/deep_scanner_test.go | 395 ++++ internal/servicediscovery/fingerprint.go | 249 +++ internal/servicediscovery/formatters.go | 629 ++++++ internal/servicediscovery/formatters_test.go | 218 ++ internal/servicediscovery/service.go | 1753 +++++++++++++++++ internal/servicediscovery/service_test.go | 797 ++++++++ internal/servicediscovery/store.go | 651 ++++++ internal/servicediscovery/store_test.go | 469 +++++ internal/servicediscovery/tools_adapter.go | 226 +++ internal/servicediscovery/types.go | 298 +++ internal/servicediscovery/types_test.go | 22 + internal/websocket/hub_tenant_test.go | 4 
+- 29 files changed, 7459 insertions(+), 67 deletions(-) create mode 100644 internal/monitoring/guest_config.go create mode 100644 internal/monitoring/storage_config.go create mode 100644 internal/servicediscovery/commands.go create mode 100644 internal/servicediscovery/commands_test.go create mode 100644 internal/servicediscovery/deep_scanner.go create mode 100644 internal/servicediscovery/deep_scanner_test.go create mode 100644 internal/servicediscovery/fingerprint.go create mode 100644 internal/servicediscovery/formatters.go create mode 100644 internal/servicediscovery/formatters_test.go create mode 100644 internal/servicediscovery/service.go create mode 100644 internal/servicediscovery/service_test.go create mode 100644 internal/servicediscovery/store.go create mode 100644 internal/servicediscovery/store_test.go create mode 100644 internal/servicediscovery/tools_adapter.go create mode 100644 internal/servicediscovery/types.go create mode 100644 internal/servicediscovery/types_test.go diff --git a/internal/config/ai.go b/internal/config/ai.go index 6cc3f6d71..931c49d94 100644 --- a/internal/config/ai.go +++ b/internal/config/ai.go @@ -69,13 +69,13 @@ type AIConfig struct { ProtectedGuests []string `json:"protected_guests,omitempty"` // VMIDs or names that AI cannot control // Patrol Autonomy settings - controls automatic investigation and remediation of findings - PatrolAutonomyLevel string `json:"patrol_autonomy_level,omitempty"` // "monitor", "approval", "full" + PatrolAutonomyLevel string `json:"patrol_autonomy_level,omitempty"` // "monitor", "approval", "assisted", "full" + PatrolFullModeUnlocked bool `json:"patrol_full_mode_unlocked"` // User has acknowledged Full mode risks (required to use "full") PatrolInvestigationBudget int `json:"patrol_investigation_budget,omitempty"` // Max turns per investigation (default: 15) PatrolInvestigationTimeoutSec int `json:"patrol_investigation_timeout_sec,omitempty"` // Max seconds per investigation (default: 300) - 
PatrolCriticalRequireApproval bool `json:"patrol_critical_require_approval"` // Critical findings always require approval (default: true) - // AI Discovery settings - controls automatic infrastructure discovery - DiscoveryEnabled bool `json:"discovery_enabled"` // Enable AI-powered infrastructure discovery + // Discovery settings - controls automatic infrastructure discovery + DiscoveryEnabled bool `json:"discovery_enabled"` // Enable infrastructure discovery DiscoveryIntervalHours int `json:"discovery_interval_hours,omitempty"` // Hours between automatic re-scans (0 = manual only, default: 0) } @@ -102,13 +102,12 @@ const ( const ( // PatrolAutonomyMonitor - Detect issues and create findings, no automatic investigation PatrolAutonomyMonitor = "monitor" - // PatrolAutonomyApproval - Spawn Chat sessions to investigate, queue fixes for user approval + // PatrolAutonomyApproval - Spawn Chat sessions to investigate, queue ALL fixes for user approval PatrolAutonomyApproval = "approval" - // PatrolAutonomyFull - Spawn Chat sessions to investigate, execute non-critical fixes automatically + // PatrolAutonomyAssisted - Auto-fix warnings, critical findings still need approval + PatrolAutonomyAssisted = "assisted" + // PatrolAutonomyFull - Full autonomy, auto-fix everything including critical (user accepts risk) PatrolAutonomyFull = "full" - // PatrolAutonomyAutonomous - Full autonomy, execute ALL fixes including destructive ones without approval - // User accepts full risk - similar to "auto-accept" mode in Claude Code - PatrolAutonomyAutonomous = "autonomous" ) // Default patrol investigation settings @@ -577,8 +576,11 @@ func (c *AIConfig) GetPatrolAutonomyLevel() string { return PatrolAutonomyMonitor } switch c.PatrolAutonomyLevel { - case PatrolAutonomyMonitor, PatrolAutonomyApproval, PatrolAutonomyFull, PatrolAutonomyAutonomous: + case PatrolAutonomyMonitor, PatrolAutonomyApproval, PatrolAutonomyAssisted, PatrolAutonomyFull: return c.PatrolAutonomyLevel + // Migration: 
treat old "autonomous" as new "full" + case "autonomous": + return PatrolAutonomyFull default: return PatrolAutonomyMonitor } @@ -614,20 +616,10 @@ func (c *AIConfig) GetPatrolInvestigationTimeout() time.Duration { return time.Duration(c.PatrolInvestigationTimeoutSec) * time.Second } -// ShouldCriticalRequireApproval returns whether critical findings should always require approval -// Defaults to true for safety -func (c *AIConfig) ShouldCriticalRequireApproval() bool { - // This is a safety feature, default to true - // The JSON field uses the default Go behavior (false when not set), - // so we explicitly check if it was intended to be false - // For backwards compatibility, treat unset as true - return c.PatrolCriticalRequireApproval || c.PatrolAutonomyLevel == "" -} - // IsValidPatrolAutonomyLevel checks if a patrol autonomy level string is valid func IsValidPatrolAutonomyLevel(level string) bool { switch level { - case PatrolAutonomyMonitor, PatrolAutonomyApproval, PatrolAutonomyFull, PatrolAutonomyAutonomous: + case PatrolAutonomyMonitor, PatrolAutonomyApproval, PatrolAutonomyAssisted, PatrolAutonomyFull: return true default: return false diff --git a/internal/config/ai_additional_test.go b/internal/config/ai_additional_test.go index 80d24d985..54a705592 100644 --- a/internal/config/ai_additional_test.go +++ b/internal/config/ai_additional_test.go @@ -52,14 +52,26 @@ func TestAIConfig_PatrolSettings(t *testing.T) { t.Fatalf("patrol autonomy should be disabled by default") } + // Test all valid levels + cfg.PatrolAutonomyLevel = PatrolAutonomyAssisted + if got := cfg.GetPatrolAutonomyLevel(); got != PatrolAutonomyAssisted { + t.Fatalf("patrol autonomy = %q, want assisted", got) + } + cfg.PatrolAutonomyLevel = PatrolAutonomyFull if got := cfg.GetPatrolAutonomyLevel(); got != PatrolAutonomyFull { - t.Fatalf("patrol autonomy = %q", got) + t.Fatalf("patrol autonomy = %q, want full", got) } if !cfg.IsPatrolAutonomyEnabled() { t.Fatalf("patrol autonomy should be 
enabled for full mode") } + // Test migration: old "autonomous" maps to new "full" + cfg.PatrolAutonomyLevel = "autonomous" + if got := cfg.GetPatrolAutonomyLevel(); got != PatrolAutonomyFull { + t.Fatalf("patrol autonomy = %q, want full (migrated from autonomous)", got) + } + cfg.PatrolAutonomyLevel = "invalid" if got := cfg.GetPatrolAutonomyLevel(); got != PatrolAutonomyMonitor { t.Fatalf("invalid autonomy should fallback to monitor, got %q", got) @@ -94,17 +106,6 @@ func TestAIConfig_PatrolSettings(t *testing.T) { if got := cfg.GetPatrolInvestigationTimeout(); got.Seconds() != 120 { t.Fatalf("timeout should be 120s, got %s", got) } - - cfg.PatrolAutonomyLevel = "" - cfg.PatrolCriticalRequireApproval = false - if !cfg.ShouldCriticalRequireApproval() { - t.Fatalf("critical approval should default to true when level unset") - } - - cfg.PatrolAutonomyLevel = PatrolAutonomyMonitor - if cfg.ShouldCriticalRequireApproval() { - t.Fatalf("critical approval should be false when explicitly disabled") - } } func TestAIConfig_ProtectedGuestsAndValidation(t *testing.T) { @@ -131,4 +132,10 @@ func TestAIConfig_ProtectedGuestsAndValidation(t *testing.T) { if !IsValidPatrolAutonomyLevel(PatrolAutonomyApproval) { t.Fatalf("expected patrol approval to be valid") } + if !IsValidPatrolAutonomyLevel(PatrolAutonomyAssisted) { + t.Fatalf("expected patrol assisted to be valid") + } + if !IsValidPatrolAutonomyLevel(PatrolAutonomyFull) { + t.Fatalf("expected patrol full to be valid") + } } diff --git a/internal/config/api_tokens.go b/internal/config/api_tokens.go index 6a5cb84d8..4550f0ebd 100644 --- a/internal/config/api_tokens.go +++ b/internal/config/api_tokens.go @@ -297,6 +297,22 @@ func (c *Config) ValidateAPIToken(rawToken string) (*APITokenRecord, bool) { return nil, false } +// IsValidAPIToken checks if a token is valid without mutating any metadata. +// Use this for read-only checks like admin verification where you don't need +// to update LastUsedAt or get the full record. 
Safe to call under RLock. +func (c *Config) IsValidAPIToken(rawToken string) bool { + if rawToken == "" { + return false + } + + for _, record := range c.APITokens { + if auth.CompareAPIToken(rawToken, record.Hash) { + return true + } + } + return false +} + // UpsertAPIToken inserts or replaces a record by ID. func (c *Config) UpsertAPIToken(record APITokenRecord) { record.ensureScopes() diff --git a/internal/config/persistence.go b/internal/config/persistence.go index d1cc90937..2cc90dfb9 100644 --- a/internal/config/persistence.go +++ b/internal/config/persistence.go @@ -1959,12 +1959,19 @@ type PatrolRunHistoryData struct { // PatrolRunRecord represents a single patrol check run type PatrolRunRecord struct { - ID string `json:"id"` - StartedAt time.Time `json:"started_at"` - CompletedAt time.Time `json:"completed_at"` - DurationMs int64 `json:"duration_ms"` - Type string `json:"type"` // "quick" or "deep" - ResourcesChecked int `json:"resources_checked"` + ID string `json:"id"` + StartedAt time.Time `json:"started_at"` + CompletedAt time.Time `json:"completed_at"` + DurationMs int64 `json:"duration_ms"` + Type string `json:"type"` // "quick" or "deep" + TriggerReason string `json:"trigger_reason,omitempty"` + ScopeResourceIDs []string `json:"scope_resource_ids,omitempty"` + ScopeResourceTypes []string `json:"scope_resource_types,omitempty"` + ScopeDepth string `json:"scope_depth,omitempty"` + ScopeContext string `json:"scope_context,omitempty"` + AlertID string `json:"alert_id,omitempty"` + FindingID string `json:"finding_id,omitempty"` + ResourcesChecked int `json:"resources_checked"` // Breakdown by resource type NodesChecked int `json:"nodes_checked"` GuestsChecked int `json:"guests_checked"` diff --git a/internal/hostagent/commands.go b/internal/hostagent/commands.go index 8c6ff7c2e..a76b9366d 100644 --- a/internal/hostagent/commands.go +++ b/internal/hostagent/commands.go @@ -7,7 +7,9 @@ import ( "encoding/json" "fmt" "net/url" + "os" "os/exec" + "regexp" 
"runtime" "strings" "sync" @@ -17,6 +19,10 @@ import ( "github.com/rs/zerolog" ) +// safeTargetIDPattern validates target IDs to prevent shell injection. +// Allows alphanumeric, dash, underscore, period (no colons or special chars). +var safeTargetIDPattern = regexp.MustCompile(`^[a-zA-Z0-9._-]+$`) + var execCommandContext = exec.CommandContext // CommandClient handles WebSocket connection to Pulse for AI command execution @@ -387,15 +393,41 @@ func (c *CommandClient) handleExecuteCommand(ctx context.Context, conn *websocke } func wrapCommand(payload executeCommandPayload) string { - if payload.TargetType == "container" && payload.TargetID != "" { - return fmt.Sprintf("pct exec %s -- %s", payload.TargetID, payload.Command) - } - if payload.TargetType == "vm" && payload.TargetID != "" { - return fmt.Sprintf("qm guest exec %s -- %s", payload.TargetID, payload.Command) + // Only validate TargetID when it will be interpolated into the command + // (container and vm types). Host type doesn't use TargetID in the command. + needsTargetID := (payload.TargetType == "container" || payload.TargetType == "vm") && payload.TargetID != "" + + if needsTargetID { + // Validate TargetID to prevent shell injection - defense in depth + if !safeTargetIDPattern.MatchString(payload.TargetID) { + // Return a command that fails with non-zero exit and error message + return "sh -c 'echo \"Error: invalid target ID\" >&2; exit 1'" + } + + // Wrap command in sh -c so shell metacharacters (pipes, redirects, globs) + // are processed inside the container/VM, not on the Proxmox host. + // Without this, "pct exec 141 -- grep pattern /var/log/*.log" would + // expand the glob on the host (where /var/log/*.log doesn't exist). 
+ quotedCmd := shellQuote(payload.Command) + + if payload.TargetType == "container" { + return fmt.Sprintf("pct exec %s -- sh -c %s", payload.TargetID, quotedCmd) + } + if payload.TargetType == "vm" { + return fmt.Sprintf("qm guest exec %s -- sh -c %s", payload.TargetID, quotedCmd) + } } + return payload.Command } +// shellQuote safely quotes a string for use as a shell argument. +// Uses single quotes and escapes any embedded single quotes. +func shellQuote(s string) string { + escaped := strings.ReplaceAll(s, "'", "'\"'\"'") + return "'" + escaped + "'" +} + func (c *CommandClient) executeCommand(ctx context.Context, payload executeCommandPayload) commandResultPayload { result := commandResultPayload{ RequestID: payload.RequestID, @@ -418,6 +450,8 @@ func (c *CommandClient) executeCommand(ctx context.Context, payload executeComma cmd = execCommandContext(cmdCtx, "cmd", "/C", command) } else { cmd = execCommandContext(cmdCtx, "sh", "-c", command) + // Ensure PATH includes common binary locations for docker, kubectl, etc. 
+ cmd.Env = append(os.Environ(), "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:"+os.Getenv("PATH")) } var stdout, stderr bytes.Buffer diff --git a/internal/hostagent/commands_execute_test.go b/internal/hostagent/commands_execute_test.go index a71c2f067..7d0b0302e 100644 --- a/internal/hostagent/commands_execute_test.go +++ b/internal/hostagent/commands_execute_test.go @@ -23,22 +23,24 @@ func TestWrapCommand_TargetWrapping(t *testing.T) { want: "echo ok", }, { - name: "container wraps with pct", + name: "container wraps with pct and sh -c", payload: executeCommandPayload{ Command: "echo ok", TargetType: "container", TargetID: "101", }, - want: "pct exec 101 -- echo ok", + // Commands are wrapped in sh -c so shell metacharacters are processed inside the container + want: "pct exec 101 -- sh -c 'echo ok'", }, { - name: "vm wraps with qm guest exec", + name: "vm wraps with qm guest exec and sh -c", payload: executeCommandPayload{ Command: "echo ok", TargetType: "vm", TargetID: "900", }, - want: "qm guest exec 900 -- echo ok", + // Commands are wrapped in sh -c so shell metacharacters are processed inside the VM + want: "qm guest exec 900 -- sh -c 'echo ok'", }, { name: "missing target id does not wrap", diff --git a/internal/hostagent/commands_test.go b/internal/hostagent/commands_test.go index ac81c7b2a..8f8bd6484 100644 --- a/internal/hostagent/commands_test.go +++ b/internal/hostagent/commands_test.go @@ -6,6 +6,7 @@ import ( "net/http" "net/http/httptest" "os/exec" + "strings" "testing" "time" @@ -128,3 +129,110 @@ func TestCommandClient_Run(t *testing.T) { cancel() time.Sleep(100 * time.Millisecond) } + +func TestWrapCommand(t *testing.T) { + tests := []struct { + name string + payload executeCommandPayload + wantCmd string + checkFn func(string) bool + }{ + { + name: "HostCommandPassedThrough", + payload: executeCommandPayload{ + Command: "ls -la", + TargetType: "host", + TargetID: "", + }, + wantCmd: "ls -la", + }, + { + name: 
"LXCCommandWrappedInShC", + payload: executeCommandPayload{ + Command: "grep pattern /var/log/*.log", + TargetType: "container", + TargetID: "141", + }, + checkFn: func(cmd string) bool { + // Should be: pct exec 141 -- sh -c 'grep pattern /var/log/*.log' + return strings.HasPrefix(cmd, "pct exec 141 -- sh -c '") && + strings.Contains(cmd, "grep pattern /var/log/*.log") + }, + }, + { + name: "VMCommandWrappedInShC", + payload: executeCommandPayload{ + Command: "cat /etc/hostname", + TargetType: "vm", + TargetID: "100", + }, + checkFn: func(cmd string) bool { + // Should be: qm guest exec 100 -- sh -c 'cat /etc/hostname' + return strings.HasPrefix(cmd, "qm guest exec 100 -- sh -c '") && + strings.Contains(cmd, "cat /etc/hostname") + }, + }, + { + name: "LXCCommandWithSingleQuotes", + payload: executeCommandPayload{ + Command: "echo \"it's working\"", + TargetType: "container", + TargetID: "141", + }, + checkFn: func(cmd string) bool { + // Single quotes should be escaped: it's -> it'"'"'s + return strings.HasPrefix(cmd, "pct exec 141 -- sh -c '") && + strings.Contains(cmd, `it'"'"'s`) + }, + }, + { + name: "LXCCommandWithPipeline", + payload: executeCommandPayload{ + Command: "echo 'test' | base64 -d > /tmp/file", + TargetType: "container", + TargetID: "108", + }, + checkFn: func(cmd string) bool { + // Pipeline should be wrapped so it runs inside LXC + return strings.HasPrefix(cmd, "pct exec 108 -- sh -c '") && + strings.Contains(cmd, "| base64 -d > /tmp/file") + }, + }, + { + name: "InvalidTargetIDReturnsError", + payload: executeCommandPayload{ + Command: "ls", + TargetType: "container", + TargetID: "141; rm -rf /", // injection attempt + }, + checkFn: func(cmd string) bool { + return strings.Contains(cmd, "invalid target ID") + }, + }, + { + name: "EmptyTargetIDPassedThrough", + payload: executeCommandPayload{ + Command: "ls", + TargetType: "container", + TargetID: "", + }, + wantCmd: "ls", // No wrapping when TargetID is empty + }, + } + + for _, tt := range 
tests { + t.Run(tt.name, func(t *testing.T) { + got := wrapCommand(tt.payload) + if tt.wantCmd != "" { + if got != tt.wantCmd { + t.Errorf("wrapCommand() = %q, want %q", got, tt.wantCmd) + } + } + if tt.checkFn != nil { + if !tt.checkFn(got) { + t.Errorf("wrapCommand() = %q, check failed", got) + } + } + }) + } +} diff --git a/internal/infradiscovery/service.go b/internal/infradiscovery/service.go index 1caec4637..72699bcf5 100644 --- a/internal/infradiscovery/service.go +++ b/internal/infradiscovery/service.go @@ -1,4 +1,4 @@ -// Package infradiscovery provides AI-powered infrastructure discovery for detecting +// Package infradiscovery provides infrastructure discovery for detecting // applications and services running on monitored hosts. It uses LLM analysis to // identify services from Docker containers, enabling AI systems like Patrol to // understand where services run and propose correct remediation commands. @@ -70,8 +70,8 @@ type PortInfo struct { Protocol string `json:"protocol,omitempty"` } -// AIDiscoveryResult represents the AI's analysis of a container. -type AIDiscoveryResult struct { +// DiscoveryResult represents the AI's analysis of a container. +type DiscoveryResult struct { ServiceType string `json:"service_type"` // e.g., "postgres", "pbs", "nginx", "unknown" ServiceName string `json:"service_name"` // Human-readable name Category string `json:"category"` // backup, database, web, monitoring, etc. @@ -80,7 +80,7 @@ type AIDiscoveryResult struct { Reasoning string `json:"reasoning"` // Why the AI made this determination } -// Service manages AI-powered infrastructure discovery. +// Service manages infrastructure discovery. 
type Service struct { stateProvider StateProvider knowledgeStore *knowledge.Store @@ -94,7 +94,7 @@ type Service struct { // Cache to avoid re-analyzing the same containers // Key: image name, Value: analysis result - analysisCache map[string]*AIDiscoveryResult + analysisCache map[string]*DiscoveryResult cacheMu sync.RWMutex cacheExpiry time.Duration lastCacheUpdate time.Time @@ -114,7 +114,7 @@ func DefaultConfig() Config { } } -// NewService creates a new AI-powered infrastructure discovery service. +// NewService creates a new infrastructure discovery service. func NewService(stateProvider StateProvider, knowledgeStore *knowledge.Store, cfg Config) *Service { if cfg.Interval == 0 { cfg.Interval = 5 * time.Minute @@ -130,12 +130,12 @@ func NewService(stateProvider StateProvider, knowledgeStore *knowledge.Store, cf cacheExpiry: cfg.CacheExpiry, stopCh: make(chan struct{}), discoveries: make([]DiscoveredApp, 0), - analysisCache: make(map[string]*AIDiscoveryResult), + analysisCache: make(map[string]*DiscoveryResult), } } // SetAIAnalyzer sets the AI analyzer for discovery. -// This must be called before Start() for AI-powered discovery to work. +// This must be called before Start() for discovery to work. func (s *Service) SetAIAnalyzer(analyzer AIAnalyzer) { s.mu.Lock() defer s.mu.Unlock() @@ -154,7 +154,7 @@ func (s *Service) Start(ctx context.Context) { log.Info(). Dur("interval", s.interval). - Msg("Starting AI-powered infrastructure discovery service") + Msg("Starting infrastructure discovery service") // Run immediately on startup go func() { @@ -285,7 +285,7 @@ func (s *Service) analyzeContainer(ctx context.Context, analyzer AIAnalyzer, c m cacheValid := time.Since(s.lastCacheUpdate) < s.cacheExpiry s.cacheMu.RUnlock() - var result *AIDiscoveryResult + var result *DiscoveryResult if found && cacheValid { result = cached @@ -459,7 +459,7 @@ Respond with ONLY the JSON, no other text.`, string(infoJSON)) } // parseAIResponse parses the AI's JSON response. 
-func (s *Service) parseAIResponse(response string) *AIDiscoveryResult { +func (s *Service) parseAIResponse(response string) *DiscoveryResult { // Try to extract JSON from the response response = strings.TrimSpace(response) @@ -487,7 +487,7 @@ func (s *Service) parseAIResponse(response string) *AIDiscoveryResult { response = response[start : end+1] } - var result AIDiscoveryResult + var result DiscoveryResult if err := json.Unmarshal([]byte(response), &result); err != nil { log.Debug(). Err(err). @@ -581,7 +581,7 @@ func (s *Service) ForceRefresh(ctx context.Context) { func (s *Service) ClearCache() { s.cacheMu.Lock() defer s.cacheMu.Unlock() - s.analysisCache = make(map[string]*AIDiscoveryResult) + s.analysisCache = make(map[string]*DiscoveryResult) s.lastCacheUpdate = time.Time{} } diff --git a/internal/infradiscovery/service_test.go b/internal/infradiscovery/service_test.go index e56ed398e..0b320f99b 100644 --- a/internal/infradiscovery/service_test.go +++ b/internal/infradiscovery/service_test.go @@ -64,7 +64,7 @@ func TestParseAIResponse(t *testing.T) { tests := []struct { name string response string - want *AIDiscoveryResult + want *DiscoveryResult }{ { name: "valid JSON", @@ -76,7 +76,7 @@ func TestParseAIResponse(t *testing.T) { "confidence": 0.95, "reasoning": "Image name contains postgres" }`, - want: &AIDiscoveryResult{ + want: &DiscoveryResult{ ServiceType: "postgres", ServiceName: "PostgreSQL", Category: "database", @@ -88,7 +88,7 @@ func TestParseAIResponse(t *testing.T) { { name: "JSON in markdown code block", response: "```json\n{\"service_type\": \"redis\", \"service_name\": \"Redis\", \"category\": \"cache\", \"cli_command\": \"docker exec {container} redis-cli\", \"confidence\": 0.9, \"reasoning\": \"Redis image\"}\n```", - want: &AIDiscoveryResult{ + want: &DiscoveryResult{ ServiceType: "redis", ServiceName: "Redis", Category: "cache", @@ -107,7 +107,7 @@ func TestParseAIResponse(t *testing.T) { response: `Here's my analysis: {"service_type": 
"nginx", "service_name": "Nginx", "category": "web", "cli_command": "", "confidence": 0.85, "reasoning": "Web server"} That's my answer.`, - want: &AIDiscoveryResult{ + want: &DiscoveryResult{ ServiceType: "nginx", ServiceName: "Nginx", Category: "web", diff --git a/internal/models/models.go b/internal/models/models.go index 436ace46b..23f6a0d38 100644 --- a/internal/models/models.go +++ b/internal/models/models.go @@ -696,6 +696,7 @@ type Storage struct { NodeCount int `json:"nodeCount,omitempty"` Type string `json:"type"` Status string `json:"status"` + Path string `json:"path,omitempty"` Total int64 `json:"total"` Used int64 `json:"used"` Free int64 `json:"free"` diff --git a/internal/models/state_snapshot.go b/internal/models/state_snapshot.go index c777c5084..e274003e5 100644 --- a/internal/models/state_snapshot.go +++ b/internal/models/state_snapshot.go @@ -87,6 +87,222 @@ func (s *State) GetSnapshot() StateSnapshot { return snapshot } +// ResourceLocation describes where a resource lives in the infrastructure hierarchy. +// This is the authoritative source of truth for routing commands to resources. 
+type ResourceLocation struct { + // What was found + Found bool // True if the resource was found + Name string // The resource name + ResourceType string // "node", "vm", "lxc", "dockerhost", "docker", "host", "k8s_cluster", "k8s_pod", "k8s_deployment" + + // For VMs and LXCs (Proxmox) + VMID int // VMID if this is a VM or LXC + Node string // Proxmox node name + + // For Docker/Podman containers + DockerHostName string // Name of the Docker host (LXC/VM/standalone) + DockerHostType string // "lxc", "vm", or "standalone" + DockerHostVMID int // VMID if Docker host is an LXC/VM + + // For Kubernetes resources + K8sClusterName string // Kubernetes cluster name + K8sNamespace string // Kubernetes namespace + K8sAgentID string // Agent ID for routing kubectl commands + + // For generic hosts (Windows/Linux via Pulse Unified Agent) + HostID string // Host ID + Platform string // "linux", "windows", etc. + + // The key output: where to route commands + TargetHost string // The target_host to use for pulse_control/pulse_file_edit + AgentID string // Direct agent ID if known (for K8s, standalone hosts) +} + +// ResolveResource looks up a resource by name and returns its location in the hierarchy. +// This is the single source of truth for determining where any resource lives. 
+func (s StateSnapshot) ResolveResource(name string) ResourceLocation { + // Check Proxmox nodes first + for _, node := range s.Nodes { + if node.Name == name { + return ResourceLocation{ + Found: true, + Name: name, + ResourceType: "node", + Node: node.Name, + TargetHost: node.Name, + } + } + } + + // Check VMs + for _, vm := range s.VMs { + if vm.Name == name { + return ResourceLocation{ + Found: true, + Name: name, + ResourceType: "vm", + VMID: vm.VMID, + Node: vm.Node, + TargetHost: vm.Name, // Route to VM by name + } + } + } + + // Check LXC containers + for _, lxc := range s.Containers { + if lxc.Name == name { + return ResourceLocation{ + Found: true, + Name: name, + ResourceType: "lxc", + VMID: lxc.VMID, + Node: lxc.Node, + TargetHost: lxc.Name, // Route to LXC by name + } + } + } + + // Check Docker hosts (LXCs/VMs/standalone hosts running Docker) + for _, dh := range s.DockerHosts { + if dh.Hostname == name || dh.ID == name { + loc := ResourceLocation{ + Found: true, + Name: dh.Hostname, + ResourceType: "dockerhost", + DockerHostName: dh.Hostname, + TargetHost: dh.Hostname, + } + // Check if this Docker host is an LXC + for _, lxc := range s.Containers { + if lxc.Name == dh.Hostname || lxc.Name == dh.ID { + loc.DockerHostType = "lxc" + loc.DockerHostVMID = lxc.VMID + loc.Node = lxc.Node + break + } + } + // Check if this Docker host is a VM + if loc.DockerHostType == "" { + for _, vm := range s.VMs { + if vm.Name == dh.Hostname || vm.Name == dh.ID { + loc.DockerHostType = "vm" + loc.DockerHostVMID = vm.VMID + loc.Node = vm.Node + break + } + } + } + if loc.DockerHostType == "" { + loc.DockerHostType = "standalone" + } + return loc + } + } + + // Check Docker containers - this is the critical path for "homepage" -> "homepage-docker" + for _, dh := range s.DockerHosts { + for _, container := range dh.Containers { + if container.Name == name { + loc := ResourceLocation{ + Found: true, + Name: name, + ResourceType: "docker", + DockerHostName: dh.Hostname, + 
TargetHost: dh.Hostname, // Route to the Docker host, not the container + } + // Resolve the Docker host's parent (LXC/VM/standalone) + for _, lxc := range s.Containers { + if lxc.Name == dh.Hostname || lxc.Name == dh.ID { + loc.DockerHostType = "lxc" + loc.DockerHostVMID = lxc.VMID + loc.Node = lxc.Node + loc.TargetHost = lxc.Name // Route to the LXC + break + } + } + if loc.DockerHostType == "" { + for _, vm := range s.VMs { + if vm.Name == dh.Hostname || vm.Name == dh.ID { + loc.DockerHostType = "vm" + loc.DockerHostVMID = vm.VMID + loc.Node = vm.Node + loc.TargetHost = vm.Name // Route to the VM + break + } + } + } + if loc.DockerHostType == "" { + loc.DockerHostType = "standalone" + } + return loc + } + } + } + + // Check generic Hosts (Windows/Linux via Pulse Unified Agent) + for _, host := range s.Hosts { + if host.Hostname == name || host.ID == name { + return ResourceLocation{ + Found: true, + Name: host.Hostname, + ResourceType: "host", + HostID: host.ID, + Platform: host.Platform, + TargetHost: host.Hostname, + } + } + } + + // Check Kubernetes clusters, pods, and deployments + for _, cluster := range s.KubernetesClusters { + if cluster.Name == name || cluster.ID == name || cluster.DisplayName == name { + return ResourceLocation{ + Found: true, + Name: cluster.Name, + ResourceType: "k8s_cluster", + K8sClusterName: cluster.Name, + K8sAgentID: cluster.AgentID, + TargetHost: cluster.Name, + AgentID: cluster.AgentID, + } + } + + // Check pods within this cluster + for _, pod := range cluster.Pods { + if pod.Name == name { + return ResourceLocation{ + Found: true, + Name: pod.Name, + ResourceType: "k8s_pod", + K8sClusterName: cluster.Name, + K8sNamespace: pod.Namespace, + K8sAgentID: cluster.AgentID, + TargetHost: cluster.Name, + AgentID: cluster.AgentID, + } + } + } + + // Check deployments within this cluster + for _, deploy := range cluster.Deployments { + if deploy.Name == name { + return ResourceLocation{ + Found: true, + Name: deploy.Name, + 
ResourceType: "k8s_deployment", + K8sClusterName: cluster.Name, + K8sNamespace: deploy.Namespace, + K8sAgentID: cluster.AgentID, + TargetHost: cluster.Name, + AgentID: cluster.AgentID, + } + } + } + } + + return ResourceLocation{Found: false, Name: name} +} + // ToFrontend converts a StateSnapshot to frontend format with proper tag handling func (s StateSnapshot) ToFrontend() StateFrontend { // Convert nodes diff --git a/internal/monitoring/guest_config.go b/internal/monitoring/guest_config.go new file mode 100644 index 000000000..9c3b81550 --- /dev/null +++ b/internal/monitoring/guest_config.go @@ -0,0 +1,89 @@ +package monitoring + +import ( + "context" + "fmt" + "strings" +) + +// GetGuestConfig fetches Proxmox guest configuration for a VM or LXC container. +// If instance or node are empty, it attempts to resolve them from the current state. +func (m *Monitor) GetGuestConfig(ctx context.Context, guestType, instance, node string, vmid int) (map[string]interface{}, error) { + if m == nil { + return nil, fmt.Errorf("monitor not available") + } + if vmid <= 0 { + return nil, fmt.Errorf("invalid vmid") + } + + gt := strings.ToLower(strings.TrimSpace(guestType)) + if gt == "" { + return nil, fmt.Errorf("guest type is required") + } + + // Resolve instance/node from state if missing. 
+ if instance == "" || node == "" { + m.mu.RLock() + state := m.state + m.mu.RUnlock() + if state == nil { + return nil, fmt.Errorf("state not available") + } + + switch gt { + case "container", "lxc": + for _, ct := range state.Containers { + if ct.VMID == vmid { + if instance == "" { + instance = ct.Instance + } + if node == "" { + node = ct.Node + } + break + } + } + case "vm": + for _, vm := range state.VMs { + if vm.VMID == vmid { + if instance == "" { + instance = vm.Instance + } + if node == "" { + node = vm.Node + } + break + } + } + default: + return nil, fmt.Errorf("unsupported guest type: %s", guestType) + } + } + + if instance == "" || node == "" { + return nil, fmt.Errorf("unable to resolve instance or node for guest") + } + + m.mu.RLock() + client := m.pveClients[instance] + m.mu.RUnlock() + if client == nil { + return nil, fmt.Errorf("no PVE client for instance %s", instance) + } + + switch gt { + case "container", "lxc": + return client.GetContainerConfig(ctx, node, vmid) + case "vm": + type vmConfigClient interface { + GetVMConfig(ctx context.Context, node string, vmid int) (map[string]interface{}, error) + } + vmClient, ok := client.(vmConfigClient) + if !ok { + return nil, fmt.Errorf("VM config not supported by client") + } + return vmClient.GetVMConfig(ctx, node, vmid) + default: + return nil, fmt.Errorf("unsupported guest type: %s", guestType) + } +} diff --git a/internal/monitoring/monitor_polling.go b/internal/monitoring/monitor_polling.go index 94ef6ac64..ff672873c 100644 --- a/internal/monitoring/monitor_polling.go +++ b/internal/monitoring/monitor_polling.go @@ -1410,6 +1410,7 @@ func (m *Monitor) pollStorageWithNodes(ctx context.Context, instanceName string, Instance: storageInstanceName, Type: storage.Type, Status: "available", + Path: storage.Path, Total: int64(storage.Total), Used: int64(storage.Used), Free: int64(storage.Available), @@ -1420,6 +1421,15 @@ func (m *Monitor) pollStorageWithNodes(ctx context.Context, instanceName string, 
Active: storage.Active == 1, } + if hasClusterConfig { + if nodes := parseClusterStorageNodes(clusterConfig.Nodes); len(nodes) > 0 { + modelStorage.Nodes = nodes + } + if modelStorage.Path == "" && clusterConfig.Path != "" { + modelStorage.Path = clusterConfig.Path + } + } + // If this is ZFS storage, attach pool status information if storage.Type == "zfspool" || storage.Type == "zfs" || storage.Type == "local-zfs" { // Try to match by storage name or by common ZFS pool names @@ -2478,3 +2488,35 @@ func (m *Monitor) pollPVENode( return modelNode, effectiveStatus, nil } + +func parseClusterStorageNodes(raw string) []string { + raw = strings.TrimSpace(raw) + if raw == "" { + return nil + } + + parts := strings.FieldsFunc(raw, func(r rune) bool { + return r == ',' || r == ';' || r == ' ' || r == '\t' || r == '\n' + }) + if len(parts) == 0 { + return nil + } + + seen := make(map[string]struct{}, len(parts)) + result := make([]string, 0, len(parts)) + for _, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + if _, exists := seen[part]; exists { + continue + } + seen[part] = struct{}{} + result = append(result, part) + } + if len(result) == 0 { + return nil + } + return result +} diff --git a/internal/monitoring/storage_config.go b/internal/monitoring/storage_config.go new file mode 100644 index 000000000..a73095a63 --- /dev/null +++ b/internal/monitoring/storage_config.go @@ -0,0 +1,89 @@ +package monitoring + +import ( + "context" + "fmt" + "strings" + + "github.com/rcourtman/pulse-go-rewrite/pkg/proxmox" + "github.com/rs/zerolog/log" +) + +// GetStorageConfig fetches Proxmox storage configuration across instances. +// If instance is empty, returns configs for all instances. 
+func (m *Monitor) GetStorageConfig(ctx context.Context, instance string) (map[string][]proxmox.Storage, error) { + if m == nil { + return nil, fmt.Errorf("monitor not available") + } + if ctx == nil { + ctx = context.Background() + } + + filter := strings.TrimSpace(instance) + + m.mu.RLock() + clients := make(map[string]PVEClientInterface, len(m.pveClients)) + for name, client := range m.pveClients { + clients[name] = client + } + m.mu.RUnlock() + + if len(clients) == 0 { + return nil, fmt.Errorf("no PVE clients available") + } + + results := make(map[string][]proxmox.Storage) + var firstErr error + + for name, client := range clients { + if client == nil { + continue + } + if filter != "" && !m.matchesInstanceFilter(name, filter) { + continue + } + + storageInstance := name + if cfg := m.getInstanceConfig(name); cfg != nil && cfg.IsCluster && cfg.ClusterName != "" { + storageInstance = cfg.ClusterName + } + + storages, err := client.GetAllStorage(ctx) + if err != nil { + if filter != "" { + return nil, err + } + if firstErr == nil { + firstErr = err + } + log.Warn(). + Err(err). + Str("instance", name). + Msg("Failed to fetch storage config for instance") + continue + } + + results[storageInstance] = append(results[storageInstance], storages...) 
+ } + + if len(results) == 0 && firstErr != nil { + return nil, firstErr + } + + if filter != "" && len(results) == 0 { + return nil, fmt.Errorf("no PVE instance matches %s", filter) + } + + return results, nil +} + +func (m *Monitor) matchesInstanceFilter(instanceName, filter string) bool { + if strings.EqualFold(instanceName, filter) { + return true + } + cfg := m.getInstanceConfig(instanceName) + if cfg != nil && cfg.IsCluster && cfg.ClusterName != "" && strings.EqualFold(cfg.ClusterName, filter) { + return true + } + return false +} diff --git a/internal/servicediscovery/commands.go b/internal/servicediscovery/commands.go new file mode 100644 index 000000000..91cc84041 --- /dev/null +++ b/internal/servicediscovery/commands.go @@ -0,0 +1,526 @@ +package servicediscovery + +import ( + "fmt" + "regexp" + "strings" +) + +// safeResourceIDPattern matches valid resource IDs: alphanumeric, dash, underscore, period, colon +// This prevents shell injection via malicious resource names. +var safeResourceIDPattern = regexp.MustCompile(`^[a-zA-Z0-9._:-]+$`) + +// ValidateResourceID checks if a resource ID is safe to use in shell commands. +// Returns an error if the ID contains potentially dangerous characters. +func ValidateResourceID(id string) error { + if id == "" { + return fmt.Errorf("resource ID cannot be empty") + } + if len(id) > 256 { + return fmt.Errorf("resource ID too long (max 256 chars)") + } + if !safeResourceIDPattern.MatchString(id) { + return fmt.Errorf("resource ID contains invalid characters: only alphanumeric, dash, underscore, period, and colon allowed") + } + return nil +} + +// shellQuote safely quotes a string for use as a shell argument. +// Uses single quotes and escapes any embedded single quotes. 
+func shellQuote(s string) string {
+	// Replace single quotes with '"'"' (close quote, double-quoted quote, reopen quote)
+	escaped := strings.ReplaceAll(s, "'", "'\"'\"'")
+	return "'" + escaped + "'"
+}
+
+// DiscoveryCommand represents a command to run during discovery.
+type DiscoveryCommand struct {
+	Name        string   // Human-readable name
+	Command     string   // The command template
+	Description string   // What this discovers
+	Categories  []string // What categories of info this provides
+	Timeout     int      // Timeout in seconds (0 = default)
+	Optional    bool     // If true, don't fail if command fails
+}
+
+// CommandSet represents a set of commands for a resource type.
+type CommandSet struct {
+	ResourceType ResourceType
+	Commands     []DiscoveryCommand
+}
+
+// GetCommandsForResource returns the commands to run for a given resource type.
+func GetCommandsForResource(resourceType ResourceType) []DiscoveryCommand {
+	switch resourceType {
+	case ResourceTypeLXC:
+		return getLXCCommands()
+	case ResourceTypeVM:
+		return getVMCommands()
+	case ResourceTypeDocker:
+		return getDockerCommands()
+	case ResourceTypeDockerVM, ResourceTypeDockerLXC:
+		return getNestedDockerCommands()
+	case ResourceTypeK8s:
+		return getK8sCommands()
+	case ResourceTypeHost:
+		return getHostCommands()
+	default:
+		return []DiscoveryCommand{}
+	}
+}
+
+// getLXCCommands returns commands for discovering LXC containers.
+func getLXCCommands() []DiscoveryCommand { + return []DiscoveryCommand{ + { + Name: "os_release", + Command: "cat /etc/os-release", + Description: "Operating system identification", + Categories: []string{"version", "config"}, + Optional: true, + }, + { + Name: "hostname", + Command: "hostname", + Description: "Container hostname", + Categories: []string{"config"}, + Optional: true, + }, + { + Name: "running_services", + Command: "systemctl list-units --type=service --state=running --no-pager 2>/dev/null | head -30 || service --status-all 2>/dev/null | grep '+' | head -30", + Description: "Running services and daemons", + Categories: []string{"service"}, + Optional: true, + }, + { + Name: "listening_ports", + Command: "ss -tlnp 2>/dev/null | head -25 || netstat -tlnp 2>/dev/null | head -25", + Description: "Network ports listening", + Categories: []string{"port", "network"}, + Optional: true, + }, + { + Name: "top_processes", + Command: "ps aux --sort=-rss 2>/dev/null | head -15 || ps aux | head -15", + Description: "Top processes by memory", + Categories: []string{"service"}, + Optional: true, + }, + { + Name: "disk_usage", + Command: "df -h 2>/dev/null | head -15", + Description: "Disk usage and mount points", + Categories: []string{"storage"}, + Optional: true, + }, + { + Name: "docker_check", + Command: "docker ps --format '{{.Names}}: {{.Image}} ({{.Status}})' 2>/dev/null | head -20 || echo 'no_docker'", + Description: "Docker containers if running", + Categories: []string{"service", "container"}, + Optional: true, + }, + { + Name: "docker_mounts", + Command: `sh -c 'docker ps -q 2>/dev/null | head -15 | while read id; do name=$(docker inspect --format "{{.Name}}" "$id" 2>/dev/null | sed "s|^/||"); echo "CONTAINER:$name"; docker inspect --format "{{range .Mounts}}{{.Source}}|{{.Destination}}|{{.Type}}{{println}}{{end}}" "$id" 2>/dev/null | grep -v "^$" || true; done; echo docker_mounts_done'`, + Description: "Docker container bind mounts (source -> 
destination)", + Categories: []string{"config", "storage"}, + Optional: true, + }, + { + Name: "installed_packages", + Command: "dpkg -l 2>/dev/null | grep -E '^ii' | awk '{print $2}' | head -50 || rpm -qa 2>/dev/null | head -50 || apk list --installed 2>/dev/null | head -50", + Description: "Installed packages", + Categories: []string{"version", "service"}, + Optional: true, + }, + { + Name: "config_files", + Command: "find /etc -name '*.conf' -o -name '*.yml' -o -name '*.yaml' -o -name '*.json' 2>/dev/null | head -30", + Description: "Configuration files", + Categories: []string{"config"}, + Optional: true, + }, + { + Name: "cron_jobs", + Command: "crontab -l 2>/dev/null | grep -v '^#' | head -10 || ls -la /etc/cron.d/ 2>/dev/null | head -10", + Description: "Scheduled jobs", + Categories: []string{"service"}, + Optional: true, + }, + { + Name: "hardware_info", + Command: "lspci 2>/dev/null | head -20 || echo 'no_lspci'", + Description: "Hardware devices (e.g., Coral TPU)", + Categories: []string{"hardware"}, + Optional: true, + }, + { + Name: "gpu_devices", + Command: "ls -la /dev/dri/ 2>/dev/null; ls -la /dev/apex* 2>/dev/null; nvidia-smi -L 2>/dev/null || echo 'no_gpu'", + Description: "GPU and TPU devices", + Categories: []string{"hardware"}, + Optional: true, + }, + } +} + +// getVMCommands returns commands for discovering VMs (via QEMU guest agent). 
+func getVMCommands() []DiscoveryCommand { + return []DiscoveryCommand{ + { + Name: "os_release", + Command: "cat /etc/os-release", + Description: "Operating system identification", + Categories: []string{"version", "config"}, + Optional: true, + }, + { + Name: "hostname", + Command: "hostname", + Description: "VM hostname", + Categories: []string{"config"}, + Optional: true, + }, + { + Name: "running_services", + Command: "systemctl list-units --type=service --state=running --no-pager 2>/dev/null | head -30", + Description: "Running services and daemons", + Categories: []string{"service"}, + Optional: true, + }, + { + Name: "listening_ports", + Command: "ss -tlnp 2>/dev/null | head -25 || netstat -tlnp 2>/dev/null | head -25", + Description: "Network ports listening", + Categories: []string{"port", "network"}, + Optional: true, + }, + { + Name: "top_processes", + Command: "ps aux --sort=-rss 2>/dev/null | head -15", + Description: "Top processes by memory", + Categories: []string{"service"}, + Optional: true, + }, + { + Name: "disk_usage", + Command: "df -h 2>/dev/null | head -15", + Description: "Disk usage and mount points", + Categories: []string{"storage"}, + Optional: true, + }, + { + Name: "docker_check", + Command: "docker ps --format '{{.Names}}: {{.Image}} ({{.Status}})' 2>/dev/null | head -20 || echo 'no_docker'", + Description: "Docker containers if running", + Categories: []string{"service", "container"}, + Optional: true, + }, + { + Name: "docker_mounts", + Command: `sh -c 'docker ps -q 2>/dev/null | head -15 | while read id; do name=$(docker inspect --format "{{.Name}}" "$id" 2>/dev/null | sed "s|^/||"); echo "CONTAINER:$name"; docker inspect --format "{{range .Mounts}}{{.Source}}|{{.Destination}}|{{.Type}}{{println}}{{end}}" "$id" 2>/dev/null | grep -v "^$" || true; done; echo docker_mounts_done'`, + Description: "Docker container bind mounts (source -> destination)", + Categories: []string{"config", "storage"}, + Optional: true, + }, + { + Name: 
"hardware_info", + Command: "lspci 2>/dev/null | head -20", + Description: "PCI hardware devices", + Categories: []string{"hardware"}, + Optional: true, + }, + { + Name: "gpu_devices", + Command: "ls -la /dev/dri/ 2>/dev/null; nvidia-smi -L 2>/dev/null || echo 'no_gpu'", + Description: "GPU devices", + Categories: []string{"hardware"}, + Optional: true, + }, + } +} + +// getDockerCommands returns commands for discovering Docker containers. +// These are run inside the container via docker exec. +func getDockerCommands() []DiscoveryCommand { + return []DiscoveryCommand{ + { + Name: "os_release", + Command: "cat /etc/os-release 2>/dev/null || cat /etc/alpine-release 2>/dev/null || echo 'unknown'", + Description: "Container OS", + Categories: []string{"version"}, + Optional: true, + }, + { + Name: "processes", + Command: "ps aux 2>/dev/null || echo 'no_ps'", + Description: "Running processes", + Categories: []string{"service"}, + Optional: true, + }, + { + Name: "listening_ports", + Command: "ss -tlnp 2>/dev/null || netstat -tlnp 2>/dev/null || echo 'no_ss'", + Description: "Listening ports inside container", + Categories: []string{"port"}, + Optional: true, + }, + { + Name: "env_vars", + Command: "env 2>/dev/null | grep -vE '(PASSWORD|SECRET|KEY|TOKEN|CREDENTIAL)' | head -30", + Description: "Environment variables (filtered)", + Categories: []string{"config"}, + Optional: true, + }, + { + Name: "config_files", + Command: "find /config /data /app /etc -maxdepth 2 -name '*.conf' -o -name '*.yml' -o -name '*.yaml' -o -name '*.json' 2>/dev/null | head -20", + Description: "Configuration files", + Categories: []string{"config"}, + Optional: true, + }, + } +} + +// getNestedDockerCommands returns commands for Docker inside VMs or LXCs. 
+func getNestedDockerCommands() []DiscoveryCommand { + return []DiscoveryCommand{ + { + Name: "docker_containers", + Command: "docker ps -a --format '{{.Names}}|{{.Image}}|{{.Status}}|{{.Ports}}'", + Description: "All Docker containers", + Categories: []string{"container", "service"}, + Optional: false, + }, + { + Name: "docker_images", + Command: "docker images --format '{{.Repository}}:{{.Tag}}' | head -20", + Description: "Docker images", + Categories: []string{"version"}, + Optional: true, + }, + { + Name: "docker_compose", + Command: "find /opt /home /root -name 'docker-compose*.yml' -o -name 'compose*.yml' 2>/dev/null | head -10", + Description: "Docker compose files", + Categories: []string{"config"}, + Optional: true, + }, + } +} + +// getK8sCommands returns commands for discovering Kubernetes pods. +func getK8sCommands() []DiscoveryCommand { + return []DiscoveryCommand{ + { + Name: "processes", + Command: "ps aux 2>/dev/null || echo 'no_ps'", + Description: "Running processes in pod", + Categories: []string{"service"}, + Optional: true, + }, + { + Name: "listening_ports", + Command: "ss -tlnp 2>/dev/null || netstat -tlnp 2>/dev/null || echo 'no_ss'", + Description: "Listening ports", + Categories: []string{"port"}, + Optional: true, + }, + { + Name: "env_vars", + Command: "env 2>/dev/null | grep -vE '(PASSWORD|SECRET|KEY|TOKEN|CREDENTIAL)' | head -30", + Description: "Environment variables (filtered)", + Categories: []string{"config"}, + Optional: true, + }, + } +} + +// getHostCommands returns commands for discovering host systems. 
+func getHostCommands() []DiscoveryCommand { + return []DiscoveryCommand{ + { + Name: "os_release", + Command: "cat /etc/os-release", + Description: "Operating system", + Categories: []string{"version", "config"}, + Optional: true, + }, + { + Name: "hostname", + Command: "hostname -f 2>/dev/null || hostname", + Description: "Full hostname", + Categories: []string{"config"}, + Optional: true, + }, + { + Name: "running_services", + Command: "systemctl list-units --type=service --state=running --no-pager 2>/dev/null | head -40", + Description: "Running services", + Categories: []string{"service"}, + Optional: true, + }, + { + Name: "listening_ports", + Command: "ss -tlnp 2>/dev/null | head -30", + Description: "Listening network ports", + Categories: []string{"port", "network"}, + Optional: true, + }, + { + Name: "docker_containers", + Command: "docker ps --format '{{.Names}}: {{.Image}} ({{.Status}})' 2>/dev/null | head -30 || echo 'no_docker'", + Description: "Docker containers on host", + Categories: []string{"container", "service"}, + Optional: true, + }, + { + Name: "proxmox_version", + Command: "pveversion 2>/dev/null || echo 'not_proxmox'", + Description: "Proxmox version if applicable", + Categories: []string{"version"}, + Optional: true, + }, + { + Name: "zfs_pools", + Command: "zpool list 2>/dev/null | head -10 || echo 'no_zfs'", + Description: "ZFS pools", + Categories: []string{"storage"}, + Optional: true, + }, + { + Name: "disk_usage", + Command: "df -h | head -20", + Description: "Disk usage", + Categories: []string{"storage"}, + Optional: true, + }, + { + Name: "hardware_info", + Command: "lscpu | head -20", + Description: "CPU information", + Categories: []string{"hardware"}, + Optional: true, + }, + { + Name: "memory_info", + Command: "free -h", + Description: "Memory information", + Categories: []string{"hardware"}, + Optional: true, + }, + } +} + +// BuildLXCCommand wraps a command for execution in an LXC container. 
+// The vmid is validated to prevent command injection. +func BuildLXCCommand(vmid string, cmd string) string { + if err := ValidateResourceID(vmid); err != nil { + // Don't include the invalid ID in output to prevent any injection + return "sh -c 'echo \"Discovery error: invalid LXC container ID\" >&2; exit 1'" + } + return fmt.Sprintf("pct exec %s -- sh -c %s", vmid, shellQuote(cmd)) +} + +// BuildVMCommand wraps a command for execution in a VM via QEMU guest agent. +// Note: This requires the guest agent to be running. +// The vmid is validated to prevent command injection. +func BuildVMCommand(vmid string, cmd string) string { + if err := ValidateResourceID(vmid); err != nil { + return "sh -c 'echo \"Discovery error: invalid VM ID\" >&2; exit 1'" + } + // For VMs, we use qm guest exec which requires the guest agent + return fmt.Sprintf("qm guest exec %s -- sh -c %s", vmid, shellQuote(cmd)) +} + +// BuildDockerCommand wraps a command for execution in a Docker container. +// The containerName is validated to prevent command injection. +// Note: Leading slashes are trimmed as Docker API often returns names with leading /. +func BuildDockerCommand(containerName string, cmd string) string { + // Docker API returns container names with leading slash, trim it + containerName = strings.TrimPrefix(containerName, "/") + if err := ValidateResourceID(containerName); err != nil { + return "sh -c 'echo \"Discovery error: invalid container name\" >&2; exit 1'" + } + return fmt.Sprintf("docker exec %s sh -c %s", shellQuote(containerName), shellQuote(cmd)) +} + +// BuildNestedDockerCommand builds a command to run inside Docker on a VM/LXC. +// All resource identifiers are validated to prevent command injection. 
+func BuildNestedDockerCommand(vmid string, isLXC bool, containerName string, cmd string) string { + if err := ValidateResourceID(vmid); err != nil { + return "sh -c 'echo \"Discovery error: invalid VM/LXC ID\" >&2; exit 1'" + } + // Docker API returns container names with leading slash, trim it + containerName = strings.TrimPrefix(containerName, "/") + if err := ValidateResourceID(containerName); err != nil { + return "sh -c 'echo \"Discovery error: invalid container name\" >&2; exit 1'" + } + dockerCmd := BuildDockerCommand(containerName, cmd) + if isLXC { + return BuildLXCCommand(vmid, dockerCmd) + } + return BuildVMCommand(vmid, dockerCmd) +} + +// BuildK8sCommand builds a command to run in a Kubernetes pod. +// All identifiers are validated to prevent command injection. +func BuildK8sCommand(namespace, podName, containerName, cmd string) string { + if err := ValidateResourceID(namespace); err != nil { + return "sh -c 'echo \"Discovery error: invalid namespace\" >&2; exit 1'" + } + if err := ValidateResourceID(podName); err != nil { + return "sh -c 'echo \"Discovery error: invalid pod name\" >&2; exit 1'" + } + if containerName != "" { + if err := ValidateResourceID(containerName); err != nil { + return "sh -c 'echo \"Discovery error: invalid container name\" >&2; exit 1'" + } + return fmt.Sprintf("kubectl exec -n %s %s -c %s -- sh -c %s", shellQuote(namespace), shellQuote(podName), shellQuote(containerName), shellQuote(cmd)) + } + return fmt.Sprintf("kubectl exec -n %s %s -- sh -c %s", shellQuote(namespace), shellQuote(podName), shellQuote(cmd)) +} + +// GetCLIAccessTemplate returns a CLI access template for a resource type. +// These are instructions for using pulse_control, NOT literal shell commands. +// Commands via pulse_control run directly on the target where the agent is installed. 
+func GetCLIAccessTemplate(resourceType ResourceType) string { + switch resourceType { + case ResourceTypeLXC: + // Agent runs ON the LXC - commands execute directly inside the container + return "Use pulse_control with target_host matching this LXC's hostname. Commands run directly inside the container." + case ResourceTypeVM: + // Agent runs ON the VM - commands execute directly inside the VM + return "Use pulse_control with target_host matching this VM's hostname. Commands run directly inside the VM." + case ResourceTypeDocker: + // Docker container on a host - need docker exec from the host + return "Use pulse_control targeting the Docker host with command: docker exec {container} " + case ResourceTypeDockerLXC: + // Docker inside an LXC - agent on the LXC runs docker exec + return "Use pulse_control targeting the LXC hostname with command: docker exec {container} " + case ResourceTypeDockerVM: + // Docker inside a VM - agent on the VM runs docker exec + return "Use pulse_control targeting the VM hostname with command: docker exec {container} " + case ResourceTypeK8s: + return "Use kubectl exec -n {namespace} {pod} -- " + case ResourceTypeHost: + return "Use pulse_control with target_host matching this host. Commands run directly." + default: + return "Use pulse_control with target_host matching the resource hostname." + } +} + +// FormatCLIAccess formats a CLI access string with actual values. 
+func FormatCLIAccess(resourceType ResourceType, vmid, containerName, namespace, podName string) string { + template := GetCLIAccessTemplate(resourceType) + result := template + + result = strings.ReplaceAll(result, "{vmid}", vmid) + result = strings.ReplaceAll(result, "{container}", containerName) + result = strings.ReplaceAll(result, "{namespace}", namespace) + result = strings.ReplaceAll(result, "{pod}", podName) + + return result +} diff --git a/internal/servicediscovery/commands_test.go b/internal/servicediscovery/commands_test.go new file mode 100644 index 000000000..5fd9780e1 --- /dev/null +++ b/internal/servicediscovery/commands_test.go @@ -0,0 +1,81 @@ +package servicediscovery + +import ( + "strings" + "testing" +) + +func TestCommandsAndTemplates(t *testing.T) { + resourceTypes := []ResourceType{ + ResourceTypeLXC, + ResourceTypeVM, + ResourceTypeDocker, + ResourceTypeDockerVM, + ResourceTypeDockerLXC, + ResourceTypeK8s, + ResourceTypeHost, + } + + for _, rt := range resourceTypes { + cmds := GetCommandsForResource(rt) + if len(cmds) == 0 { + t.Fatalf("expected commands for %s", rt) + } + } + + if len(GetCommandsForResource(ResourceType("unknown"))) != 0 { + t.Fatalf("expected no commands for unknown resource type") + } + + if !strings.Contains(BuildLXCCommand("101", "echo hi"), "pct exec 101") { + t.Fatalf("unexpected LXC command") + } + if !strings.Contains(BuildVMCommand("101", "echo hi"), "qm guest exec 101") { + t.Fatalf("unexpected VM command") + } + // Docker commands now quote container names for safety + dockerCmd := BuildDockerCommand("web", "echo hi") + if !strings.Contains(dockerCmd, "docker exec") || !strings.Contains(dockerCmd, "web") { + t.Fatalf("unexpected docker command: %s", dockerCmd) + } + + nestedLXC := BuildNestedDockerCommand("201", true, "web", "echo hi") + if !strings.Contains(nestedLXC, "pct exec 201") || !strings.Contains(nestedLXC, "docker exec") || !strings.Contains(nestedLXC, "web") { + t.Fatalf("unexpected nested LXC 
command: %s", nestedLXC) + } + + nestedVM := BuildNestedDockerCommand("301", false, "web", "echo hi") + if !strings.Contains(nestedVM, "qm guest exec 301") || !strings.Contains(nestedVM, "docker exec") || !strings.Contains(nestedVM, "web") { + t.Fatalf("unexpected nested VM command: %s", nestedVM) + } + + // K8s commands now quote arguments for safety + withContainer := BuildK8sCommand("default", "pod", "app", "echo hi") + if !strings.Contains(withContainer, "-c") || !strings.Contains(withContainer, "app") || !strings.Contains(withContainer, "kubectl exec") { + t.Fatalf("unexpected k8s command: %s", withContainer) + } + + withoutContainer := BuildK8sCommand("default", "pod", "", "echo hi") + if strings.Contains(withoutContainer, "-c") && strings.Contains(withoutContainer, "app") { + t.Fatalf("unexpected container selector: %s", withoutContainer) + } + + template := GetCLIAccessTemplate(ResourceTypeK8s) + if !strings.Contains(template, "{namespace}") || !strings.Contains(template, "{pod}") { + t.Fatalf("unexpected template: %s", template) + } + + for _, rt := range resourceTypes { + if tmpl := GetCLIAccessTemplate(rt); tmpl == "" { + t.Fatalf("expected template for %s", rt) + } + } + if tmpl := GetCLIAccessTemplate(ResourceType("unknown")); !strings.Contains(tmpl, "pulse_control") { + t.Fatalf("expected default template to mention pulse_control, got: %s", tmpl) + } + + formatted := FormatCLIAccess(ResourceTypeK8s, "101", "container", "default", "pod") + if !strings.Contains(formatted, "default") || !strings.Contains(formatted, "pod") { + t.Fatalf("unexpected formatted access: %s", formatted) + } +} diff --git a/internal/servicediscovery/deep_scanner.go b/internal/servicediscovery/deep_scanner.go new file mode 100644 index 000000000..9b745c7c6 --- /dev/null +++ b/internal/servicediscovery/deep_scanner.go @@ -0,0 +1,475 @@ +package servicediscovery + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/google/uuid" + "github.com/rs/zerolog/log" +) + +// 
CommandExecutor executes commands on infrastructure. +type CommandExecutor interface { + ExecuteCommand(ctx context.Context, agentID string, cmd ExecuteCommandPayload) (*CommandResultPayload, error) + GetConnectedAgents() []ConnectedAgent + IsAgentConnected(agentID string) bool +} + +// ExecuteCommandPayload mirrors agentexec.ExecuteCommandPayload +type ExecuteCommandPayload struct { + RequestID string `json:"request_id"` + Command string `json:"command"` + TargetType string `json:"target_type"` // "host", "container", "vm" + TargetID string `json:"target_id,omitempty"` // VMID for container/VM + Timeout int `json:"timeout,omitempty"` +} + +// CommandResultPayload mirrors agentexec.CommandResultPayload +type CommandResultPayload struct { + RequestID string `json:"request_id"` + Success bool `json:"success"` + Stdout string `json:"stdout,omitempty"` + Stderr string `json:"stderr,omitempty"` + ExitCode int `json:"exit_code"` + Error string `json:"error,omitempty"` + Duration int64 `json:"duration_ms"` +} + +// ConnectedAgent mirrors agentexec.ConnectedAgent +type ConnectedAgent struct { + AgentID string + Hostname string + Version string + Platform string + Tags []string + ConnectedAt time.Time +} + +// ProgressCallback is called when discovery progress changes. +type ProgressCallback func(*DiscoveryProgress) + +// DeepScanner runs discovery commands on resources. +type DeepScanner struct { + executor CommandExecutor + mu sync.RWMutex + progress map[string]*DiscoveryProgress // resourceID -> progress + maxParallel int + timeout time.Duration + progressCallback ProgressCallback +} + +// NewDeepScanner creates a new deep scanner. 
+func NewDeepScanner(executor CommandExecutor) *DeepScanner { + return &DeepScanner{ + executor: executor, + progress: make(map[string]*DiscoveryProgress), + maxParallel: 3, // Run up to 3 commands in parallel per resource + timeout: 30 * time.Second, + } +} + +// SetProgressCallback sets a callback function that will be called when discovery progress changes. +func (s *DeepScanner) SetProgressCallback(callback ProgressCallback) { + s.mu.Lock() + defer s.mu.Unlock() + s.progressCallback = callback +} + +// notifyProgress calls the progress callback if set. +func (s *DeepScanner) notifyProgress(progress *DiscoveryProgress) { + s.mu.RLock() + callback := s.progressCallback + s.mu.RUnlock() + + if callback != nil && progress != nil { + // Calculate elapsed time and percent complete + progressCopy := *progress + if !progress.StartedAt.IsZero() { + progressCopy.ElapsedMs = time.Since(progress.StartedAt).Milliseconds() + } + if progress.TotalSteps > 0 { + progressCopy.PercentComplete = float64(progress.CompletedSteps) / float64(progress.TotalSteps) * 100 + } + callback(&progressCopy) + } +} + +// ScanResult contains the results of a deep scan. +type ScanResult struct { + ResourceType ResourceType + ResourceID string + HostID string + Hostname string + CommandOutputs map[string]string + Errors map[string]string + StartedAt time.Time + CompletedAt time.Time +} + +// Scan runs discovery commands on a resource and returns the outputs. 
// Scan runs the full discovery command set for a resource and returns the
// collected outputs. Commands are executed on the remote host through a
// connected agent with bounded parallelism (s.maxParallel) and a per-command
// timeout (s.timeout). Progress is tracked in s.progress for the lifetime of
// the scan and broadcast via notifyProgress; the entry is removed on return.
//
// Returns an error only for setup failures (no executor, no matching agent,
// no commands for the resource type); per-command failures are recorded in
// result.Errors instead.
func (s *DeepScanner) Scan(ctx context.Context, req DiscoveryRequest) (*ScanResult, error) {
	resourceID := MakeResourceID(req.ResourceType, req.HostID, req.ResourceID)
	startTime := time.Now()

	// Initialize progress
	s.mu.Lock()
	s.progress[resourceID] = &DiscoveryProgress{
		ResourceID:  resourceID,
		Status:      DiscoveryStatusRunning,
		CurrentStep: "initializing",
		StartedAt:   startTime,
	}
	initialProgress := *s.progress[resourceID]
	s.mu.Unlock()

	// Broadcast scan start
	s.notifyProgress(&initialProgress)

	// Always drop the progress entry when the scan finishes, whatever the
	// outcome, so IsScanning reports false afterwards.
	defer func() {
		s.mu.Lock()
		delete(s.progress, resourceID)
		s.mu.Unlock()
	}()

	result := &ScanResult{
		ResourceType:   req.ResourceType,
		ResourceID:     req.ResourceID,
		HostID:         req.HostID,
		Hostname:       req.Hostname,
		CommandOutputs: make(map[string]string),
		Errors:         make(map[string]string),
		StartedAt:      time.Now(),
	}

	// Check if we have an agent for this host
	if s.executor == nil {
		return nil, fmt.Errorf("no command executor available")
	}

	// Find the agent for this host
	agentID := s.findAgentForHost(req.HostID, req.Hostname)
	if agentID == "" {
		return nil, fmt.Errorf("no connected agent for host %s (%s)", req.HostID, req.Hostname)
	}

	// Get commands for this resource type
	commands := GetCommandsForResource(req.ResourceType)
	if len(commands) == 0 {
		return nil, fmt.Errorf("no commands defined for resource type %s", req.ResourceType)
	}

	// Update progress (copy under lock, broadcast after unlock to avoid
	// holding s.mu across notifyProgress).
	s.mu.Lock()
	if prog, ok := s.progress[resourceID]; ok {
		prog.TotalSteps = len(commands)
		prog.CurrentStep = "running commands"
		progressCopy := *prog
		s.mu.Unlock()
		s.notifyProgress(&progressCopy)
	} else {
		s.mu.Unlock()
	}

	// Run commands with limited parallelism
	semaphore := make(chan struct{}, s.maxParallel)
	var wg sync.WaitGroup
	var mu sync.Mutex // guards result maps across command goroutines

	for _, cmd := range commands {
		wg.Add(1)
		go func(cmd DiscoveryCommand) {
			defer wg.Done()

			// Acquire a parallelism slot, or bail out on cancellation.
			select {
			case semaphore <- struct{}{}:
				defer func() { <-semaphore }()
			case <-ctx.Done():
				return
			}

			// Build the actual command to run
			actualCmd := s.buildCommand(req.ResourceType, req.ResourceID, cmd.Command)

			// Get the target ID for the agent
			targetID := s.getTargetID(req.ResourceType, req.ResourceID)

			// Only validate TargetID when it will be interpolated into shell commands
			// by the agent (container/vm types). Host/docker types don't use TargetID
			// in command wrapping, so they can have any format (including colons for IPv6).
			targetType := s.getTargetType(req.ResourceType)
			if targetType == "container" || targetType == "vm" {
				if err := ValidateResourceID(targetID); err != nil {
					mu.Lock()
					result.Errors[cmd.Name] = fmt.Sprintf("invalid target ID: %v", err)
					mu.Unlock()
					return
				}
			}

			// Execute the command
			cmdCtx, cancel := context.WithTimeout(ctx, s.timeout)
			defer cancel()

			cmdResult, err := s.executor.ExecuteCommand(cmdCtx, agentID, ExecuteCommandPayload{
				RequestID:  uuid.New().String(),
				Command:    actualCmd,
				TargetType: s.getTargetType(req.ResourceType),
				TargetID:   targetID,
				Timeout:    cmd.Timeout,
			})

			mu.Lock()
			defer mu.Unlock()

			if err != nil {
				// Transport-level failure: only surfaced for required commands.
				if !cmd.Optional {
					result.Errors[cmd.Name] = err.Error()
				}
				log.Debug().
					Err(err).
					Str("command", cmd.Name).
					Str("resource", resourceID).
					Msg("Command failed during discovery")
				return
			}

			if cmdResult != nil {
				// Combine stdout and stderr; stderr-only output is kept too.
				output := cmdResult.Stdout
				if cmdResult.Stderr != "" && output != "" {
					output += "\n--- stderr ---\n" + cmdResult.Stderr
				} else if cmdResult.Stderr != "" {
					output = cmdResult.Stderr
				}

				if output != "" {
					result.CommandOutputs[cmd.Name] = output
				}

				if !cmdResult.Success && cmdResult.Error != "" && !cmd.Optional {
					result.Errors[cmd.Name] = cmdResult.Error
				}
			}

			// Update progress and broadcast
			s.mu.Lock()
			if prog, ok := s.progress[resourceID]; ok {
				prog.CompletedSteps++
				prog.CurrentCommand = cmd.Name
				progressCopy := *prog
				s.mu.Unlock()
				s.notifyProgress(&progressCopy)
			} else {
				s.mu.Unlock()
			}
		}(cmd)
	}

	wg.Wait()
	result.CompletedAt = time.Now()

	// Broadcast scan completion.
	// NOTE(review): this reports Completed with 100% even when ctx was
	// canceled and some command goroutines never ran — confirm intended.
	completionProgress := DiscoveryProgress{
		ResourceID:      resourceID,
		Status:          DiscoveryStatusCompleted,
		CurrentStep:     "completed",
		TotalSteps:      len(commands),
		CompletedSteps:  len(commands),
		StartedAt:       startTime,
		ElapsedMs:       result.CompletedAt.Sub(startTime).Milliseconds(),
		PercentComplete: 100,
	}
	s.notifyProgress(&completionProgress)

	log.Info().
		Str("resource", resourceID).
		Int("outputs", len(result.CommandOutputs)).
		Int("errors", len(result.Errors)).
		Dur("duration", result.CompletedAt.Sub(result.StartedAt)).
		Msg("Deep scan completed")

	return result, nil
}

// buildCommand wraps the command appropriately for the resource type.
// NOTE: For LXC/VM, the agent handles wrapping via pct exec / qm guest exec
// based on TargetType, so we don't wrap here. We only wrap for Docker containers
// since Docker isn't a recognized TargetType in the agent.
+func (s *DeepScanner) buildCommand(resourceType ResourceType, resourceID string, cmd string) string { + switch resourceType { + case ResourceTypeLXC: + // Agent wraps with pct exec based on TargetType="container" + return cmd + case ResourceTypeVM: + // Agent wraps with qm guest exec based on TargetType="vm" + return cmd + case ResourceTypeDocker: + // Docker needs wrapping here since agent doesn't handle it + return BuildDockerCommand(resourceID, cmd) + case ResourceTypeHost: + // Commands run directly on host + return cmd + case ResourceTypeDockerLXC: + // Docker inside LXC - agent wraps with pct exec, we just add docker exec + // resourceID format: "vmid:container_name" + parts := splitResourceID(resourceID) + if len(parts) >= 2 { + return BuildDockerCommand(parts[1], cmd) + } + return cmd + case ResourceTypeDockerVM: + // Docker inside VM - agent wraps with qm guest exec, we just add docker exec + parts := splitResourceID(resourceID) + if len(parts) >= 2 { + return BuildDockerCommand(parts[1], cmd) + } + return cmd + default: + return cmd + } +} + +// getTargetType returns the target type for the agent execution payload. +func (s *DeepScanner) getTargetType(resourceType ResourceType) string { + switch resourceType { + case ResourceTypeLXC: + return "container" + case ResourceTypeVM: + return "vm" + case ResourceTypeDocker: + return "host" // Docker commands run on host via docker exec + case ResourceTypeDockerLXC: + return "container" // Docker inside LXC: agent wraps with pct exec + case ResourceTypeDockerVM: + return "vm" // Docker inside VM: agent wraps with qm guest exec + case ResourceTypeHost: + return "host" + default: + return "host" + } +} + +// getTargetID returns the target ID for the agent execution payload. +// For nested Docker (docker_lxc/docker_vm), this extracts just the vmid. 
+func (s *DeepScanner) getTargetID(resourceType ResourceType, resourceID string) string { + switch resourceType { + case ResourceTypeDockerLXC, ResourceTypeDockerVM: + // resourceID format: "vmid:container_name" - extract just vmid + parts := splitResourceID(resourceID) + if len(parts) >= 1 { + return parts[0] + } + return resourceID + default: + return resourceID + } +} + +// findAgentForHost finds the agent ID for a given host. +func (s *DeepScanner) findAgentForHost(hostID, hostname string) string { + agents := s.executor.GetConnectedAgents() + + // First try exact match on agent ID + for _, agent := range agents { + if agent.AgentID == hostID { + return agent.AgentID + } + } + + // Then try hostname match + for _, agent := range agents { + if agent.Hostname == hostname || agent.Hostname == hostID { + return agent.AgentID + } + } + + // If only one agent connected, use it + if len(agents) == 1 { + return agents[0].AgentID + } + + return "" +} + +// GetProgress returns a copy of the current progress of a scan. +// Returns nil if no scan is in progress for the resource. +// A copy is returned to avoid data races with the scan goroutine. +func (s *DeepScanner) GetProgress(resourceID string) *DiscoveryProgress { + s.mu.RLock() + defer s.mu.RUnlock() + if prog, ok := s.progress[resourceID]; ok { + // Return a copy to avoid race with scan goroutine + copy := *prog + return © + } + return nil +} + +// IsScanning returns whether a resource is currently being scanned. +func (s *DeepScanner) IsScanning(resourceID string) bool { + s.mu.RLock() + defer s.mu.RUnlock() + _, ok := s.progress[resourceID] + return ok +} + +// splitResourceID splits a compound resource ID (e.g., "101:container_name"). 
+func splitResourceID(id string) []string { + var parts []string + start := 0 + for i, c := range id { + if c == ':' { + parts = append(parts, id[start:i]) + start = i + 1 + } + } + if start < len(id) { + parts = append(parts, id[start:]) + } + return parts +} + +// ScanDocker runs discovery on Docker containers via the host. +func (s *DeepScanner) ScanDocker(ctx context.Context, hostID, hostname, containerName string) (*ScanResult, error) { + req := DiscoveryRequest{ + ResourceType: ResourceTypeDocker, + ResourceID: containerName, + HostID: hostID, + Hostname: hostname, + } + return s.Scan(ctx, req) +} + +// ScanLXC runs discovery on an LXC container. +func (s *DeepScanner) ScanLXC(ctx context.Context, hostID, hostname, vmid string) (*ScanResult, error) { + req := DiscoveryRequest{ + ResourceType: ResourceTypeLXC, + ResourceID: vmid, + HostID: hostID, + Hostname: hostname, + } + return s.Scan(ctx, req) +} + +// ScanVM runs discovery on a VM via QEMU guest agent. +func (s *DeepScanner) ScanVM(ctx context.Context, hostID, hostname, vmid string) (*ScanResult, error) { + req := DiscoveryRequest{ + ResourceType: ResourceTypeVM, + ResourceID: vmid, + HostID: hostID, + Hostname: hostname, + } + return s.Scan(ctx, req) +} + +// ScanHost runs discovery on a host system. 
func (s *DeepScanner) ScanHost(ctx context.Context, hostID, hostname string) (*ScanResult, error) {
	req := DiscoveryRequest{
		ResourceType: ResourceTypeHost,
		ResourceID:   hostID,
		HostID:       hostID,
		Hostname:     hostname,
	}
	return s.Scan(ctx, req)
}
diff --git a/internal/servicediscovery/deep_scanner_test.go b/internal/servicediscovery/deep_scanner_test.go
new file mode 100644
index 000000000..f21005bc6
--- /dev/null
+++ b/internal/servicediscovery/deep_scanner_test.go
@@ -0,0 +1,395 @@
package servicediscovery

import (
	"context"
	"strings"
	"sync"
	"testing"
	"time"
)

// stubExecutor records every executed command and payload, and fails any
// command containing "docker ps -a" so tests can exercise error collection.
type stubExecutor struct {
	mu       sync.Mutex
	commands []string
	payloads []ExecuteCommandPayload // Track full payloads for testing
	agents   []ConnectedAgent
}

func (s *stubExecutor) ExecuteCommand(ctx context.Context, agentID string, cmd ExecuteCommandPayload) (*CommandResultPayload, error) {
	s.mu.Lock()
	s.commands = append(s.commands, cmd.Command)
	s.payloads = append(s.payloads, cmd)
	s.mu.Unlock()

	// Honor cancellation like a real executor would.
	if err := ctx.Err(); err != nil {
		return nil, err
	}

	if strings.Contains(cmd.Command, "docker ps -a") {
		return &CommandResultPayload{
			RequestID: cmd.RequestID,
			Success:   false,
			Error:     "boom",
		}, nil
	}

	return &CommandResultPayload{
		RequestID: cmd.RequestID,
		Success:   true,
		Stdout:    cmd.Command,
		Duration:  5,
	}, nil
}

func (s *stubExecutor) GetConnectedAgents() []ConnectedAgent {
	return s.agents
}

func (s *stubExecutor) IsAgentConnected(agentID string) bool {
	for _, agent := range s.agents {
		if agent.AgentID == agentID {
			return true
		}
	}
	return false
}

// outputExecutor exercises stdout/stderr combination: combined output for
// "docker ps -a", stderr-only for "docker images", empty otherwise.
type outputExecutor struct{}

func (outputExecutor) ExecuteCommand(ctx context.Context, agentID string, cmd ExecuteCommandPayload) (*CommandResultPayload, error) {
	switch {
	case strings.Contains(cmd.Command, "docker ps -a"):
		return &CommandResultPayload{Success: true, Stdout: "out", Stderr: "err"}, nil
	case strings.Contains(cmd.Command, "docker images"):
		return &CommandResultPayload{Success: true, Stderr: "err-only"}, nil
	default:
		return &CommandResultPayload{Success: true}, nil
	}
}

func (outputExecutor) GetConnectedAgents() []ConnectedAgent {
	return []ConnectedAgent{{AgentID: "host1", Hostname: "host1"}}
}

func (outputExecutor) IsAgentConnected(string) bool { return true }

// errorExecutor fails every command with a transport-level error.
type errorExecutor struct{}

func (errorExecutor) ExecuteCommand(ctx context.Context, agentID string, cmd ExecuteCommandPayload) (*CommandResultPayload, error) {
	return nil, context.DeadlineExceeded
}

func (errorExecutor) GetConnectedAgents() []ConnectedAgent {
	return []ConnectedAgent{{AgentID: "host1", Hostname: "host1"}}
}

func (errorExecutor) IsAgentConnected(string) bool { return true }

// Verifies that a scan of Docker-inside-VM builds docker exec commands,
// targets the VM (TargetType=vm, TargetID=vmid), and records command errors.
func TestDeepScanner_Scan_NestedDockerCommands(t *testing.T) {
	exec := &stubExecutor{
		agents: []ConnectedAgent{
			{AgentID: "host1", Hostname: "host1", ConnectedAt: time.Now()},
		},
	}
	scanner := NewDeepScanner(exec)

	result, err := scanner.Scan(context.Background(), DiscoveryRequest{
		ResourceType: ResourceTypeDockerVM,
		ResourceID:   "101:web",
		HostID:       "host1",
		Hostname:     "host1",
	})
	if err != nil {
		t.Fatalf("Scan error: %v", err)
	}
	if len(result.CommandOutputs) == 0 {
		t.Fatalf("expected command outputs")
	}
	if _, ok := result.Errors["docker_containers"]; !ok {
		t.Fatalf("expected docker_containers error, got %#v", result.Errors)
	}

	exec.mu.Lock()
	defer exec.mu.Unlock()

	// Verify the payload fields are set correctly for nested Docker:
	// - Command should contain "docker exec" (buildCommand adds this)
	// - TargetType should be "vm" (agent wraps with qm guest exec)
	// - TargetID should be "101" (extracted from "101:web")
	foundCorrectPayload := false
	for _, payload := range exec.payloads {
		hasDockerExec := strings.Contains(payload.Command, "docker exec")
		hasContainerName := strings.Contains(payload.Command, "web")
		correctTargetType := payload.TargetType == "vm"
		correctTargetID := payload.TargetID == "101"

		if hasDockerExec && hasContainerName && correctTargetType && correctTargetID {
			foundCorrectPayload = true
			break
		}
	}
	if !foundCorrectPayload {
		t.Fatalf("expected nested docker payload with docker exec, TargetType=vm, TargetID=101, got payloads: %+v", exec.payloads)
	}
}

// Covers agent matching priority (ID, hostname, single-agent fallback, none)
// and the basic target-type mapping.
func TestDeepScanner_FindAgentAndTargetType(t *testing.T) {
	exec := &stubExecutor{
		agents: []ConnectedAgent{
			{AgentID: "a1", Hostname: "node1"},
			{AgentID: "a2", Hostname: "node2"},
		},
	}
	scanner := NewDeepScanner(exec)

	if got := scanner.findAgentForHost("a2", ""); got != "a2" {
		t.Fatalf("expected direct agent match, got %s", got)
	}
	if got := scanner.findAgentForHost("node1", "node1"); got != "a1" {
		t.Fatalf("expected hostname match, got %s", got)
	}

	exec.agents = []ConnectedAgent{{AgentID: "solo", Hostname: "only"}}
	if got := scanner.findAgentForHost("missing", "missing"); got != "solo" {
		t.Fatalf("expected single agent fallback, got %s", got)
	}
	exec.agents = nil
	if got := scanner.findAgentForHost("missing", "missing"); got != "" {
		t.Fatalf("expected no agent, got %s", got)
	}

	if scanner.getTargetType(ResourceTypeLXC) != "container" {
		t.Fatalf("unexpected target type for lxc")
	}
	if scanner.getTargetType(ResourceTypeVM) != "vm" {
		t.Fatalf("unexpected target type for vm")
	}
	if scanner.getTargetType(ResourceTypeDocker) != "host" {
		t.Fatalf("unexpected target type for docker")
	}
	if scanner.getTargetType(ResourceTypeHost) != "host" {
		t.Fatalf("unexpected target type for host")
	}
}

// Interior segments are preserved when splitting compound IDs.
func TestSplitResourceID(t *testing.T) {
	parts := splitResourceID("101:web:extra")
	if len(parts) != 3 || parts[0] != "101" || parts[1] != "web" || parts[2] != "extra" {
		t.Fatalf("unexpected parts: %#v", parts)
	}
}

// Table-driven coverage for getTargetType and getTargetID across all types.
func TestDeepScanner_GetTargetTypeAndID(t *testing.T) {
	scanner := NewDeepScanner(&stubExecutor{})

	// Test getTargetType
	tests := []struct {
		resourceType ResourceType
		wantType     string
	}{
		{ResourceTypeLXC, "container"},
		{ResourceTypeVM, "vm"},
		{ResourceTypeDocker, "host"},
		{ResourceTypeDockerLXC, "container"}, // Docker inside LXC runs via pct exec
		{ResourceTypeDockerVM, "vm"},         // Docker inside VM runs via qm guest exec
		{ResourceTypeHost, "host"},
		{ResourceType("unknown"), "host"},
	}
	for _, tt := range tests {
		if got := scanner.getTargetType(tt.resourceType); got != tt.wantType {
			t.Errorf("getTargetType(%s) = %s, want %s", tt.resourceType, got, tt.wantType)
		}
	}

	// Test getTargetID
	idTests := []struct {
		resourceType ResourceType
		resourceID   string
		wantID       string
	}{
		{ResourceTypeLXC, "101", "101"},
		{ResourceTypeVM, "102", "102"},
		{ResourceTypeDocker, "web", "web"},
		{ResourceTypeDockerLXC, "201:nginx", "201"},   // Extract vmid for nested docker
		{ResourceTypeDockerVM, "301:postgres", "301"}, // Extract vmid for nested docker
		{ResourceTypeHost, "myhost", "myhost"},
	}
	for _, tt := range idTests {
		if got := scanner.getTargetID(tt.resourceType, tt.resourceID); got != tt.wantID {
			t.Errorf("getTargetID(%s, %s) = %s, want %s", tt.resourceType, tt.resourceID, got, tt.wantID)
		}
	}
}

// Covers command wrapping per resource type plus progress bookkeeping.
func TestDeepScanner_BuildCommandAndProgress(t *testing.T) {
	scanner := NewDeepScanner(&stubExecutor{})

	// LXC: buildCommand returns raw command, agent handles pct exec wrapping
	if cmd := scanner.buildCommand(ResourceTypeLXC, "101", "echo hi"); cmd != "echo hi" {
		t.Fatalf("LXC should return raw command (agent wraps), got: %s", cmd)
	}
	// VM: buildCommand returns raw command, agent handles qm guest exec wrapping
	if cmd := scanner.buildCommand(ResourceTypeVM, "101", "echo hi"); cmd != "echo hi" {
		t.Fatalf("VM should return raw command (agent wraps), got: %s", cmd)
	}
	// Docker: buildCommand wraps with docker exec since agent doesn't handle it
	if cmd := scanner.buildCommand(ResourceTypeDocker, "web", "echo hi"); !strings.Contains(cmd, "docker exec") {
		t.Fatalf("Docker should include docker exec, got: %s", cmd)
	}
	// Host: buildCommand returns raw command
	if cmd := scanner.buildCommand(ResourceTypeHost, "host", "echo hi"); cmd != "echo hi" {
		t.Fatalf("Host should return raw command, got: %s", cmd)
	}

	// DockerLXC: buildCommand adds docker exec, agent adds pct exec
	// So we should only see docker exec in the command (agent adds pct exec at runtime)
	dockerLXC := scanner.buildCommand(ResourceTypeDockerLXC, "201:web", "echo hi")
	if !strings.Contains(dockerLXC, "docker exec") {
		t.Fatalf("DockerLXC should include docker exec, got: %s", dockerLXC)
	}
	if strings.Contains(dockerLXC, "pct exec") {
		t.Fatalf("DockerLXC should NOT include pct exec (agent adds it), got: %s", dockerLXC)
	}
	if cmd := scanner.buildCommand(ResourceTypeDockerLXC, "bad", "echo hi"); cmd != "echo hi" {
		t.Fatalf("DockerLXC with bad ID should fallback, got: %s", cmd)
	}

	// DockerVM: buildCommand adds docker exec, agent adds qm guest exec
	dockerVM := scanner.buildCommand(ResourceTypeDockerVM, "301:web", "echo hi")
	if !strings.Contains(dockerVM, "docker exec") {
		t.Fatalf("DockerVM should include docker exec, got: %s", dockerVM)
	}
	if strings.Contains(dockerVM, "qm guest exec") {
		t.Fatalf("DockerVM should NOT include qm guest exec (agent adds it), got: %s", dockerVM)
	}
	if cmd := scanner.buildCommand(ResourceTypeDockerVM, "bad", "echo hi"); cmd != "echo hi" {
		t.Fatalf("DockerVM with bad ID should fallback, got: %s", cmd)
	}

	// Unknown type: returns raw command
	if cmd := scanner.buildCommand(ResourceType("unknown"), "id", "echo hi"); cmd != "echo hi" {
		t.Fatalf("Unknown type should return raw command, got: %s", cmd)
	}

	scanner.progress["id"] = &DiscoveryProgress{ResourceID: "id"}
	if scanner.GetProgress("id") == nil {
		t.Fatalf("expected progress")
	}
	if !scanner.IsScanning("id") {
		t.Fatalf("expected IsScanning true")
	}
	if scanner.GetProgress("missing") != nil {
		t.Fatalf("expected nil progress")
	}
	if scanner.IsScanning("missing") {
		t.Fatalf("expected IsScanning false")
	}

	noExec := NewDeepScanner(nil)
	if _, err := noExec.ScanHost(context.Background(), "host1", "host1"); err == nil {
		t.Fatalf("expected error without executor")
	}
}

// Smoke-tests the typed wrapper entry points against the stub executor.
func TestDeepScanner_ScanWrappers(t *testing.T) {
	exec := &stubExecutor{
		agents: []ConnectedAgent{{AgentID: "host1", Hostname: "host1"}},
	}
	scanner := NewDeepScanner(exec)
	scanner.maxParallel = 1

	if _, err := scanner.ScanDocker(context.Background(), "host1", "host1", "web"); err != nil {
		t.Fatalf("ScanDocker error: %v", err)
	}
	if _, err := scanner.ScanLXC(context.Background(), "host1", "host1", "101"); err != nil {
		t.Fatalf("ScanLXC error: %v", err)
	}
	if _, err := scanner.ScanVM(context.Background(), "host1", "host1", "102"); err != nil {
		t.Fatalf("ScanVM error: %v", err)
	}
}

// Setup failures (unknown resource type, no connected agent) must error.
func TestDeepScanner_ScanErrors(t *testing.T) {
	exec := &stubExecutor{
		agents: []ConnectedAgent{{AgentID: "host1", Hostname: "host1"}},
	}
	scanner := NewDeepScanner(exec)
	if _, err := scanner.Scan(context.Background(), DiscoveryRequest{
		ResourceType: ResourceType("unknown"),
		ResourceID:   "id",
		HostID:       "host1",
		Hostname:     "host1",
	}); err == nil {
		t.Fatalf("expected error for unknown resource type")
	}

	exec.agents = nil
	if _, err := scanner.Scan(context.Background(), DiscoveryRequest{
		ResourceType: ResourceTypeDocker,
		ResourceID:   "web",
		HostID:       "host1",
		Hostname:     "host1",
	}); err == nil {
		t.Fatalf("expected error for missing agent")
	}
}

// Stdout+stderr are combined with a separator; stderr-only output is kept.
func TestDeepScanner_OutputHandling(t *testing.T) {
	exec := outputExecutor{}
	scanner := NewDeepScanner(exec)
	scanner.maxParallel = 1

	result, err := scanner.Scan(context.Background(), DiscoveryRequest{
		ResourceType: ResourceTypeDockerVM,
		ResourceID:   "101:web",
		HostID:       "host1",
		Hostname:     "host1",
	})
	if err != nil {
		t.Fatalf("Scan error: %v", err)
	}
	if out := result.CommandOutputs["docker_containers"]; !strings.Contains(out, "--- stderr ---") {
		t.Fatalf("expected combined stderr output, got %s", out)
	}
	if out := result.CommandOutputs["docker_images"]; out != "err-only" {
		t.Fatalf("expected stderr-only output, got %s", out)
	}
}

// Transport errors on non-optional commands land in result.Errors.
func TestDeepScanner_CommandErrorHandling(t *testing.T) {
	scanner := NewDeepScanner(errorExecutor{})
	scanner.maxParallel = 1

	result, err := scanner.Scan(context.Background(), DiscoveryRequest{
		ResourceType: ResourceTypeDockerVM,
		ResourceID:   "101:web",
		HostID:       "host1",
		Hostname:     "host1",
	})
	if err != nil {
		t.Fatalf("Scan error: %v", err)
	}
	if _, ok := result.Errors["docker_containers"]; !ok {
		t.Fatalf("expected error for non-optional command")
	}
}

// A pre-canceled context must not error: goroutines bail via ctx.Done while
// waiting on the zero-capacity semaphore, and Scan still returns a result.
func TestDeepScanner_ScanCanceledContext(t *testing.T) {
	exec := &stubExecutor{
		agents: []ConnectedAgent{{AgentID: "host1", Hostname: "host1"}},
	}
	scanner := NewDeepScanner(exec)
	scanner.maxParallel = 0

	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	if _, err := scanner.Scan(ctx, DiscoveryRequest{
		ResourceType: ResourceTypeDockerVM,
		ResourceID:   "101:web",
		HostID:       "host1",
		Hostname:     "host1",
	}); err != nil {
		t.Fatalf("Scan error: %v", err)
	}
}
diff --git a/internal/servicediscovery/fingerprint.go b/internal/servicediscovery/fingerprint.go
new file mode 100644
index 000000000..c4aa721d7
--- /dev/null
+++ b/internal/servicediscovery/fingerprint.go
@@ -0,0 +1,249 @@
package servicediscovery

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"sort"
	"strconv"
	"strings"
	"time"
)

// GenerateDockerFingerprint creates a fingerprint from Docker container metadata.
// The fingerprint captures key metadata that indicates when a container has changed
// in ways that would affect discovery results (image, ports, mounts, env keys).
+func GenerateDockerFingerprint(hostID string, container *DockerContainer) *ContainerFingerprint { + fp := &ContainerFingerprint{ + ResourceID: container.Name, + HostID: hostID, + SchemaVersion: FingerprintSchemaVersion, + GeneratedAt: time.Now(), + ImageName: container.Image, + } + + // Extract port mappings (private port + protocol) + for _, p := range container.Ports { + fp.Ports = append(fp.Ports, fmt.Sprintf("%d/%s", p.PrivatePort, p.Protocol)) + } + sort.Strings(fp.Ports) + + // Extract mount paths (container destination paths, not host paths) + for _, m := range container.Mounts { + fp.MountPaths = append(fp.MountPaths, m.Destination) + } + sort.Strings(fp.MountPaths) + + // Extract environment variable keys from labels (if present) + // Note: We don't have direct access to env vars in DockerContainer, + // but labels often contain relevant configuration hints + for key := range container.Labels { + fp.EnvKeys = append(fp.EnvKeys, key) + } + sort.Strings(fp.EnvKeys) + + // Generate the hash + fp.Hash = fp.computeHash() + return fp +} + +// computeHash generates a truncated SHA256 hash of the fingerprint components. +// Includes schema version so algorithm changes produce different hashes. +func (fp *ContainerFingerprint) computeHash() string { + h := sha256.New() + // Include schema version first so algorithm changes are detected + h.Write([]byte(strconv.Itoa(fp.SchemaVersion))) + h.Write([]byte(fp.ImageID)) + h.Write([]byte(fp.ImageName)) + h.Write([]byte(fp.CreatedAt)) + h.Write([]byte(strings.Join(fp.Ports, ","))) + h.Write([]byte(strings.Join(fp.MountPaths, ","))) + h.Write([]byte(strings.Join(fp.EnvKeys, ","))) + return hex.EncodeToString(h.Sum(nil))[:16] // Short hash is sufficient +} + +// HasChanged compares two fingerprints and returns true if they differ. +// Also returns true if the schema version changed (algorithm updated). 
+func (fp *ContainerFingerprint) HasChanged(other *ContainerFingerprint) bool { + if other == nil { + return true + } + return fp.Hash != other.Hash +} + +// HasSchemaChanged returns true if the fingerprint was generated with a different schema. +func (fp *ContainerFingerprint) HasSchemaChanged(other *ContainerFingerprint) bool { + if other == nil { + return false + } + return fp.SchemaVersion != other.SchemaVersion +} + +// String returns a human-readable representation of the fingerprint. +func (fp *ContainerFingerprint) String() string { + return fmt.Sprintf("Fingerprint{id=%s, host=%s, hash=%s, image=%s, ports=%v}", + fp.ResourceID, fp.HostID, fp.Hash, fp.ImageName, fp.Ports) +} + +// GenerateLXCFingerprint creates a fingerprint from LXC container metadata. +// Tracks: VMID, name, OS template, resource allocation, and tags. +func GenerateLXCFingerprint(nodeID string, container *Container) *ContainerFingerprint { + fp := &ContainerFingerprint{ + ResourceID: strconv.Itoa(container.VMID), + HostID: nodeID, + SchemaVersion: FingerprintSchemaVersion, + GeneratedAt: time.Now(), + ImageName: container.OSTemplate, // OS template is like the "image" for LXCs + } + + // Build components for hashing + var components []string + + // Core identity + components = append(components, strconv.Itoa(container.VMID)) + components = append(components, container.Name) + components = append(components, container.OSTemplate) + components = append(components, container.OSName) + + // Resource allocation (changes here might affect what's running) + components = append(components, strconv.Itoa(container.CPUs)) + components = append(components, strconv.FormatUint(container.MaxMemory, 10)) + components = append(components, strconv.FormatUint(container.MaxDisk, 10)) + + // OCI container flag (different container type) + if container.IsOCI { + components = append(components, "oci:true") + } + + // Template flag (templates shouldn't trigger discovery) + if container.Template { + components = 
append(components, "template:true") + } + + // Note: IP addresses intentionally excluded - DHCP churn causes false positives + + // Tags (user might tag based on what's running) + if len(container.Tags) > 0 { + sortedTags := make([]string, len(container.Tags)) + copy(sortedTags, container.Tags) + sort.Strings(sortedTags) + components = append(components, sortedTags...) + } + + // Generate hash + h := sha256.New() + h.Write([]byte(strings.Join(components, "|"))) + fp.Hash = hex.EncodeToString(h.Sum(nil))[:16] + + return fp +} + +// GenerateVMFingerprint creates a fingerprint from VM metadata. +// Tracks: VMID, name, OS, resource allocation, and tags. +func GenerateVMFingerprint(nodeID string, vm *VM) *ContainerFingerprint { + fp := &ContainerFingerprint{ + ResourceID: strconv.Itoa(vm.VMID), + HostID: nodeID, + SchemaVersion: FingerprintSchemaVersion, + GeneratedAt: time.Now(), + ImageName: vm.OSName, // OS name is the closest to an "image" for VMs + } + + // Build components for hashing + var components []string + + // Core identity + components = append(components, strconv.Itoa(vm.VMID)) + components = append(components, vm.Name) + components = append(components, vm.OSName) + components = append(components, vm.OSVersion) + + // Resource allocation + components = append(components, strconv.Itoa(vm.CPUs)) + components = append(components, strconv.FormatUint(vm.MaxMemory, 10)) + components = append(components, strconv.FormatUint(vm.MaxDisk, 10)) + + // Template flag (templates shouldn't trigger discovery) + if vm.Template { + components = append(components, "template:true") + } + + // Note: IP addresses intentionally excluded - DHCP churn causes false positives + + // Tags + if len(vm.Tags) > 0 { + sortedTags := make([]string, len(vm.Tags)) + copy(sortedTags, vm.Tags) + sort.Strings(sortedTags) + components = append(components, sortedTags...) 
+ } + + // Generate hash + h := sha256.New() + h.Write([]byte(strings.Join(components, "|"))) + fp.Hash = hex.EncodeToString(h.Sum(nil))[:16] + + return fp +} + +// GenerateK8sPodFingerprint creates a fingerprint from Kubernetes pod metadata. +// Tracks: UID, name, namespace, labels, owner (deployment/statefulset/etc), and container images. +func GenerateK8sPodFingerprint(clusterID string, pod *KubernetesPod) *ContainerFingerprint { + fp := &ContainerFingerprint{ + ResourceID: pod.UID, + HostID: clusterID, + SchemaVersion: FingerprintSchemaVersion, + GeneratedAt: time.Now(), + } + + // Build components for hashing + var components []string + + // Core identity + components = append(components, pod.UID) + components = append(components, pod.Name) + components = append(components, pod.Namespace) + components = append(components, pod.NodeName) + + // Owner reference (deployment, statefulset, daemonset, etc.) + if pod.OwnerKind != "" { + components = append(components, "owner:"+pod.OwnerKind+"/"+pod.OwnerName) + } + + // Container images (most important for detecting app changes) + var images []string + for _, c := range pod.Containers { + images = append(images, c.Name+":"+c.Image) + } + sort.Strings(images) + if len(images) > 0 { + fp.ImageName = images[0] // Use first container image as the "image name" + components = append(components, "images:"+strings.Join(images, ",")) + } + + // Labels (sorted by key for consistency) + if len(pod.Labels) > 0 { + var labelKeys []string + for k := range pod.Labels { + labelKeys = append(labelKeys, k) + } + sort.Strings(labelKeys) + var labelPairs []string + for _, k := range labelKeys { + labelPairs = append(labelPairs, k+"="+pod.Labels[k]) + } + components = append(components, "labels:"+strings.Join(labelPairs, ",")) + } + + // Generate hash + h := sha256.New() + h.Write([]byte(strings.Join(components, "|"))) + fp.Hash = hex.EncodeToString(h.Sum(nil))[:16] + + return fp +} + +// GenerateFingerprint is an alias for 
GenerateDockerFingerprint for backwards compatibility. +func GenerateFingerprint(hostID string, container *DockerContainer) *ContainerFingerprint { + return GenerateDockerFingerprint(hostID, container) +} diff --git a/internal/servicediscovery/formatters.go b/internal/servicediscovery/formatters.go new file mode 100644 index 000000000..8974a7057 --- /dev/null +++ b/internal/servicediscovery/formatters.go @@ -0,0 +1,629 @@ +package servicediscovery + +import ( + "fmt" + "strings" + "time" +) + +// FormatForAIContext formats discoveries for inclusion in AI prompts. +// This provides context about resources for Patrol, Investigation, and Chat. +func FormatForAIContext(discoveries []*ResourceDiscovery) string { + if len(discoveries) == 0 { + return "" + } + + var sb strings.Builder + sb.WriteString("## Infrastructure Discovery\n\n") + sb.WriteString("The following has been discovered about the affected resources:\n\n") + + for _, d := range discoveries { + sb.WriteString(formatSingleDiscovery(d)) + sb.WriteString("\n") + } + + sb.WriteString("\n**IMPORTANT:** Use the CLI access methods shown above. For example:\n") + sb.WriteString("- For LXC containers, use `pct exec -- `\n") + sb.WriteString("- For VMs with guest agent, use `qm guest exec -- `\n") + sb.WriteString("- For Docker containers, use `docker exec `\n") + + return sb.String() +} + +// FormatSingleForAIContext formats a single discovery for AI context. +func FormatSingleForAIContext(d *ResourceDiscovery) string { + if d == nil { + return "" + } + return formatSingleDiscovery(d) +} + +// formatSingleDiscovery formats a single discovery entry. 
+func formatSingleDiscovery(d *ResourceDiscovery) string { + var sb strings.Builder + + // Header with service info + sb.WriteString(fmt.Sprintf("### %s (%s)\n", d.ServiceName, d.ID)) + sb.WriteString(fmt.Sprintf("- **Type:** %s\n", d.ResourceType)) + sb.WriteString(fmt.Sprintf("- **Host:** %s\n", d.Hostname)) + + if d.ServiceVersion != "" { + sb.WriteString(fmt.Sprintf("- **Version:** %s\n", d.ServiceVersion)) + } + + if d.Category != "" && d.Category != CategoryUnknown { + sb.WriteString(fmt.Sprintf("- **Category:** %s\n", d.Category)) + } + + // CLI access (most important for remediation) + if d.CLIAccess != "" { + sb.WriteString(fmt.Sprintf("- **CLI Access:** `%s`\n", d.CLIAccess)) + } + + // Config, data, and log paths + if len(d.ConfigPaths) > 0 { + sb.WriteString(fmt.Sprintf("- **Config Paths:** %s\n", strings.Join(d.ConfigPaths, ", "))) + } + if len(d.DataPaths) > 0 { + sb.WriteString(fmt.Sprintf("- **Data Paths:** %s\n", strings.Join(d.DataPaths, ", "))) + } + if len(d.LogPaths) > 0 { + sb.WriteString(fmt.Sprintf("- **Log Paths:** %s\n", strings.Join(d.LogPaths, ", "))) + } + + // Ports + if len(d.Ports) > 0 { + var ports []string + for _, p := range d.Ports { + ports = append(ports, fmt.Sprintf("%d/%s", p.Port, p.Protocol)) + } + sb.WriteString(fmt.Sprintf("- **Ports:** %s\n", strings.Join(ports, ", "))) + } + + // Important facts + importantFacts := filterImportantFacts(d.Facts) + if len(importantFacts) > 0 { + sb.WriteString("- **Key Facts:**\n") + for _, f := range importantFacts { + sb.WriteString(fmt.Sprintf(" - %s: %s\n", f.Key, f.Value)) + } + } + + // User notes (critical for context) + if d.UserNotes != "" { + sb.WriteString(fmt.Sprintf("- **User Notes:** %s\n", d.UserNotes)) + } + + return sb.String() +} + +// filterImportantFacts returns the most relevant facts for AI context. 
+func filterImportantFacts(facts []DiscoveryFact) []DiscoveryFact { + var important []DiscoveryFact + + // Priority categories + priorityCategories := map[FactCategory]bool{ + FactCategoryHardware: true, // GPU, TPU + FactCategoryDependency: true, // MQTT, database connections + FactCategorySecurity: true, // Auth info + FactCategoryVersion: true, // Version info + } + + for _, f := range facts { + if priorityCategories[f.Category] && f.Confidence >= 0.7 { + important = append(important, f) + } + } + + // Limit to top 5 facts + if len(important) > 5 { + important = important[:5] + } + + return important +} + +// FormatDiscoverySummary formats a summary of all discoveries. +func FormatDiscoverySummary(discoveries []*ResourceDiscovery) string { + if len(discoveries) == 0 { + return "No infrastructure discovery data available." + } + + var sb strings.Builder + sb.WriteString(fmt.Sprintf("Infrastructure Discovery Summary (%d resources):\n\n", len(discoveries))) + + // Group by resource type + byType := make(map[ResourceType][]*ResourceDiscovery) + for _, d := range discoveries { + byType[d.ResourceType] = append(byType[d.ResourceType], d) + } + + for rt, ds := range byType { + sb.WriteString(fmt.Sprintf("**%s** (%d):\n", rt, len(ds))) + for _, d := range ds { + confidence := "" + if d.Confidence >= 0.9 { + confidence = " [high confidence]" + } else if d.Confidence >= 0.7 { + confidence = " [medium confidence]" + } + sb.WriteString(fmt.Sprintf(" - %s: %s%s\n", d.ResourceID, d.ServiceName, confidence)) + } + sb.WriteString("\n") + } + + return sb.String() +} + +// FormatScopeHint returns a compact, single-line discovery hint for scoped patrols. 
func FormatScopeHint(discoveries []*ResourceDiscovery) string {
	if len(discoveries) == 0 {
		return ""
	}
	// Only the first discovery is summarized; additional ones are counted.
	primary := discoveries[0]
	summary := formatScopeDiscoverySummary(primary)
	if summary == "" {
		return ""
	}
	if len(discoveries) > 1 {
		summary = fmt.Sprintf("%s (+%d more)", summary, len(discoveries)-1)
	}
	return "Discovery: " + summary
}

// formatScopeDiscoverySummary builds a one-line summary for a single
// discovery: "name vX.Y (type on host); cli: ...; ports: ...".
// Returns "" when there is nothing identifiable to show.
func formatScopeDiscoverySummary(d *ResourceDiscovery) string {
	if d == nil {
		return ""
	}
	name := firstNonEmpty(d.ServiceName, d.ServiceType, d.ResourceID, d.ID)
	if name == "" {
		return ""
	}
	base := name
	// Append the version only if the name does not already mention it.
	if d.ServiceVersion != "" && !strings.Contains(strings.ToLower(base), strings.ToLower(d.ServiceVersion)) {
		version := d.ServiceVersion
		if !strings.HasPrefix(strings.ToLower(version), "v") {
			version = "v" + version
		}
		base = fmt.Sprintf("%s %s", base, version)
	}

	// "type on host" metadata, degrading gracefully when either is missing.
	host := firstNonEmpty(d.Hostname, d.HostID)
	meta := strings.TrimSpace(string(d.ResourceType))
	if host != "" {
		if meta != "" {
			meta = fmt.Sprintf("%s on %s", meta, host)
		} else {
			meta = host
		}
	}
	if meta != "" {
		base = fmt.Sprintf("%s (%s)", base, meta)
	}

	parts := []string{base}
	if cli := shortenScopeCLI(d.CLIAccess); cli != "" {
		parts = append(parts, "cli: "+cli)
	}
	if ports := formatScopePorts(d.Ports); ports != "" {
		parts = append(parts, "ports: "+ports)
	}

	return strings.Join(parts, "; ")
}

// shortenScopeCLI collapses internal whitespace in a CLI template and caps it
// at 64 characters so it fits on the single-line scope hint.
func shortenScopeCLI(value string) string {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return ""
	}
	compact := strings.Join(strings.Fields(trimmed), " ")
	return truncateScopeText(compact, 64)
}

// formatScopePorts renders at most three "port/proto" pairs, then a
// "+N more" marker for the remainder. Empty protocol defaults to "tcp".
func formatScopePorts(ports []PortInfo) string {
	if len(ports) == 0 {
		return ""
	}
	maxPorts := 3
	if len(ports) < maxPorts {
		maxPorts = len(ports)
	}
	parts := make([]string, 0, maxPorts)
	for i := 0; i < maxPorts; i++ {
		p := ports[i]
		proto := p.Protocol
		if proto == "" {
			proto = "tcp"
		}
		parts = append(parts, fmt.Sprintf("%d/%s", p.Port, proto))
	}
	if len(ports) > maxPorts {
		parts = append(parts, fmt.Sprintf("+%d more", len(ports)-maxPorts))
	}
	return strings.Join(parts, ", ")
}

// truncateScopeText caps value at max bytes, replacing the tail with "..."
// when room allows. NOTE(review): byte slicing can split a multi-byte UTF-8
// rune at the cut point — acceptable for log-style hints, but confirm if
// these strings can carry non-ASCII content.
func truncateScopeText(value string, max int) string {
	if max <= 0 || len(value) <= max {
		return value
	}
	if max <= 3 {
		return value[:max]
	}
	return value[:max-3] + "..."
}

// firstNonEmpty returns the first argument that is non-blank after trimming.
// Note the ORIGINAL (untrimmed) value is returned, only the test is trimmed.
func firstNonEmpty(values ...string) string {
	for _, v := range values {
		if strings.TrimSpace(v) != "" {
			return v
		}
	}
	return ""
}

// FormatForRemediation formats discovery specifically for remediation context.
func FormatForRemediation(d *ResourceDiscovery) string {
	if d == nil {
		return ""
	}

	var sb strings.Builder
	sb.WriteString("## Resource Context for Remediation\n\n")

	sb.WriteString(fmt.Sprintf("**Resource:** %s (%s)\n", d.ServiceName, d.ID))
	sb.WriteString(fmt.Sprintf("**Type:** %s on %s\n\n", d.ResourceType, d.Hostname))

	// CLI access is most critical — it tells the AI how to run commands.
	if d.CLIAccess != "" {
		sb.WriteString("### How to Execute Commands\n")
		sb.WriteString(fmt.Sprintf("```\n%s\n```\n\n", d.CLIAccess))
	}

	// Service identity and version, when known.
	if d.ServiceType != "" {
		sb.WriteString(fmt.Sprintf("**Service:** %s", d.ServiceType))
		if d.ServiceVersion != "" {
			sb.WriteString(fmt.Sprintf(" v%s", d.ServiceVersion))
		}
		sb.WriteString("\n\n")
	}

	// Config paths for potential fixes.
	if len(d.ConfigPaths) > 0 {
		sb.WriteString("### Configuration Files\n")
		for _, p := range d.ConfigPaths {
			sb.WriteString(fmt.Sprintf("- `%s`\n", p))
		}
		sb.WriteString("\n")
	}

	// Log paths for troubleshooting.
	if len(d.LogPaths) > 0 {
		sb.WriteString("### Log Files\n")
		for _, p := range d.LogPaths {
			sb.WriteString(fmt.Sprintf("- `%s`\n", p))
		}
		sb.WriteString("\n")
	}

	// User notes may contain important operator context.
	if d.UserNotes != "" {
		sb.WriteString("### User Notes\n")
		sb.WriteString(d.UserNotes)
		sb.WriteString("\n\n")
	}

	// Hardware facts (e.g. GPU passthrough) need special consideration
	// during remediation.
	for _, f := range d.Facts {
		if f.Category == FactCategoryHardware {
			sb.WriteString(fmt.Sprintf("**Hardware:** %s = %s\n", f.Key, f.Value))
		}
	}

	return sb.String()
}

// FormatDiscoveryAge returns a human-readable age string ("just now",
// "N minutes/hours/days ago") based on the discovery's UpdatedAt timestamp.
func FormatDiscoveryAge(d *ResourceDiscovery) string {
	if d == nil || d.UpdatedAt.IsZero() {
		return "unknown"
	}

	age := time.Since(d.UpdatedAt)
	switch {
	case age < time.Minute:
		return "just now"
	case age < time.Hour:
		mins := int(age.Minutes())
		if mins == 1 {
			return "1 minute ago"
		}
		return fmt.Sprintf("%d minutes ago", mins)
	case age < 24*time.Hour:
		hours := int(age.Hours())
		if hours == 1 {
			return "1 hour ago"
		}
		return fmt.Sprintf("%d hours ago", hours)
	default:
		days := int(age.Hours() / 24)
		if days == 1 {
			return "1 day ago"
		}
		return fmt.Sprintf("%d days ago", days)
	}
}

// FilterDiscoveriesByResourceIDs returns discoveries that match any of the given resource IDs.
// This is used to scope discovery context for targeted patrol runs.
+func FilterDiscoveriesByResourceIDs(discoveries []*ResourceDiscovery, resourceIDs []string) []*ResourceDiscovery { + if len(discoveries) == 0 { + return nil + } + if len(resourceIDs) == 0 { + return discoveries + } + + tokens := buildResourceIDTokenSet(resourceIDs) + if len(tokens) == 0 { + return nil + } + + filtered := make([]*ResourceDiscovery, 0, len(discoveries)) + for _, d := range discoveries { + if discoveryMatchesTokens(d, tokens) { + filtered = append(filtered, d) + } + } + return filtered +} + +func buildResourceIDTokenSet(resourceIDs []string) map[string]struct{} { + tokens := make(map[string]struct{}) + for _, id := range resourceIDs { + addResourceIDTokens(tokens, id) + } + return tokens +} + +func addResourceIDTokens(tokens map[string]struct{}, resourceID string) { + trimmed := strings.TrimSpace(resourceID) + if trimmed == "" { + return + } + + addToken(tokens, trimmed) + + if last := lastSegment(trimmed, '/'); last != "" { + addToken(tokens, last) + } + if last := lastSegment(trimmed, ':'); last != "" { + addToken(tokens, last) + } + + lower := strings.ToLower(trimmed) + if strings.HasPrefix(lower, "vm-") { + addToken(tokens, trimmed[3:]) + } + if strings.HasPrefix(lower, "ct-") { + addToken(tokens, trimmed[3:]) + } + if strings.HasPrefix(lower, "lxc-") { + addToken(tokens, trimmed[4:]) + } + + if strings.Contains(lower, "qemu/") || strings.Contains(lower, "lxc/") || strings.HasPrefix(lower, "vm-") || strings.HasPrefix(lower, "ct-") { + if digits := trailingDigits(trimmed); digits != "" { + addToken(tokens, digits) + } + } + + // docker:host/container -> host + container tokens + if strings.Contains(trimmed, ":") { + parts := strings.SplitN(trimmed, ":", 2) + if len(parts) == 2 { + rest := parts[1] + if slash := strings.Index(rest, "/"); slash >= 0 { + host := strings.TrimSpace(rest[:slash]) + container := strings.TrimSpace(rest[slash+1:]) + addToken(tokens, host) + addToken(tokens, container) + } + } + } +} + +func discoveryMatchesTokens(d 
*ResourceDiscovery, tokens map[string]struct{}) bool { + if d == nil { + return false + } + + candidates := discoveryTokens(d) + for _, candidate := range candidates { + if _, ok := tokens[candidate]; ok { + return true + } + } + return false +} + +func discoveryTokens(d *ResourceDiscovery) []string { + var tokens []string + add := func(value string) { + trimmed := strings.TrimSpace(value) + if trimmed == "" { + return + } + tokens = append(tokens, strings.ToLower(trimmed)) + } + + add(d.ResourceID) + add(d.ID) + add(d.HostID) + if d.HostID != "" { + add("host:" + d.HostID) + } + + switch d.ResourceType { + case ResourceTypeVM: + add("qemu/" + d.ResourceID) + add("vm/" + d.ResourceID) + add("vm-" + d.ResourceID) + case ResourceTypeLXC: + add("lxc/" + d.ResourceID) + add("ct/" + d.ResourceID) + add("ct-" + d.ResourceID) + case ResourceTypeDocker: + if d.HostID != "" { + add("docker:" + d.HostID) + add("docker:" + d.HostID + "/" + d.ResourceID) + } + case ResourceTypeHost: + add("host:" + d.ResourceID) + case ResourceTypeK8s: + add("k8s/" + d.ResourceID) + add("kubernetes/" + d.ResourceID) + } + + return tokens +} + +func addToken(tokens map[string]struct{}, value string) { + trimmed := strings.TrimSpace(value) + if trimmed == "" { + return + } + tokens[strings.ToLower(trimmed)] = struct{}{} +} + +func lastSegment(value string, sep byte) string { + if value == "" { + return "" + } + idx := strings.LastIndexByte(value, sep) + if idx == -1 || idx+1 >= len(value) { + return "" + } + return value[idx+1:] +} + +func trailingDigits(value string) string { + if value == "" { + return "" + } + i := len(value) + for i > 0 { + c := value[i-1] + if c < '0' || c > '9' { + break + } + i-- + } + if i == len(value) { + return "" + } + return value[i:] +} + +// GetCLIExample returns an example CLI command for the resource. 
+func GetCLIExample(d *ResourceDiscovery, exampleCmd string) string { + if d == nil || d.CLIAccess == "" { + return "" + } + + // Replace the placeholder with the example command + cli := d.CLIAccess + cli = strings.ReplaceAll(cli, "...", exampleCmd) + cli = strings.ReplaceAll(cli, "{command}", exampleCmd) + + return cli +} + +// FormatFactsTable formats facts as a simple table. +func FormatFactsTable(facts []DiscoveryFact) string { + if len(facts) == 0 { + return "" + } + + var sb strings.Builder + sb.WriteString("| Category | Key | Value |\n") + sb.WriteString("|----------|-----|-------|\n") + + for _, f := range facts { + value := f.Value + if len(value) > 50 { + value = value[:47] + "..." + } + sb.WriteString(fmt.Sprintf("| %s | %s | %s |\n", f.Category, f.Key, value)) + } + + return sb.String() +} + +// BuildResourceContextForPatrol builds context for Patrol findings. +func BuildResourceContextForPatrol(store *Store, resourceIDs []string) string { + if store == nil || len(resourceIDs) == 0 { + return "" + } + + discoveries, err := store.GetMultiple(resourceIDs) + if err != nil || len(discoveries) == 0 { + return "" + } + + return FormatForAIContext(discoveries) +} + +// ToJSON converts a discovery to a JSON-friendly map. 
+func ToJSON(d *ResourceDiscovery) map[string]any { + if d == nil { + return nil + } + + facts := make([]map[string]any, 0, len(d.Facts)) + for _, f := range d.Facts { + facts = append(facts, map[string]any{ + "category": f.Category, + "key": f.Key, + "value": f.Value, + "source": f.Source, + "confidence": f.Confidence, + }) + } + + ports := make([]map[string]any, 0, len(d.Ports)) + for _, p := range d.Ports { + ports = append(ports, map[string]any{ + "port": p.Port, + "protocol": p.Protocol, + "process": p.Process, + "address": p.Address, + }) + } + + return map[string]any{ + "id": d.ID, + "resource_type": d.ResourceType, + "resource_id": d.ResourceID, + "host_id": d.HostID, + "hostname": d.Hostname, + "service_type": d.ServiceType, + "service_name": d.ServiceName, + "service_version": d.ServiceVersion, + "category": d.Category, + "cli_access": d.CLIAccess, + "facts": facts, + "config_paths": d.ConfigPaths, + "data_paths": d.DataPaths, + "log_paths": d.LogPaths, + "ports": ports, + "user_notes": d.UserNotes, + "confidence": d.Confidence, + "ai_reasoning": d.AIReasoning, + "discovered_at": d.DiscoveredAt, + "updated_at": d.UpdatedAt, + "scan_duration": d.ScanDuration, + } +} diff --git a/internal/servicediscovery/formatters_test.go b/internal/servicediscovery/formatters_test.go new file mode 100644 index 000000000..988ba2727 --- /dev/null +++ b/internal/servicediscovery/formatters_test.go @@ -0,0 +1,218 @@ +package servicediscovery + +import ( + "strings" + "testing" + "time" +) + +func TestFormattersAndTables(t *testing.T) { + if FormatForAIContext(nil) != "" { + t.Fatalf("expected empty context for nil discoveries") + } + + discovery := &ResourceDiscovery{ + ID: MakeResourceID(ResourceTypeDocker, "host1", "app"), + ResourceType: ResourceTypeDocker, + ResourceID: "app", + HostID: "host1", + Hostname: "host1", + ServiceType: "app", + ServiceName: "App Service", + ServiceVersion: "1.0", + Category: CategoryWebServer, + CLIAccess: "docker exec app ...", + 
ConfigPaths: []string{"/etc/app/config.yml"}, + DataPaths: []string{"/var/lib/app"}, + Ports: []PortInfo{{Port: 80, Protocol: "tcp"}}, + UserNotes: "keepalive enabled", + Facts: []DiscoveryFact{ + {Category: FactCategoryHardware, Key: "gpu", Value: "nvidia", Confidence: 0.9}, + {Category: FactCategoryService, Key: "worker", Value: "enabled", Confidence: 0.9}, + }, + } + + ctx := FormatForAIContext([]*ResourceDiscovery{discovery}) + if !strings.Contains(ctx, "Infrastructure Discovery") || !strings.Contains(ctx, "App Service") { + t.Fatalf("unexpected context: %s", ctx) + } + if !strings.Contains(ctx, "docker exec") || !strings.Contains(ctx, "User Notes") { + t.Fatalf("missing expected fields in context") + } + + if FormatSingleForAIContext(nil) != "" { + t.Fatalf("expected empty string for nil discovery") + } + if !strings.Contains(FormatSingleForAIContext(discovery), "App Service") { + t.Fatalf("expected single discovery output") + } + + remediation := FormatForRemediation(discovery) + if !strings.Contains(remediation, "How to Execute Commands") || !strings.Contains(remediation, "Hardware") { + t.Fatalf("unexpected remediation output: %s", remediation) + } + if FormatForRemediation(nil) != "" { + t.Fatalf("expected empty remediation output for nil") + } + + example := GetCLIExample(discovery, "ls /") + if !strings.Contains(example, "ls /") { + t.Fatalf("unexpected cli example: %s", example) + } + if GetCLIExample(&ResourceDiscovery{}, "ls /") != "" { + t.Fatalf("expected empty example when cli access missing") + } + + table := FormatFactsTable([]DiscoveryFact{ + {Category: FactCategoryVersion, Key: "app", Value: strings.Repeat("x", 60)}, + }) + if !strings.Contains(table, "...") { + t.Fatalf("expected truncated table value: %s", table) + } + if FormatFactsTable(nil) != "" { + t.Fatalf("expected empty facts table for nil") + } + + jsonMap := ToJSON(discovery) + if jsonMap["service_name"] != "App Service" || jsonMap["resource_id"] != "app" { + t.Fatalf("unexpected 
json map: %#v", jsonMap) + } + if ToJSON(nil) != nil { + t.Fatalf("expected nil json map for nil discovery") + } +} + +func TestFormatDiscoverySummaryAndAge(t *testing.T) { + now := time.Now() + if FormatDiscoverySummary(nil) == "" { + t.Fatalf("expected summary text for empty list") + } + if FormatDiscoveryAge(nil) != "unknown" { + t.Fatalf("expected unknown age for nil") + } + if FormatDiscoveryAge(&ResourceDiscovery{}) != "unknown" { + t.Fatalf("expected unknown age for zero timestamp") + } + discoveries := []*ResourceDiscovery{ + { + ID: MakeResourceID(ResourceTypeVM, "node1", "101"), + ResourceType: ResourceTypeVM, + ResourceID: "101", + HostID: "node1", + ServiceName: "VM One", + Confidence: 0.95, + UpdatedAt: now.Add(-2 * time.Hour), + }, + { + ID: MakeResourceID(ResourceTypeDocker, "host1", "app"), + ResourceType: ResourceTypeDocker, + ResourceID: "app", + HostID: "host1", + ServiceName: "App", + Confidence: 0.75, + UpdatedAt: now.Add(-2 * 24 * time.Hour), + }, + } + + summary := FormatDiscoverySummary(discoveries) + if !strings.Contains(summary, "[high confidence]") || !strings.Contains(summary, "[medium confidence]") { + t.Fatalf("unexpected summary: %s", summary) + } + + tests := []struct { + name string + updated time.Time + expected string + }{ + {name: "just-now", updated: now.Add(-30 * time.Second), expected: "just now"}, + {name: "one-minute", updated: now.Add(-1 * time.Minute), expected: "1 minute ago"}, + {name: "minutes", updated: now.Add(-10 * time.Minute), expected: "10 minutes ago"}, + {name: "one-hour", updated: now.Add(-1 * time.Hour), expected: "1 hour ago"}, + {name: "hours", updated: now.Add(-2 * time.Hour), expected: "2 hours ago"}, + {name: "one-day", updated: now.Add(-24 * time.Hour), expected: "1 day ago"}, + {name: "days", updated: now.Add(-3 * 24 * time.Hour), expected: "3 days ago"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := FormatDiscoveryAge(&ResourceDiscovery{UpdatedAt: tt.updated}) + if 
got != tt.expected { + t.Fatalf("expected %s, got %s", tt.expected, got) + } + }) + } +} + +func TestBuildResourceContextForPatrol(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + + discovery := &ResourceDiscovery{ + ID: MakeResourceID(ResourceTypeDocker, "host1", "app"), + ResourceType: ResourceTypeDocker, + ResourceID: "app", + HostID: "host1", + ServiceName: "App Service", + } + if err := store.Save(discovery); err != nil { + t.Fatalf("Save error: %v", err) + } + + ctx := BuildResourceContextForPatrol(store, []string{discovery.ID}) + if !strings.Contains(ctx, "App Service") { + t.Fatalf("unexpected patrol context: %s", ctx) + } + + if BuildResourceContextForPatrol(nil, []string{discovery.ID}) != "" { + t.Fatalf("expected empty context for nil store") + } + if BuildResourceContextForPatrol(store, nil) != "" { + t.Fatalf("expected empty context for empty ids") + } + if BuildResourceContextForPatrol(store, []string{"missing"}) != "" { + t.Fatalf("expected empty context for missing discoveries") + } +} + +func TestFormatScopeHint(t *testing.T) { + discovery := &ResourceDiscovery{ + ID: MakeResourceID(ResourceTypeDocker, "host1", "app"), + ResourceType: ResourceTypeDocker, + ResourceID: "app", + HostID: "host1", + Hostname: "host1", + ServiceType: "app", + ServiceName: "App Service", + ServiceVersion: "1.2.3", + CLIAccess: "docker exec app -- ...", + Ports: []PortInfo{{Port: 80, Protocol: "tcp"}, {Port: 443, Protocol: "tcp"}}, + } + + hint := FormatScopeHint([]*ResourceDiscovery{discovery}) + if !strings.Contains(hint, "Discovery:") || !strings.Contains(hint, "App Service") { + t.Fatalf("unexpected scope hint: %s", hint) + } + if FormatScopeHint(nil) != "" { + t.Fatalf("expected empty hint for nil") + } +} + +func TestFilterImportantFactsLimit(t *testing.T) { + var facts []DiscoveryFact + for i := 0; i < 7; i++ { + facts = append(facts, DiscoveryFact{ + Category: 
FactCategoryVersion, + Key: "k", + Value: "v", + Confidence: 0.9, + }) + } + + important := filterImportantFacts(facts) + if len(important) != 5 { + t.Fatalf("expected 5 facts, got %d", len(important)) + } +} diff --git a/internal/servicediscovery/service.go b/internal/servicediscovery/service.go new file mode 100644 index 000000000..872019626 --- /dev/null +++ b/internal/servicediscovery/service.go @@ -0,0 +1,1753 @@ +// Package servicediscovery provides infrastructure discovery capabilities. +// It discovers services, versions, configurations, and CLI access methods +// for VMs, LXCs, Docker containers, Kubernetes pods, and hosts. +package servicediscovery + +import ( + "context" + "encoding/json" + "fmt" + "strconv" + "strings" + "sync" + "time" + + "github.com/rs/zerolog/log" +) + +// StateProvider provides access to the current infrastructure state. +type StateProvider interface { + GetState() StateSnapshot +} + +// StateSnapshot represents the infrastructure state. This mirrors models.StateSnapshot +// to avoid circular dependencies. +type StateSnapshot struct { + VMs []VM + Containers []Container + DockerHosts []DockerHost + KubernetesClusters []KubernetesCluster +} + +// VM represents a virtual machine. +type VM struct { + VMID int + Name string + Node string + Status string + Instance string + // Additional metadata for fingerprinting + CPUs int // Number of CPU cores + MaxMemory uint64 // Max memory in bytes + MaxDisk uint64 // Max disk in bytes + Tags []string // User-defined tags + OSName string // Detected OS name + OSVersion string // OS version string + IPAddresses []string // IP addresses assigned to the VM + Template bool // True if this is a template +} + +// Container represents an LXC container. 
type Container struct {
	VMID     int    // Proxmox container ID
	Name     string
	Node     string // Proxmox node hosting the container
	Status   string
	Instance string
	// Additional metadata for fingerprinting
	CPUs        int      // Number of CPU cores
	MaxMemory   uint64   // Max memory in bytes
	MaxDisk     uint64   // Max disk in bytes
	Tags        []string // User-defined tags
	OSTemplate  string   // Template or OCI image used
	OSName      string   // Detected OS name
	IsOCI       bool     // True if OCI container (Proxmox 9.1+)
	IPAddresses []string // IP addresses assigned to the container
	Template    bool     // True if this is a template
}

// DockerHost represents a Docker host.
type DockerHost struct {
	AgentID    string // Identifier of the agent reporting this host
	Hostname   string
	Containers []DockerContainer
}

// DockerContainer represents a Docker container.
type DockerContainer struct {
	ID     string
	Name   string
	Image  string
	Status string
	Ports  []DockerPort
	Labels map[string]string
	Mounts []DockerMount
}

// DockerPort represents a port mapping.
type DockerPort struct {
	PublicPort  int    // Host-side published port
	PrivatePort int    // Container-side port
	Protocol    string // e.g. "tcp" or "udp"
}

// DockerMount represents a mount point.
type DockerMount struct {
	Source      string // Host path or volume source
	Destination string // Path inside the container
}

// KubernetesCluster represents a Kubernetes cluster.
type KubernetesCluster struct {
	ID      string
	Name    string
	AgentID string // Identifier of the agent reporting this cluster
	Status  string
	Pods    []KubernetesPod
}

// KubernetesPod represents a Kubernetes pod.
type KubernetesPod struct {
	UID        string
	Name       string
	Namespace  string
	NodeName   string
	Phase      string
	Labels     map[string]string
	OwnerKind  string // e.g., "Deployment", "StatefulSet", "DaemonSet"
	OwnerName  string
	Containers []KubernetesPodContainer
}

// KubernetesPodContainer represents a container within a Kubernetes pod.
type KubernetesPodContainer struct {
	Name         string
	Image        string
	Ready        bool
	RestartCount int32
	State        string // e.g., "running", "waiting", "terminated"
}

// AIAnalyzer provides AI analysis capabilities for discovery.
type AIAnalyzer interface {
	// AnalyzeForDiscovery sends a discovery prompt to the AI backend and
	// returns its raw response text.
	AnalyzeForDiscovery(ctx context.Context, prompt string) (string, error)
}

// WSMessage represents a WebSocket message for broadcasting.
type WSMessage struct {
	Type string      `json:"type"`
	Data interface{} `json:"data"`
}

// WSBroadcaster provides WebSocket broadcasting capabilities.
type WSBroadcaster interface {
	BroadcastDiscoveryProgress(progress *DiscoveryProgress)
}

// Service manages infrastructure discovery. All mutable state below `mu` is
// guarded by it; the analysis cache and in-progress map have their own locks.
type Service struct {
	store         *Store
	scanner       *DeepScanner
	stateProvider StateProvider
	aiAnalyzer    AIAnalyzer
	wsHub         WSBroadcaster // WebSocket hub for broadcasting progress

	mu              sync.RWMutex
	running         bool
	stopCh          chan struct{}
	intervalCh      chan time.Duration // Channel for live interval updates
	interval        time.Duration
	initialDelay    time.Duration
	lastRun         time.Time
	deepScanTimeout time.Duration // Timeout for individual deep scans
	maxDiscoveryAge time.Duration // Max age before rediscovery (default 30 days)

	// Cache for AI analysis results (by image name)
	analysisCache map[string]*analysisCacheEntry
	cacheMu       sync.RWMutex
	cacheExpiry   time.Duration

	// In-progress discovery tracking (prevents duplicate concurrent discoveries)
	inProgressMu sync.Mutex
	inProgress   map[string]*discoveryInProgress
}

// discoveryInProgress tracks an ongoing discovery operation.
// Multiple callers can wait on the done channel for completion.
type discoveryInProgress struct {
	done   chan struct{}      // Closed when discovery completes
	result *ResourceDiscovery // Result after completion
	err    error              // Error after completion
}

// analysisCacheEntry holds a cached AI analysis result with its timestamp.
type analysisCacheEntry struct {
	result   *AIAnalysisResponse
	cachedAt time.Time // Used with cacheExpiry to decide staleness
}

// Config holds discovery service configuration.
+type Config struct { + DataDir string + Interval time.Duration // How often to run fingerprint collection (default 5 min) + CacheExpiry time.Duration // How long to cache AI analysis results + DeepScanTimeout time.Duration // Timeout for individual deep scans (default 60s) + + // Fingerprint-based discovery settings + MaxDiscoveryAge time.Duration // Rediscover after this duration (default 30 days) + FingerprintInterval time.Duration // How often to collect fingerprints (default 5 min) +} + +// DefaultConfig returns the default discovery configuration. +func DefaultConfig() Config { + return Config{ + Interval: 5 * time.Minute, // Fingerprint collection interval + CacheExpiry: 1 * time.Hour, + DeepScanTimeout: 60 * time.Second, + MaxDiscoveryAge: 30 * 24 * time.Hour, // 30 days + FingerprintInterval: 5 * time.Minute, + } +} + +// NewService creates a new discovery service. +func NewService(store *Store, scanner *DeepScanner, stateProvider StateProvider, cfg Config) *Service { + if cfg.Interval == 0 { + cfg.Interval = 5 * time.Minute + } + if cfg.CacheExpiry == 0 { + cfg.CacheExpiry = 1 * time.Hour + } + if cfg.DeepScanTimeout == 0 { + cfg.DeepScanTimeout = 60 * time.Second + } + if cfg.MaxDiscoveryAge == 0 { + cfg.MaxDiscoveryAge = 30 * 24 * time.Hour // 30 days + } + + return &Service{ + store: store, + scanner: scanner, + stateProvider: stateProvider, + interval: cfg.Interval, + initialDelay: 30 * time.Second, + cacheExpiry: cfg.CacheExpiry, + deepScanTimeout: cfg.DeepScanTimeout, + maxDiscoveryAge: cfg.MaxDiscoveryAge, + stopCh: make(chan struct{}), + intervalCh: make(chan time.Duration, 1), // Buffered to prevent blocking + analysisCache: make(map[string]*analysisCacheEntry), + inProgress: make(map[string]*discoveryInProgress), + } +} + +// SetAIAnalyzer sets the AI analyzer for discovery. +func (s *Service) SetAIAnalyzer(analyzer AIAnalyzer) { + s.mu.Lock() + defer s.mu.Unlock() + s.aiAnalyzer = analyzer +} + +// Start begins the background discovery service. 
func (s *Service) Start(ctx context.Context) {
	s.mu.Lock()
	// Idempotent: a second Start while running is a no-op.
	if s.running {
		s.mu.Unlock()
		return
	}
	s.running = true
	// Fresh stop channel so Start works again after a previous Stop
	// (Stop closes the old channel).
	s.stopCh = make(chan struct{})
	s.mu.Unlock()

	log.Info().
		// NOTE(review): s.interval is read here without the lock; a
		// concurrent SetInterval could race on this log value. Harmless for
		// logging, but worth confirming.
		Dur("interval", s.interval).
		Msg("Starting infrastructure discovery service")

	go s.discoveryLoop(ctx)
}

// Stop stops the background discovery service.
// Safe to call when not running; closing stopCh signals the loop to exit.
func (s *Service) Stop() {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.running {
		close(s.stopCh)
		s.running = false
	}
}

// SetInterval updates the scan interval. Takes effect immediately if running.
func (s *Service) SetInterval(interval time.Duration) {
	s.mu.Lock()
	s.interval = interval
	running := s.running
	s.mu.Unlock()

	// If running, send the new interval to the loop (non-blocking).
	if running {
		select {
		case s.intervalCh <- interval:
			log.Info().Dur("interval", interval).Msg("Discovery interval updated (live)")
		default:
			// Channel full (capacity 1): an earlier update is still pending,
			// so the loop will pick up a value eventually.
			log.Debug().Dur("interval", interval).Msg("Discovery interval updated (pending)")
		}
	}
}

// needsDeepScan determines if a discovery result needs a deep scan based on quality.
// Returns true if the discovery is incomplete or low-confidence.
func (s *Service) needsDeepScan(discovery *ResourceDiscovery) bool {
	if discovery == nil {
		return true // No discovery at all
	}

	// Already has deep scan data (raw command outputs) — never rescan.
	if len(discovery.RawCommandOutput) > 0 {
		return false
	}

	// Low confidence — needs more investigation.
	if discovery.Confidence < 0.7 {
		return true
	}

	// Unknown service type.
	if discovery.ServiceType == "" || discovery.ServiceType == "unknown" {
		return true
	}

	// No facts and no config/log paths: deep scan could fill these in.
	if len(discovery.Facts) == 0 && len(discovery.ConfigPaths) == 0 && len(discovery.LogPaths) == 0 {
		return true
	}

	return false
}

// SetWSHub sets the WebSocket hub for broadcasting progress updates.
+func (s *Service) SetWSHub(hub WSBroadcaster) { + s.mu.Lock() + s.wsHub = hub + s.mu.Unlock() + + // Wire up the scanner's progress callback to broadcast via WebSocket + if s.scanner != nil { + s.scanner.SetProgressCallback(s.broadcastProgress) + } + + log.Info().Msg("WebSocket hub connected to discovery service") +} + +// broadcastProgress broadcasts discovery progress to all WebSocket clients. +func (s *Service) broadcastProgress(progress *DiscoveryProgress) { + s.mu.RLock() + hub := s.wsHub + s.mu.RUnlock() + + if hub == nil || progress == nil { + return + } + + hub.BroadcastDiscoveryProgress(progress) +} + +// IsRunning returns whether the background discovery loop is active. +func (s *Service) IsRunning() bool { + s.mu.RLock() + defer s.mu.RUnlock() + return s.running +} + +// discoveryLoop runs periodic fingerprint collection (NOT actual discovery). +// This is the new fingerprint-based approach: background loop only collects fingerprints +// to detect changes. Discovery only runs on-demand when data is actually needed. 
func (s *Service) discoveryLoop(ctx context.Context) {
	// Guard against a zero/negative configured delay.
	delay := s.initialDelay
	if delay <= 0 {
		delay = 30 * time.Second
	}

	// Run initial fingerprint collection after a short delay, unless the
	// service is stopped or the context is cancelled first.
	select {
	case <-time.After(delay):
	case <-s.stopCh:
		return
	case <-ctx.Done():
		return
	}

	s.collectFingerprints(ctx)

	// Snapshot the interval under the lock; live updates arrive via intervalCh.
	s.mu.RLock()
	currentInterval := s.interval
	s.mu.RUnlock()

	ticker := time.NewTicker(currentInterval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			s.collectFingerprints(ctx)
		case newInterval := <-s.intervalCh:
			// Interval changed - replace the ticker so the new period takes
			// effect immediately (the deferred Stop handles the final ticker).
			ticker.Stop()
			ticker = time.NewTicker(newInterval)
			log.Info().Dur("interval", newInterval).Msg("Fingerprint collection interval reset")
		case <-s.stopCh:
			log.Info().Msg("Stopping discovery service")
			return
		case <-ctx.Done():
			log.Info().Msg("Discovery context cancelled")
			return
		}
	}
}

// collectFingerprints collects fingerprints from all resources (Docker, LXC, VM).
// This is FREE (no AI calls) - it just hashes metadata to detect changes.
func (s *Service) collectFingerprints(ctx context.Context) {
	// A panic here would otherwise kill the background discovery loop, so
	// recover and log instead of crashing the goroutine.
	defer func() {
		if r := recover(); r != nil {
			log.Error().Interface("panic", r).Stack().Msg("Recovered from panic in fingerprint collection")
		}
	}()

	s.mu.Lock()
	s.lastRun = time.Now()
	s.mu.Unlock()

	if s.stateProvider == nil {
		return
	}

	state := s.stateProvider.GetState()
	changedCount := 0
	newCount := 0

	// Process Docker containers
	for _, host := range state.DockerHosts {
		for _, container := range host.Containers {
			// Bail out promptly on shutdown; this is a non-blocking check.
			select {
			case <-ctx.Done():
				return
			default:
			}

			// Generate new fingerprint (prefixed with docker: to avoid collisions)
			newFP := GenerateDockerFingerprint(host.AgentID, &container)
			// NOTE(review): this key format must stay in sync with
			// cleanupOrphanedData below, which rebuilds the key from
			// host.AgentID + container.Name — presumably
			// GenerateDockerFingerprint sets ResourceID to the container
			// name; confirm.
			fpKey := "docker:" + host.AgentID + ":" + newFP.ResourceID

			// Get previous fingerprint
			oldFP, _ := s.store.GetFingerprint(fpKey)

			// Update the fingerprint's ResourceID to include prefix for storage
			newFP.ResourceID = fpKey

			// Save new fingerprint
			if err := s.store.SaveFingerprint(newFP); err != nil {
				log.Warn().Err(err).Str("container", container.Name).Msg("Failed to save Docker fingerprint")
				continue
			}

			// Check if this is new or changed
			if oldFP == nil {
				newCount++
				log.Debug().
					Str("type", "docker").
					Str("container", container.Name).
					Str("hash", newFP.Hash).
					Msg("New fingerprint captured")
			} else if newFP.HasSchemaChanged(oldFP) {
				// Schema changed - don't count as "changed" to avoid mass rediscovery
				log.Debug().
					Str("type", "docker").
					Str("container", container.Name).
					Int("old_schema", oldFP.SchemaVersion).
					Int("new_schema", newFP.SchemaVersion).
					Msg("Fingerprint schema updated")
			} else if oldFP.Hash != newFP.Hash {
				changedCount++
				log.Info().
					Str("type", "docker").
					Str("container", container.Name).
					Str("old_hash", oldFP.Hash).
					Str("new_hash", newFP.Hash).
					Msg("Fingerprint changed - discovery will run on next request")
			}
		}
	}

	// Process LXC containers
	for _, lxc := range state.Containers {
		select {
		case <-ctx.Done():
			return
		default:
		}

		// Generate new fingerprint
		newFP := GenerateLXCFingerprint(lxc.Node, &lxc)
		// Key rebuilt by cleanupOrphanedData as lxc:<node>:<vmid>.
		fpKey := "lxc:" + lxc.Node + ":" + newFP.ResourceID

		// Get previous fingerprint
		oldFP, _ := s.store.GetFingerprint(fpKey)

		// Update the fingerprint's ResourceID to include prefix for storage
		newFP.ResourceID = fpKey

		// Save new fingerprint
		if err := s.store.SaveFingerprint(newFP); err != nil {
			log.Warn().Err(err).Str("lxc", lxc.Name).Msg("Failed to save LXC fingerprint")
			continue
		}

		// Check if this is new or changed
		if oldFP == nil {
			newCount++
			log.Debug().
				Str("type", "lxc").
				Str("name", lxc.Name).
				Int("vmid", lxc.VMID).
				Str("hash", newFP.Hash).
				Msg("New fingerprint captured")
		} else if newFP.HasSchemaChanged(oldFP) {
			log.Debug().
				Str("type", "lxc").
				Str("name", lxc.Name).
				Int("vmid", lxc.VMID).
				Int("old_schema", oldFP.SchemaVersion).
				Int("new_schema", newFP.SchemaVersion).
				Msg("Fingerprint schema updated")
		} else if oldFP.Hash != newFP.Hash {
			changedCount++
			log.Info().
				Str("type", "lxc").
				Str("name", lxc.Name).
				Int("vmid", lxc.VMID).
				Str("old_hash", oldFP.Hash).
				Str("new_hash", newFP.Hash).
				Msg("Fingerprint changed - discovery will run on next request")
		}
	}

	// Process VMs
	for _, vm := range state.VMs {
		select {
		case <-ctx.Done():
			return
		default:
		}

		// Generate new fingerprint
		newFP := GenerateVMFingerprint(vm.Node, &vm)
		// Key rebuilt by cleanupOrphanedData as vm:<node>:<vmid>.
		fpKey := "vm:" + vm.Node + ":" + newFP.ResourceID

		// Get previous fingerprint
		oldFP, _ := s.store.GetFingerprint(fpKey)

		// Update the fingerprint's ResourceID to include prefix for storage
		newFP.ResourceID = fpKey

		// Save new fingerprint
		if err := s.store.SaveFingerprint(newFP); err != nil {
			log.Warn().Err(err).Str("vm", vm.Name).Msg("Failed to save VM fingerprint")
			continue
		}

		// Check if this is new or changed
		if oldFP == nil {
			newCount++
			log.Debug().
				Str("type", "vm").
				Str("name", vm.Name).
				Int("vmid", vm.VMID).
				Str("hash", newFP.Hash).
				Msg("New fingerprint captured")
		} else if newFP.HasSchemaChanged(oldFP) {
			log.Debug().
				Str("type", "vm").
				Str("name", vm.Name).
				Int("vmid", vm.VMID).
				Int("old_schema", oldFP.SchemaVersion).
				Int("new_schema", newFP.SchemaVersion).
				Msg("Fingerprint schema updated")
		} else if oldFP.Hash != newFP.Hash {
			changedCount++
			log.Info().
				Str("type", "vm").
				Str("name", vm.Name).
				Int("vmid", vm.VMID).
				Str("old_hash", oldFP.Hash).
				Str("new_hash", newFP.Hash).
				Msg("Fingerprint changed - discovery will run on next request")
		}
	}

	// Process Kubernetes pods
	for _, cluster := range state.KubernetesClusters {
		for _, pod := range cluster.Pods {
			select {
			case <-ctx.Done():
				return
			default:
			}

			// Generate new fingerprint
			newFP := GenerateK8sPodFingerprint(cluster.ID, &pod)
			// Unlike the other resource types, the k8s key is built from
			// namespace/name directly rather than newFP.ResourceID.
			fpKey := "k8s:" + cluster.ID + ":" + pod.Namespace + "/" + pod.Name

			// Get previous fingerprint
			oldFP, _ := s.store.GetFingerprint(fpKey)

			// Update the fingerprint's ResourceID to include prefix for storage
			newFP.ResourceID = fpKey

			// Save new fingerprint
			if err := s.store.SaveFingerprint(newFP); err != nil {
				log.Warn().Err(err).Str("pod", pod.Name).Str("namespace", pod.Namespace).Msg("Failed to save K8s pod fingerprint")
				continue
			}

			// Check if this is new or changed
			if oldFP == nil {
				newCount++
				log.Debug().
					Str("type", "k8s").
					Str("name", pod.Name).
					Str("namespace", pod.Namespace).
					Str("cluster", cluster.Name).
					Str("hash", newFP.Hash).
					Msg("New fingerprint captured")
			} else if newFP.HasSchemaChanged(oldFP) {
				log.Debug().
					Str("type", "k8s").
					Str("name", pod.Name).
					Str("namespace", pod.Namespace).
					Str("cluster", cluster.Name).
					Int("old_schema", oldFP.SchemaVersion).
					Int("new_schema", newFP.SchemaVersion).
					Msg("Fingerprint schema updated")
			} else if oldFP.Hash != newFP.Hash {
				changedCount++
				log.Info().
					Str("type", "k8s").
					Str("name", pod.Name).
					Str("namespace", pod.Namespace).
					Str("cluster", cluster.Name).
					Str("old_hash", oldFP.Hash).
					Str("new_hash", newFP.Hash).
					Msg("Fingerprint changed - discovery will run on next request")
			}
		}
	}

	// Update last scan time
	s.store.SetLastFingerprintScan(time.Now())

	if newCount > 0 || changedCount > 0 {
		log.Info().
			Int("new", newCount).
			Int("changed", changedCount).
			Int("total", s.store.GetFingerprintCount()).
			Msg("Fingerprint collection complete")
	} else {
		log.Debug().
			Int("total", s.store.GetFingerprintCount()).
			Msg("Fingerprint collection complete - no changes")
	}

	// Cleanup orphaned data (fingerprints/discoveries for removed resources)
	s.cleanupOrphanedData(state)
}

// cleanupOrphanedData removes fingerprints and discoveries for resources that no longer exist.
// The keys built here mirror the fpKey formats used in collectFingerprints above;
// keep the two in sync or valid fingerprints will be garbage-collected.
func (s *Service) cleanupOrphanedData(state StateSnapshot) {
	// Safety check: Don't cleanup if state appears empty
	// This prevents catastrophic deletion if state provider has an error
	totalResources := len(state.Containers) + len(state.VMs) + len(state.KubernetesClusters)
	for _, host := range state.DockerHosts {
		totalResources += len(host.Containers)
	}
	if totalResources == 0 {
		log.Debug().Msg("Skipping orphaned data cleanup - state is empty (may be an error)")
		return
	}

	// Build set of current resource IDs
	currentIDs := make(map[string]bool)

	// Docker containers
	for _, host := range state.DockerHosts {
		for _, container := range host.Containers {
			fpKey := "docker:" + host.AgentID + ":" + container.Name
			currentIDs[fpKey] = true
		}
	}

	// LXC containers
	for _, lxc := range state.Containers {
		fpKey := "lxc:" + lxc.Node + ":" + strconv.Itoa(lxc.VMID)
		currentIDs[fpKey] = true
	}

	// VMs
	for _, vm := range state.VMs {
		fpKey := "vm:" + vm.Node + ":" + strconv.Itoa(vm.VMID)
		currentIDs[fpKey] = true
	}

	// Kubernetes pods
	for _, cluster := range state.KubernetesClusters {
		for _, pod := range cluster.Pods {
			fpKey := "k8s:" + cluster.ID + ":" + pod.Namespace + "/" + pod.Name
			currentIDs[fpKey] = true
		}
	}

	// Run cleanup
	fpRemoved := s.store.CleanupOrphanedFingerprints(currentIDs)
	discRemoved := s.store.CleanupOrphanedDiscoveries(currentIDs)

	if fpRemoved > 0 || discRemoved > 0 {
		log.Info().
			Int("fingerprints_removed", fpRemoved).
			Int("discoveries_removed", discRemoved).
			Msg("Cleaned up orphaned data")
	}
}

// discoverDockerContainers runs discovery on Docker containers using metadata.
// Automatically runs deep scans when the shallow scan results are incomplete or low-confidence.
func (s *Service) discoverDockerContainers(ctx context.Context, hosts []DockerHost) {
	s.mu.RLock()
	analyzer := s.aiAnalyzer
	s.mu.RUnlock()

	if analyzer == nil {
		log.Debug().Msg("AI analyzer not set, skipping Docker discovery")
		return
	}

	for _, host := range hosts {
		for _, container := range host.Containers {
			select {
			case <-ctx.Done():
				return
			default:
			}

			// Build resource ID
			id := MakeResourceID(ResourceTypeDocker, host.AgentID, container.Name)

			// Check if we already have a recent discovery
			// NOTE(review): s.cacheExpiry is read here without any lock —
			// confirm which mutex (if any) guards it.
			if !s.store.NeedsRefresh(id, s.cacheExpiry) {
				continue
			}

			// Check existing discovery to see if it needs a deep scan
			existing, _ := s.store.Get(id)

			// Analyze using metadata (shallow discovery)
			discovery := s.analyzeDockerContainer(ctx, analyzer, container, host)
			if discovery != nil {
				// Smart auto deep scan: enhance if discovery is incomplete or low-confidence
				// Also deep scan if there's no existing discovery (first time)
				if s.scanner != nil && (existing == nil || s.needsDeepScan(discovery)) {
					log.Info().
						Str("id", id).
						Float64("confidence", discovery.Confidence).
						Str("serviceType", discovery.ServiceType).
						Bool("firstDiscovery", existing == nil).
						Msg("Auto deep scan triggered due to incomplete discovery")
					discovery = s.enhanceWithDeepScan(ctx, discovery, host)
				}

				if err := s.store.Save(discovery); err != nil {
					log.Warn().Err(err).Str("id", id).Msg("Failed to save discovery")
				}
			}
		}
	}
}

// enhanceWithDeepScan runs a deep scan and merges the results into the discovery.
func (s *Service) enhanceWithDeepScan(ctx context.Context, discovery *ResourceDiscovery, host DockerHost) *ResourceDiscovery {
	// NOTE(review): the host parameter is not referenced anywhere in this
	// body — confirm whether it can be dropped or is kept for future use.
	// The discovery argument is mutated in place and also returned; on any
	// failure the original discovery is returned unchanged (best-effort).
	s.mu.RLock()
	timeout := s.deepScanTimeout
	analyzer := s.aiAnalyzer
	s.mu.RUnlock()

	if s.scanner == nil || analyzer == nil {
		return discovery
	}

	// Create a timeout context for the deep scan
	scanCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	req := DiscoveryRequest{
		ResourceType: discovery.ResourceType,
		ResourceID:   discovery.ResourceID,
		HostID:       discovery.HostID,
		Hostname:     discovery.Hostname,
	}

	scanResult, err := s.scanner.Scan(scanCtx, req)
	if err != nil {
		log.Debug().Err(err).Str("id", discovery.ID).Msg("Deep scan failed during background discovery")
		return discovery
	}

	// Nothing to analyze if the scan produced no command output.
	if len(scanResult.CommandOutputs) == 0 {
		return discovery
	}

	// Build analysis request with command outputs
	analysisReq := AIAnalysisRequest{
		ResourceType:   discovery.ResourceType,
		ResourceID:     discovery.ResourceID,
		HostID:         discovery.HostID,
		Hostname:       discovery.Hostname,
		CommandOutputs: scanResult.CommandOutputs,
	}

	// Add metadata if available
	if s.stateProvider != nil {
		analysisReq.Metadata = s.getResourceMetadata(req)
	}

	// Build prompt and analyze (shares the deep-scan timeout context)
	prompt := s.buildDeepAnalysisPrompt(analysisReq)
	response, err := analyzer.AnalyzeForDiscovery(scanCtx, prompt)
	if err != nil {
		log.Debug().Err(err).Str("id", discovery.ID).Msg("Deep analysis failed during background discovery")
		return discovery
	}

	result := s.parseAIResponse(response)
	if result == nil {
		return discovery
	}

	// Merge results - deep scan results take precedence for non-empty fields.
	// Note that slice fields (Facts, ConfigPaths, ...) are replaced wholesale,
	// not appended to, when the deep scan returned any entries.
	if result.ServiceType != "" && result.ServiceType != "unknown" {
		discovery.ServiceType = result.ServiceType
	}
	if result.ServiceName != "" {
		discovery.ServiceName = result.ServiceName
	}
	if result.ServiceVersion != "" {
		discovery.ServiceVersion = result.ServiceVersion
	}
	if result.Category != "" && result.Category != CategoryUnknown {
		discovery.Category = result.Category
	}
	if result.CLIAccess != "" {
		discovery.CLIAccess = s.formatCLIAccess(discovery.ResourceType, discovery.ResourceID, result.CLIAccess)
	}
	if len(result.Facts) > 0 {
		discovery.Facts = result.Facts
	}
	if len(result.ConfigPaths) > 0 {
		discovery.ConfigPaths = result.ConfigPaths
	}
	if len(result.DataPaths) > 0 {
		discovery.DataPaths = result.DataPaths
	}
	if len(result.LogPaths) > 0 {
		discovery.LogPaths = result.LogPaths
	}
	if len(result.Ports) > 0 {
		discovery.Ports = result.Ports
	}
	// Confidence only ever increases when merging a deep scan.
	if result.Confidence > discovery.Confidence {
		discovery.Confidence = result.Confidence
	}
	if result.Reasoning != "" {
		discovery.AIReasoning = result.Reasoning
	}

	// Store raw command outputs
	discovery.RawCommandOutput = scanResult.CommandOutputs
	discovery.ScanDuration = scanResult.CompletedAt.Sub(scanResult.StartedAt).Milliseconds()
	discovery.UpdatedAt = time.Now()

	// Parse docker_mounts if present (for LXCs/VMs running Docker)
	if dockerMountsOutput, ok := scanResult.CommandOutputs["docker_mounts"]; ok {
		discovery.DockerMounts = parseDockerMounts(dockerMountsOutput)
		if len(discovery.DockerMounts) > 0 {
			log.Debug().
				Str("id", discovery.ID).
				Int("mountCount", len(discovery.DockerMounts)).
				Msg("Parsed Docker bind mounts from discovery")
		}
	}

	log.Info().
		Str("id", discovery.ID).
		Int("commandOutputs", len(scanResult.CommandOutputs)).
		Int("dockerMounts", len(discovery.DockerMounts)).
		Dur("scanDuration", scanResult.CompletedAt.Sub(scanResult.StartedAt)).
		Msg("Enhanced discovery with deep scan")

	return discovery
}

// analyzeDockerContainer analyzes a Docker container using AI.
+func (s *Service) analyzeDockerContainer(ctx context.Context, analyzer AIAnalyzer, c DockerContainer, host DockerHost) *ResourceDiscovery { + // Check cache first (per-image timestamp) + s.cacheMu.RLock() + entry, found := s.analysisCache[c.Image] + cacheValid := found && time.Since(entry.cachedAt) < s.cacheExpiry + s.cacheMu.RUnlock() + + var result *AIAnalysisResponse + + if cacheValid { + result = entry.result + } else { + // Build prompt for AI analysis + prompt := s.buildMetadataAnalysisPrompt(c, host) + + response, err := analyzer.AnalyzeForDiscovery(ctx, prompt) + if err != nil { + log.Warn().Err(err).Str("container", c.Name).Msg("AI analysis failed") + return nil + } + + result = s.parseAIResponse(response) + if result == nil { + log.Warn().Str("container", c.Name).Msg("Failed to parse AI response") + return nil + } + + // Cache the result with its own timestamp + s.cacheMu.Lock() + s.analysisCache[c.Image] = &analysisCacheEntry{ + result: result, + cachedAt: time.Now(), + } + s.cacheMu.Unlock() + } + + // Skip unknown/low-confidence results + if result.ServiceType == "unknown" || result.Confidence < 0.5 { + return nil + } + + // Build CLI access string + cliAccess := result.CLIAccess + if cliAccess != "" { + cliAccess = strings.ReplaceAll(cliAccess, "{container}", c.Name) + } + + // Extract ports + var ports []PortInfo + for _, p := range c.Ports { + ports = append(ports, PortInfo{ + Port: p.PrivatePort, + Protocol: p.Protocol, + Address: fmt.Sprintf(":%d", p.PublicPort), + }) + } + + return &ResourceDiscovery{ + ID: MakeResourceID(ResourceTypeDocker, host.AgentID, c.Name), + ResourceType: ResourceTypeDocker, + ResourceID: c.Name, + HostID: host.AgentID, + Hostname: host.Hostname, + ServiceType: result.ServiceType, + ServiceName: result.ServiceName, + ServiceVersion: result.ServiceVersion, + Category: result.Category, + CLIAccess: cliAccess, + Facts: result.Facts, + ConfigPaths: result.ConfigPaths, + DataPaths: result.DataPaths, + LogPaths: 
result.LogPaths, + Ports: ports, + Confidence: result.Confidence, + AIReasoning: result.Reasoning, + DiscoveredAt: time.Now(), + UpdatedAt: time.Now(), + } +} + +// DiscoverResource performs deep discovery on a specific resource. +// Uses fingerprint-based detection to avoid unnecessary AI calls: +// - Returns cached discovery if fingerprint hasn't changed +// - Runs discovery only when fingerprint changed or discovery is too old +// - Prevents duplicate concurrent discoveries for the same resource +func (s *Service) DiscoverResource(ctx context.Context, req DiscoveryRequest) (*ResourceDiscovery, error) { + resourceID := MakeResourceID(req.ResourceType, req.HostID, req.ResourceID) + + // Get current fingerprint (if available) + // Fingerprint key matches the resource ID format: type:host:id + currentFP, _ := s.store.GetFingerprint(resourceID) + + // Get existing discovery + existing, _ := s.store.Get(resourceID) + + // Determine if we need to run discovery + needsDiscovery := false + reason := "" + + if req.Force { + needsDiscovery = true + reason = "forced" + } else if existing == nil { + needsDiscovery = true + reason = "no existing discovery" + } else if currentFP != nil && existing.Fingerprint != currentFP.Hash { + // Fingerprint hash differs - check if it's just a schema version change + if existing.FingerprintSchemaVersion != 0 && existing.FingerprintSchemaVersion != currentFP.SchemaVersion { + // Schema changed but container didn't - don't trigger rediscovery + // This prevents mass rediscovery when we upgrade the fingerprint algorithm + log.Debug(). + Str("id", resourceID). + Int("old_schema", existing.FingerprintSchemaVersion). + Int("new_schema", currentFP.SchemaVersion). 
+ Msg("Fingerprint schema changed, but not triggering rediscovery") + } else { + // Same schema version, different hash = real container change + needsDiscovery = true + reason = "fingerprint changed" + } + } else if time.Since(existing.DiscoveredAt) > s.maxDiscoveryAge { + needsDiscovery = true + reason = "discovery too old" + } + + // Return cached discovery if still valid + if !needsDiscovery && existing != nil { + log.Debug().Str("id", resourceID).Msg("Discovery still valid, returning cached") + return existing, nil + } + + // Check for duplicate concurrent discovery requests + s.inProgressMu.Lock() + if inProg, ok := s.inProgress[resourceID]; ok { + // Discovery already in progress - wait for it + s.inProgressMu.Unlock() + log.Debug().Str("id", resourceID).Msg("Discovery already in progress, waiting for result") + + select { + case <-inProg.done: + return inProg.result, inProg.err + case <-ctx.Done(): + return nil, ctx.Err() + } + } + + // Claim this discovery slot + inProg := &discoveryInProgress{ + done: make(chan struct{}), + } + s.inProgress[resourceID] = inProg + s.inProgressMu.Unlock() + + // Ensure we clean up and notify waiters when done + defer func() { + close(inProg.done) + s.inProgressMu.Lock() + delete(s.inProgress, resourceID) + s.inProgressMu.Unlock() + }() + + log.Info().Str("id", resourceID).Str("reason", reason).Msg("Running discovery") + + s.mu.RLock() + analyzer := s.aiAnalyzer + s.mu.RUnlock() + + if analyzer == nil { + inProg.err = fmt.Errorf("AI analyzer not configured") + return nil, inProg.err + } + + // Run deep scan if scanner is available + var scanResult *ScanResult + if s.scanner != nil { + var err error + scanResult, err = s.scanner.Scan(ctx, req) + if err != nil { + log.Warn().Err(err).Str("id", resourceID).Msg("Deep scan failed, using metadata only") + } + } + + // Build analysis request + analysisReq := AIAnalysisRequest{ + ResourceType: req.ResourceType, + ResourceID: req.ResourceID, + HostID: req.HostID, + Hostname: 
req.Hostname, + } + + if scanResult != nil { + analysisReq.CommandOutputs = scanResult.CommandOutputs + } + + // Add metadata if available + if s.stateProvider != nil { + analysisReq.Metadata = s.getResourceMetadata(req) + } + + // Build prompt and analyze + prompt := s.buildDeepAnalysisPrompt(analysisReq) + response, err := analyzer.AnalyzeForDiscovery(ctx, prompt) + if err != nil { + inProg.err = fmt.Errorf("AI analysis failed: %w", err) + return nil, inProg.err + } + + result := s.parseAIResponse(response) + if result == nil { + // Truncate response for error message + truncated := response + if len(truncated) > 500 { + truncated = truncated[:500] + "..." + } + inProg.err = fmt.Errorf("failed to parse AI response: %s", truncated) + return nil, inProg.err + } + + // Resolve hostname from metadata if not provided in request + hostname := req.Hostname + if hostname == "" && analysisReq.Metadata != nil { + if name, ok := analysisReq.Metadata["name"].(string); ok && name != "" { + hostname = name + } + } + + // Build discovery result + discovery := &ResourceDiscovery{ + ID: resourceID, + ResourceType: req.ResourceType, + ResourceID: req.ResourceID, + HostID: req.HostID, + Hostname: hostname, + ServiceType: result.ServiceType, + ServiceName: result.ServiceName, + ServiceVersion: result.ServiceVersion, + Category: result.Category, + CLIAccess: s.formatCLIAccess(req.ResourceType, req.ResourceID, result.CLIAccess), + CLIAccessVersion: CLIAccessVersion, + Facts: result.Facts, + ConfigPaths: result.ConfigPaths, + DataPaths: result.DataPaths, + LogPaths: result.LogPaths, + Ports: result.Ports, + Confidence: result.Confidence, + AIReasoning: result.Reasoning, + DiscoveredAt: time.Now(), + UpdatedAt: time.Now(), + } + + // Store fingerprint with discovery + if currentFP != nil { + discovery.Fingerprint = currentFP.Hash + discovery.FingerprintedAt = currentFP.GeneratedAt + discovery.FingerprintSchemaVersion = currentFP.SchemaVersion + } + + if scanResult != nil { + 
discovery.RawCommandOutput = scanResult.CommandOutputs + discovery.ScanDuration = scanResult.CompletedAt.Sub(scanResult.StartedAt).Milliseconds() + + // Parse docker_mounts if present (for LXCs/VMs running Docker) + if dockerMountsOutput, ok := scanResult.CommandOutputs["docker_mounts"]; ok { + discovery.DockerMounts = parseDockerMounts(dockerMountsOutput) + if len(discovery.DockerMounts) > 0 { + log.Debug(). + Str("id", discovery.ID). + Int("mountCount", len(discovery.DockerMounts)). + Msg("Parsed Docker bind mounts from on-demand discovery") + } + } + } + + // Preserve user notes from existing discovery + if existing != nil { + discovery.UserNotes = existing.UserNotes + discovery.UserSecrets = existing.UserSecrets + if discovery.DiscoveredAt.IsZero() || existing.DiscoveredAt.Before(discovery.DiscoveredAt) { + discovery.DiscoveredAt = existing.DiscoveredAt + } + } + + // Save discovery + if err := s.store.Save(discovery); err != nil { + inProg.err = fmt.Errorf("failed to save discovery: %w", err) + return nil, inProg.err + } + + // Store result for any waiting goroutines + inProg.result = discovery + return discovery, nil +} + +// getResourceMetadata retrieves metadata for a resource from the state. 
+func (s *Service) getResourceMetadata(req DiscoveryRequest) map[string]any { + if s.stateProvider == nil { + return nil + } + + state := s.stateProvider.GetState() + metadata := make(map[string]any) + + switch req.ResourceType { + case ResourceTypeLXC: + for _, c := range state.Containers { + if fmt.Sprintf("%d", c.VMID) == req.ResourceID && c.Node == req.HostID { + metadata["name"] = c.Name + metadata["status"] = c.Status + metadata["vmid"] = c.VMID + break + } + } + case ResourceTypeVM: + for _, vm := range state.VMs { + if fmt.Sprintf("%d", vm.VMID) == req.ResourceID && vm.Node == req.HostID { + metadata["name"] = vm.Name + metadata["status"] = vm.Status + metadata["vmid"] = vm.VMID + break + } + } + case ResourceTypeDocker: + for _, host := range state.DockerHosts { + if host.AgentID == req.HostID || host.Hostname == req.HostID { + for _, c := range host.Containers { + if c.Name == req.ResourceID { + metadata["image"] = c.Image + metadata["status"] = c.Status + metadata["labels"] = c.Labels + break + } + } + break + } + } + } + + return metadata +} + +// formatCLIAccess formats the CLI access string with actual values. +func (s *Service) formatCLIAccess(resourceType ResourceType, resourceID, cliTemplate string) string { + if cliTemplate == "" { + // Use default template + cliTemplate = GetCLIAccessTemplate(resourceType) + } + + result := cliTemplate + result = strings.ReplaceAll(result, "{vmid}", resourceID) + result = strings.ReplaceAll(result, "{container}", resourceID) + result = strings.ReplaceAll(result, "{command}", "...") + + return result +} + +// buildMetadataAnalysisPrompt builds a prompt for shallow metadata-based analysis. 
func (s *Service) buildMetadataAnalysisPrompt(c DockerContainer, host DockerHost) string {
	// Assemble a JSON summary of the container's metadata; only non-empty
	// sections (ports, labels, mounts) are included.
	info := map[string]any{
		"name":   c.Name,
		"image":  c.Image,
		"status": c.Status,
		"host":   host.Hostname,
	}

	if len(c.Ports) > 0 {
		var ports []map[string]any
		for _, p := range c.Ports {
			ports = append(ports, map[string]any{
				"public":   p.PublicPort,
				"private":  p.PrivatePort,
				"protocol": p.Protocol,
			})
		}
		info["ports"] = ports
	}

	if len(c.Labels) > 0 {
		info["labels"] = c.Labels
	}

	if len(c.Mounts) > 0 {
		// Only the in-container destinations are sent, not host paths.
		var mounts []string
		for _, m := range c.Mounts {
			mounts = append(mounts, m.Destination)
		}
		info["mounts"] = mounts
	}

	// Marshal error is intentionally ignored: info is built from plain
	// strings/ints/maps which always marshal.
	infoJSON, _ := json.MarshalIndent(info, "", "  ")

	// The JSON shape requested here must match what parseAIResponse expects
	// to unmarshal into AIAnalysisResponse.
	return fmt.Sprintf(`Analyze this Docker container and identify what service it's running.

Container Information:
%s

Based on the image name, ports, labels, and mounts, determine:
1. What service/application is this?
2. What category does it belong to?
3. How should CLI commands be executed?

Respond in this exact JSON format:
{
  "service_type": "lowercase_type",
  "service_name": "Human Readable Name",
  "service_version": "version if detectable from image tag",
  "category": "database|web_server|cache|monitoring|backup|nvr|storage|container|network|security|media|home_automation|unknown",
  "cli_access": "docker exec {container} ",
  "facts": [],
  "config_paths": [],
  "data_paths": [],
  "log_paths": [],
  "ports": [],
  "confidence": 0.0-1.0,
  "reasoning": "Brief explanation"
}

Respond with ONLY valid JSON.`, string(infoJSON))
}

// buildDeepAnalysisPrompt builds a prompt for deep analysis with command outputs.
func (s *Service) buildDeepAnalysisPrompt(req AIAnalysisRequest) string {
	var sections []string

	sections = append(sections, fmt.Sprintf(`Resource Type: %s
Resource ID: %s
Host: %s (%s)`, req.ResourceType, req.ResourceID, req.Hostname, req.HostID))

	if len(req.Metadata) > 0 {
		metaJSON, _ := json.MarshalIndent(req.Metadata, "", "  ")
		sections = append(sections, fmt.Sprintf("Metadata:\n%s", string(metaJSON)))
	}

	if len(req.CommandOutputs) > 0 {
		sections = append(sections, "Command Outputs:")
		// Map iteration order is random, so section order varies between
		// calls; the prompt content is otherwise deterministic.
		for name, output := range req.CommandOutputs {
			// Truncate long outputs to keep the prompt within budget.
			if len(output) > 2000 {
				output = output[:2000] + "\n... (truncated)"
			}
			sections = append(sections, fmt.Sprintf("--- %s ---\n%s", name, output))
		}
	}

	return fmt.Sprintf(`Analyze this infrastructure resource and provide detailed discovery information.

%s

Based on all available information, determine:
1. What service/application is running?
2. What version is it?
3. What are the important configuration paths?
4. What data paths should be backed up?
5. What log paths are useful for troubleshooting?
6. What ports are in use?
7. Any special hardware (GPU, TPU, etc.)?
8. Any dependencies (databases, message queues, etc.)?

Respond in this exact JSON format:
{
  "service_type": "lowercase_type (e.g., frigate, postgres, pbs)",
  "service_name": "Human Readable Name",
  "service_version": "version number if found",
  "category": "database|web_server|cache|monitoring|backup|nvr|storage|container|virtualizer|network|security|media|home_automation|unknown",
  "cli_access": "command to access this service's CLI",
  "facts": [
    {"category": "version|config|service|port|hardware|network|storage|dependency|security", "key": "fact_name", "value": "fact_value", "source": "command_name", "confidence": 0.9}
  ],
  "config_paths": ["/path/to/config.yml"],
  "data_paths": ["/path/to/data"],
  "log_paths": ["/var/log/service/", "/path/to/app.log"],
  "ports": [{"port": 8080, "protocol": "tcp", "process": "nginx", "address": "0.0.0.0"}],
  "confidence": 0.0-1.0,
  "reasoning": "Explanation of identification"
}

Important:
- Extract version numbers from package lists, process output, or config files
- Identify config and data paths from mount points and file listings
- Identify log paths (e.g., /var/log/, application-specific logs) for troubleshooting
- Note any special hardware like Coral TPU, NVIDIA GPU
- For LXC/VM, the CLI access should use pct exec or qm guest exec
- For Docker, use docker exec

Respond with ONLY valid JSON.`, strings.Join(sections, "\n\n"))
}

// parseAIResponse parses the AI's JSON response.
+func (s *Service) parseAIResponse(response string) *AIAnalysisResponse { + log.Debug().Str("raw_response", response).Msg("Discovery raw response") + response = strings.TrimSpace(response) + + // Handle markdown code blocks + if strings.HasPrefix(response, "```") { + lines := strings.Split(response, "\n") + var jsonLines []string + inBlock := false + for _, line := range lines { + if strings.HasPrefix(line, "```") { + inBlock = !inBlock + continue + } + if inBlock { + jsonLines = append(jsonLines, line) + } + } + response = strings.Join(jsonLines, "\n") + } + + // Find JSON object + start := strings.Index(response, "{") + end := strings.LastIndex(response, "}") + if start >= 0 && end > start { + response = response[start : end+1] + } + + var result AIAnalysisResponse + if err := json.Unmarshal([]byte(response), &result); err != nil { + log.Debug().Err(err).Str("response", response).Msg("Failed to parse AI response") + return nil + } + + // Set discovered_at for facts + now := time.Now() + for i := range result.Facts { + result.Facts[i].DiscoveredAt = now + } + + return &result +} + +// parseDockerMounts parses the docker_mounts command output into a slice of DockerBindMount. 
+// The output format is: +// CONTAINER:container_name +// source|destination|type +// source|destination|type +// CONTAINER:another_container +// source|destination|type +func parseDockerMounts(output string) []DockerBindMount { + if output == "" || output == "no_docker_mounts" { + return nil + } + + var mounts []DockerBindMount + var currentContainer string + + lines := strings.Split(output, "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if line == "" { + continue + } + + // Check if this is a container header + if strings.HasPrefix(line, "CONTAINER:") { + currentContainer = strings.TrimPrefix(line, "CONTAINER:") + continue + } + + // Skip if we don't have a current container + if currentContainer == "" { + continue + } + + // Parse mount line: source|destination|type + parts := strings.Split(line, "|") + if len(parts) < 2 { + continue + } + + mount := DockerBindMount{ + ContainerName: currentContainer, + Source: parts[0], + Destination: parts[1], + } + if len(parts) >= 3 { + mount.Type = parts[2] + } + + // Only include bind mounts and volumes (skip tmpfs, etc.) + if mount.Type == "" || mount.Type == "bind" || mount.Type == "volume" { + mounts = append(mounts, mount) + } + } + + return mounts +} + +// GetDiscovery retrieves a discovery by ID. +func (s *Service) GetDiscovery(id string) (*ResourceDiscovery, error) { + d, err := s.store.Get(id) + if err != nil || d == nil { + return d, err + } + s.upgradeCLIAccessIfNeeded(d) + return d, nil +} + +// GetDiscoveryByResource retrieves a discovery by resource type and ID. +func (s *Service) GetDiscoveryByResource(resourceType ResourceType, hostID, resourceID string) (*ResourceDiscovery, error) { + d, err := s.store.GetByResource(resourceType, hostID, resourceID) + if err != nil || d == nil { + return d, err + } + s.upgradeCLIAccessIfNeeded(d) + return d, nil +} + +// ListDiscoveries returns all discoveries. 
+func (s *Service) ListDiscoveries() ([]*ResourceDiscovery, error) { + discoveries, err := s.store.List() + if err != nil { + return nil, err + } + for _, d := range discoveries { + s.upgradeCLIAccessIfNeeded(d) + } + return discoveries, nil +} + +// ListDiscoveriesByType returns discoveries for a specific resource type. +func (s *Service) ListDiscoveriesByType(resourceType ResourceType) ([]*ResourceDiscovery, error) { + discoveries, err := s.store.ListByType(resourceType) + if err != nil { + return nil, err + } + for _, d := range discoveries { + s.upgradeCLIAccessIfNeeded(d) + } + return discoveries, nil +} + +// ListDiscoveriesByHost returns discoveries for a specific host. +func (s *Service) ListDiscoveriesByHost(hostID string) ([]*ResourceDiscovery, error) { + discoveries, err := s.store.ListByHost(hostID) + if err != nil { + return nil, err + } + for _, d := range discoveries { + s.upgradeCLIAccessIfNeeded(d) + } + return discoveries, nil +} + +// upgradeDiscoveryIfNeeded upgrades cached discovery fields to current versions. +// This ensures cached discoveries get the new instructional CLI access format +// and have hostname populated without requiring a full re-discovery. +func (s *Service) upgradeCLIAccessIfNeeded(d *ResourceDiscovery) { + if d == nil { + return + } + + upgraded := false + + // Upgrade CLI access if version is outdated + if d.CLIAccessVersion < CLIAccessVersion { + oldCLI := d.CLIAccess + d.CLIAccess = GetCLIAccessTemplate(d.ResourceType) + d.CLIAccessVersion = CLIAccessVersion + upgraded = true + + log.Debug(). + Str("id", d.ID). + Str("old_cli", oldCLI). + Str("new_cli", d.CLIAccess). + Int("new_version", CLIAccessVersion). 
+ Msg("Upgraded CLI access pattern to new version") + } + + // Fix empty hostname by looking up the resource name from state + if d.Hostname == "" && s.stateProvider != nil { + state := s.stateProvider.GetState() + hostname := s.lookupHostnameFromState(d.ResourceType, d.HostID, d.ResourceID, state) + if hostname != "" { + d.Hostname = hostname + upgraded = true + log.Debug(). + Str("id", d.ID). + Str("hostname", hostname). + Msg("Populated missing hostname from state") + } + } + + _ = upgraded // Suppress unused variable warning if logging is disabled +} + +// lookupHostnameFromState finds the hostname/name for a resource from state +func (s *Service) lookupHostnameFromState(resourceType ResourceType, hostID, resourceID string, state StateSnapshot) string { + switch resourceType { + case ResourceTypeLXC: + for _, c := range state.Containers { + if fmt.Sprintf("%d", c.VMID) == resourceID && c.Node == hostID { + return c.Name + } + } + case ResourceTypeVM: + for _, vm := range state.VMs { + if fmt.Sprintf("%d", vm.VMID) == resourceID && vm.Node == hostID { + return vm.Name + } + } + case ResourceTypeDocker: + for _, host := range state.DockerHosts { + if host.AgentID == hostID || host.Hostname == hostID { + for _, c := range host.Containers { + if c.Name == resourceID { + return host.Hostname + } + } + } + } + } + return "" +} + +// UpdateNotes updates user notes for a discovery. +func (s *Service) UpdateNotes(id string, notes string, secrets map[string]string) error { + return s.store.UpdateNotes(id, notes, secrets) +} + +// DeleteDiscovery deletes a discovery. +func (s *Service) DeleteDiscovery(id string) error { + return s.store.Delete(id) +} + +// GetProgress returns the progress of an ongoing discovery. +func (s *Service) GetProgress(resourceID string) *DiscoveryProgress { + if s.scanner == nil { + return nil + } + return s.scanner.GetProgress(resourceID) +} + +// GetStatus returns the service status including fingerprint statistics. 
+func (s *Service) GetStatus() map[string]any { + s.mu.RLock() + defer s.mu.RUnlock() + + s.cacheMu.RLock() + cacheSize := len(s.analysisCache) + s.cacheMu.RUnlock() + + // Get fingerprint stats + fingerprintCount := 0 + var lastFingerprintScan time.Time + if s.store != nil { + fingerprintCount = s.store.GetFingerprintCount() + lastFingerprintScan = s.store.GetLastFingerprintScan() + } + + return map[string]any{ + "running": s.running, + "last_run": s.lastRun, + "interval": s.interval.String(), + "cache_size": cacheSize, + "ai_analyzer_set": s.aiAnalyzer != nil, + "scanner_set": s.scanner != nil, + "store_set": s.store != nil, + "deep_scan_timeout": s.deepScanTimeout.String(), + "max_discovery_age": s.maxDiscoveryAge.String(), + "fingerprint_count": fingerprintCount, + "last_fingerprint_scan": lastFingerprintScan, + } +} + +// GetMaxDiscoveryAge returns the current max discovery age (staleness threshold). +func (s *Service) GetMaxDiscoveryAge() time.Duration { + s.mu.RLock() + defer s.mu.RUnlock() + return s.maxDiscoveryAge +} + +// SetMaxDiscoveryAge updates the max discovery age (staleness threshold). +// Discoveries older than this duration will be re-run when requested. +func (s *Service) SetMaxDiscoveryAge(age time.Duration) { + s.mu.Lock() + defer s.mu.Unlock() + + // Enforce minimum of 1 day + if age < 24*time.Hour { + age = 24 * time.Hour + } + + s.maxDiscoveryAge = age + log.Info().Dur("max_discovery_age", age).Msg("Max discovery age updated") +} + +// ClearCache clears the AI analysis cache. +func (s *Service) ClearCache() { + s.cacheMu.Lock() + defer s.cacheMu.Unlock() + s.analysisCache = make(map[string]*analysisCacheEntry) +} + +// --- AI Chat Integration Methods --- + +// GetDiscoveryForAIChat returns discovery data for AI chat context. +// It will run discovery if needed (fingerprint changed or no data exists). +// This is the just-in-time discovery approach: only call AI when data is actually needed. 
+func (s *Service) GetDiscoveryForAIChat(ctx context.Context, resourceType ResourceType, hostID, resourceID string) (*ResourceDiscovery, error) { + // This is the same as DiscoverResource but without Force + return s.DiscoverResource(ctx, DiscoveryRequest{ + ResourceType: resourceType, + ResourceID: resourceID, + HostID: hostID, + Force: false, // Let fingerprint logic decide + }) +} + +// GetDiscoveriesForAIContext returns discoveries for multiple resources. +// Used when AI chat needs context about the infrastructure. +// Only runs discovery for resources that actually need it (fingerprint changed). +func (s *Service) GetDiscoveriesForAIContext(ctx context.Context, resourceIDs []string) ([]*ResourceDiscovery, error) { + var results []*ResourceDiscovery + for _, id := range resourceIDs { + resourceType, hostID, resourceID, err := ParseResourceID(id) + if err != nil { + log.Debug().Err(err).Str("id", id).Msg("Failed to parse resource ID for AI context") + continue + } + discovery, err := s.GetDiscoveryForAIChat(ctx, resourceType, hostID, resourceID) + if err != nil { + log.Debug().Err(err).Str("id", id).Msg("Failed to get discovery for AI context") + continue + } + if discovery != nil { + results = append(results, discovery) + } + } + return results, nil +} + +// GetChangedResourceCount returns the count of resources whose fingerprint has changed +// since their last discovery. +func (s *Service) GetChangedResourceCount() (int, error) { + if s.store == nil { + return 0, nil + } + changed, err := s.store.GetChangedResources() + if err != nil { + return 0, err + } + return len(changed), nil +} + +// GetStaleResourceCount returns the count of resources whose discovery is older +// than maxDiscoveryAge. 
+func (s *Service) GetStaleResourceCount() (int, error) { + if s.store == nil { + return 0, nil + } + stale, err := s.store.GetStaleResources(s.maxDiscoveryAge) + if err != nil { + return 0, err + } + return len(stale), nil +} diff --git a/internal/servicediscovery/service_test.go b/internal/servicediscovery/service_test.go new file mode 100644 index 000000000..b4fb0af1b --- /dev/null +++ b/internal/servicediscovery/service_test.go @@ -0,0 +1,797 @@ +package servicediscovery + +import ( + "context" + "os" + "path/filepath" + "strings" + "sync" + "testing" + "time" +) + +type stubAnalyzer struct { + mu sync.Mutex + calls int + response string +} + +func (s *stubAnalyzer) AnalyzeForDiscovery(ctx context.Context, prompt string) (string, error) { + s.mu.Lock() + s.calls++ + s.mu.Unlock() + return s.response, nil +} + +type errorAnalyzer struct{} + +func (errorAnalyzer) AnalyzeForDiscovery(ctx context.Context, prompt string) (string, error) { + return "", context.Canceled +} + +type stubStateProvider struct { + state StateSnapshot +} + +func (s stubStateProvider) GetState() StateSnapshot { + return s.state +} + +type panicStateProvider struct{} + +func (panicStateProvider) GetState() StateSnapshot { + panic("boom") +} + +func TestService_parseAIResponse_Markdown(t *testing.T) { + service := &Service{} + response := "```json\n{\n \"service_type\": \"nginx\",\n \"service_name\": \"Nginx\",\n \"service_version\": \"1.2\",\n \"category\": \"web_server\",\n \"cli_access\": \"docker exec {container} bash\",\n \"facts\": [{\"category\": \"version\", \"key\": \"nginx\", \"value\": \"1.2\", \"source\": \"cmd\", \"confidence\": 0.9}],\n \"config_paths\": [\"/etc/nginx/nginx.conf\"],\n \"data_paths\": [\"/var/www\"],\n \"ports\": [{\"port\": 80, \"protocol\": \"tcp\", \"process\": \"nginx\", \"address\": \"0.0.0.0\"}],\n \"confidence\": 0.9,\n \"reasoning\": \"image name\"\n}\n```" + + parsed := service.parseAIResponse(response) + if parsed == nil { + t.Fatalf("expected parsed 
response") + } + if parsed.ServiceType != "nginx" || parsed.ServiceName != "Nginx" { + t.Fatalf("unexpected parsed result: %#v", parsed) + } + if len(parsed.Facts) != 1 || parsed.Facts[0].DiscoveredAt.IsZero() { + t.Fatalf("expected fact timestamp set: %#v", parsed.Facts) + } + + if service.parseAIResponse("not json") != nil { + t.Fatalf("expected nil for invalid json") + } +} + +func TestService_analyzeDockerContainer_CacheAndPorts(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + service := NewService(store, nil, nil, Config{CacheExpiry: time.Hour}) + + analyzer := &stubAnalyzer{ + response: `{"service_type":"nginx","service_name":"Nginx","service_version":"1.2","category":"web_server","cli_access":"docker exec {container} nginx -v","facts":[],"config_paths":[],"data_paths":[],"ports":[],"confidence":0.9,"reasoning":"image"}`, + } + + container := DockerContainer{ + Name: "web", + Image: "nginx:latest", + Status: "running", + Ports: []DockerPort{ + {PublicPort: 8080, PrivatePort: 80, Protocol: "tcp"}, + }, + } + host := DockerHost{ + AgentID: "host1", + Hostname: "host1", + } + + first := service.analyzeDockerContainer(context.Background(), analyzer, container, host) + if first == nil { + t.Fatalf("expected discovery") + } + if !strings.Contains(first.CLIAccess, "web") { + t.Fatalf("expected cli access to include container name, got %s", first.CLIAccess) + } + if len(first.Ports) != 1 || first.Ports[0].Port != 80 || first.Ports[0].Address != ":8080" { + t.Fatalf("unexpected ports: %#v", first.Ports) + } + + second := service.analyzeDockerContainer(context.Background(), analyzer, container, host) + if second == nil { + t.Fatalf("expected cached discovery") + } + + analyzer.mu.Lock() + calls := analyzer.calls + analyzer.mu.Unlock() + if calls != 1 { + t.Fatalf("expected analyzer called once, got %d", calls) + } + + lowAnalyzer := &stubAnalyzer{ + response: 
`{"service_type":"unknown","service_name":"","service_version":"","category":"unknown","cli_access":"","facts":[],"config_paths":[],"data_paths":[],"ports":[],"confidence":0.4,"reasoning":""}`, + } + lowContainer := DockerContainer{Name: "mystery", Image: "unknown:latest"} + if got := service.analyzeDockerContainer(context.Background(), lowAnalyzer, lowContainer, host); got != nil { + t.Fatalf("expected low confidence discovery to be skipped") + } +} + +func TestService_DiscoverResource_RecentAndNoAnalyzer(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + service := NewService(store, nil, nil, DefaultConfig()) + + req := DiscoveryRequest{ + ResourceType: ResourceTypeDocker, + ResourceID: "nginx", + HostID: "host1", + Hostname: "host1", + } + discovery := &ResourceDiscovery{ + ID: MakeResourceID(req.ResourceType, req.HostID, req.ResourceID), + ResourceType: req.ResourceType, + ResourceID: req.ResourceID, + HostID: req.HostID, + Hostname: req.Hostname, + ServiceName: "Existing", + } + if err := store.Save(discovery); err != nil { + t.Fatalf("Save error: %v", err) + } + + found, err := service.DiscoverResource(context.Background(), req) + if err != nil { + t.Fatalf("DiscoverResource error: %v", err) + } + if found == nil || found.ServiceName != "Existing" { + t.Fatalf("unexpected discovery: %#v", found) + } + + _, err = service.DiscoverResource(context.Background(), DiscoveryRequest{ + ResourceType: ResourceTypeVM, + ResourceID: "101", + HostID: "node1", + Hostname: "node1", + Force: true, + }) + if err == nil || !strings.Contains(err.Error(), "AI analyzer") { + t.Fatalf("expected analyzer error, got %v", err) + } + + service.SetAIAnalyzer(errorAnalyzer{}) + _, err = service.DiscoverResource(context.Background(), DiscoveryRequest{ + ResourceType: ResourceTypeVM, + ResourceID: "102", + HostID: "node1", + Hostname: "node1", + Force: true, + }) + if err == nil || 
!strings.Contains(err.Error(), "AI analysis failed") { + t.Fatalf("expected analysis error, got %v", err) + } + + service.SetAIAnalyzer(&stubAnalyzer{response: "not json"}) + _, err = service.DiscoverResource(context.Background(), DiscoveryRequest{ + ResourceType: ResourceTypeVM, + ResourceID: "103", + HostID: "node1", + Hostname: "node1", + Force: true, + }) + if err == nil || !strings.Contains(err.Error(), "failed to parse") { + t.Fatalf("expected parse error, got %v", err) + } +} + +func TestService_getResourceMetadata(t *testing.T) { + state := StateSnapshot{ + VMs: []VM{ + {VMID: 101, Name: "vm1", Node: "node1", Status: "running"}, + }, + Containers: []Container{ + {VMID: 201, Name: "lxc1", Node: "node2", Status: "stopped"}, + }, + DockerHosts: []DockerHost{ + { + AgentID: "agent1", + Hostname: "dock1", + Containers: []DockerContainer{ + {Name: "redis", Image: "redis:latest", Status: "running", Labels: map[string]string{"tier": "cache"}}, + }, + }, + }, + } + + service := NewService(nil, nil, stubStateProvider{state: state}, DefaultConfig()) + + vmMeta := service.getResourceMetadata(DiscoveryRequest{ + ResourceType: ResourceTypeVM, + ResourceID: "101", + HostID: "node1", + }) + if vmMeta["name"] != "vm1" || vmMeta["vmid"] != 101 { + t.Fatalf("unexpected vm metadata: %#v", vmMeta) + } + + lxcMeta := service.getResourceMetadata(DiscoveryRequest{ + ResourceType: ResourceTypeLXC, + ResourceID: "201", + HostID: "node2", + }) + if lxcMeta["name"] != "lxc1" || lxcMeta["status"] != "stopped" { + t.Fatalf("unexpected lxc metadata: %#v", lxcMeta) + } + + dockerMeta := service.getResourceMetadata(DiscoveryRequest{ + ResourceType: ResourceTypeDocker, + ResourceID: "redis", + HostID: "agent1", + }) + if dockerMeta["image"] != "redis:latest" || dockerMeta["status"] != "running" { + t.Fatalf("unexpected docker metadata: %#v", dockerMeta) + } + + dockerByHost := service.getResourceMetadata(DiscoveryRequest{ + ResourceType: ResourceTypeDocker, + ResourceID: "redis", + HostID: 
"dock1", + }) + if dockerByHost["image"] != "redis:latest" { + t.Fatalf("unexpected docker hostname metadata: %#v", dockerByHost) + } +} + +func TestService_formatCLIAccessAndStatus(t *testing.T) { + service := NewService(nil, nil, nil, DefaultConfig()) + formatted := service.formatCLIAccess(ResourceTypeDocker, "redis", "") + // New format is instructional, should mention the container name and pulse_control + if !strings.Contains(formatted, "redis") || !strings.Contains(formatted, "docker exec") { + t.Fatalf("unexpected cli access: %s", formatted) + } + + service.analysisCache = map[string]*analysisCacheEntry{ + "nginx:latest": { + result: &AIAnalysisResponse{ServiceType: "nginx"}, + cachedAt: time.Now(), + }, + } + service.running = true + status := service.GetStatus() + if status["running"] != true || status["cache_size"] != 1 { + t.Fatalf("unexpected status: %#v", status) + } + + service.ClearCache() + if len(service.analysisCache) != 0 { + t.Fatalf("expected cache cleared") + } +} + +func TestService_DefaultsAndSetAnalyzer(t *testing.T) { + service := NewService(nil, nil, nil, Config{}) + if service.interval == 0 || service.cacheExpiry == 0 { + t.Fatalf("expected defaults for interval and cache expiry") + } + + analyzer := &stubAnalyzer{response: `{}`} + service.SetAIAnalyzer(analyzer) + if service.aiAnalyzer == nil { + t.Fatalf("expected analyzer set") + } + if service.GetProgress("missing") != nil { + t.Fatalf("expected nil progress without scanner") + } + if service.getResourceMetadata(DiscoveryRequest{}) != nil { + t.Fatalf("expected nil metadata without state provider") + } +} + +func TestService_FingerprintCollectionAndDiscoveryWrappers(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + state := StateSnapshot{ + DockerHosts: []DockerHost{ + { + AgentID: "host1", + Hostname: "host1", + Containers: []DockerContainer{ + {Name: "web", Image: "nginx:latest", Status: 
"running"}, + }, + }, + }, + } + service := NewService(store, nil, stubStateProvider{state: state}, DefaultConfig()) + service.SetAIAnalyzer(&stubAnalyzer{ + response: `{"service_type":"nginx","service_name":"Nginx","service_version":"1.2","category":"web_server","cli_access":"docker exec {container} nginx -v","facts":[],"config_paths":[],"data_paths":[],"ports":[],"confidence":0.9,"reasoning":"image"}`, + }) + + // First, collect fingerprints (no AI calls) + service.collectFingerprints(context.Background()) + + // Verify fingerprint was collected (key format is type:host:id) + fp, err := store.GetFingerprint("docker:host1:web") + if err != nil { + t.Fatalf("GetFingerprint error: %v", err) + } + if fp == nil { + t.Fatalf("expected fingerprint to be collected") + } + + // Now trigger on-demand discovery (this makes AI call) + id := MakeResourceID(ResourceTypeDocker, "host1", "web") + discovery, err := service.DiscoverResource(context.Background(), DiscoveryRequest{ + ResourceType: ResourceTypeDocker, + ResourceID: "web", + HostID: "host1", + Hostname: "host1", + }) + if err != nil { + t.Fatalf("DiscoverResource error: %v", err) + } + if discovery == nil { + t.Fatalf("expected discovery result") + } + + if got, err := service.GetDiscovery(id); err != nil || got == nil { + t.Fatalf("GetDiscovery error: %v", err) + } + if got, err := service.GetDiscoveryByResource(ResourceTypeDocker, "host1", "web"); err != nil || got == nil { + t.Fatalf("GetDiscoveryByResource error: %v", err) + } + + if list, err := service.ListDiscoveries(); err != nil || len(list) != 1 { + t.Fatalf("ListDiscoveries unexpected: %v len=%d", err, len(list)) + } + if list, err := service.ListDiscoveriesByType(ResourceTypeDocker); err != nil || len(list) != 1 { + t.Fatalf("ListDiscoveriesByType unexpected: %v len=%d", err, len(list)) + } + if list, err := service.ListDiscoveriesByHost("host1"); err != nil || len(list) != 1 { + t.Fatalf("ListDiscoveriesByHost unexpected: %v len=%d", err, len(list)) + } + 
+ if err := service.UpdateNotes(id, "note", map[string]string{"k": "v"}); err != nil { + t.Fatalf("UpdateNotes error: %v", err) + } + updated, err := service.GetDiscovery(id) + if err != nil || updated.UserNotes != "note" { + t.Fatalf("expected updated notes: %#v err=%v", updated, err) + } + + scanner := NewDeepScanner(&stubExecutor{}) + scanner.progress[id] = &DiscoveryProgress{ResourceID: id} + service.scanner = scanner + if service.GetProgress(id) == nil { + t.Fatalf("expected progress") + } + + if err := service.DeleteDiscovery(id); err != nil { + t.Fatalf("DeleteDiscovery error: %v", err) + } + + service.stateProvider = nil + service.collectFingerprints(context.Background()) +} + +func TestService_PromptsAndDiscoveryLoop(t *testing.T) { + service := NewService(nil, nil, nil, DefaultConfig()) + + container := DockerContainer{ + Name: "web", + Image: "nginx:latest", + Status: "running", + Ports: []DockerPort{ + {PublicPort: 8080, PrivatePort: 80, Protocol: "tcp"}, + }, + Labels: map[string]string{"app": "nginx"}, + Mounts: []DockerMount{{Destination: "/etc/nginx"}}, + } + host := DockerHost{Hostname: "host1"} + prompt := service.buildMetadataAnalysisPrompt(container, host) + if !strings.Contains(prompt, "\"ports\"") || !strings.Contains(prompt, "\"labels\"") || !strings.Contains(prompt, "\"mounts\"") { + t.Fatalf("unexpected metadata prompt: %s", prompt) + } + + longOutput := strings.Repeat("a", 2100) + deepPrompt := service.buildDeepAnalysisPrompt(AIAnalysisRequest{ + ResourceType: ResourceTypeDocker, + ResourceID: "web", + HostID: "host1", + Hostname: "host1", + Metadata: map[string]any{"image": "nginx"}, + CommandOutputs: map[string]string{ + "ps": longOutput, + }, + }) + if !strings.Contains(deepPrompt, "(truncated)") || !strings.Contains(deepPrompt, "Metadata:") { + t.Fatalf("unexpected deep prompt") + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + service.initialDelay = time.Millisecond + service.Start(ctx) + service.Start(ctx) 
+ service.Stop() + + service.stopCh = make(chan struct{}) + close(service.stopCh) + service.discoveryLoop(context.Background()) + + service.initialDelay = 0 + service.stopCh = make(chan struct{}) + close(service.stopCh) + service.discoveryLoop(context.Background()) +} + +func TestService_FingerprintLoop_StopAndCancel(t *testing.T) { + state := StateSnapshot{ + DockerHosts: []DockerHost{ + { + AgentID: "host1", + Hostname: "host1", + Containers: []DockerContainer{ + {Name: "web", Image: "nginx:latest", Status: "running"}, + }, + }, + }, + } + + runLoop := func(stopWithCancel bool) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + + service := NewService(store, nil, stubStateProvider{state: state}, DefaultConfig()) + // Analyzer should NOT be called - background loop only collects fingerprints + analyzer := &stubAnalyzer{ + response: `{"service_type":"nginx","service_name":"Nginx","service_version":"1.2","category":"web_server","cli_access":"docker exec {container} nginx -v","facts":[],"config_paths":[],"data_paths":[],"ports":[],"confidence":0.9,"reasoning":"image"}`, + } + service.SetAIAnalyzer(analyzer) + service.initialDelay = time.Millisecond + service.interval = time.Millisecond + service.cacheExpiry = time.Nanosecond + + done := make(chan struct{}) + ctx, cancel := context.WithCancel(context.Background()) + go func() { + service.discoveryLoop(ctx) + close(done) + }() + + time.Sleep(5 * time.Millisecond) + if stopWithCancel { + cancel() + } else { + close(service.stopCh) + } + + select { + case <-done: + case <-time.After(50 * time.Millisecond): + t.Fatalf("discoveryLoop did not stop") + } + + // Verify fingerprints were collected (background loop does NOT make AI calls) + // Key format is type:host:id + fp, err := store.GetFingerprint("docker:host1:web") + if err != nil { + t.Fatalf("GetFingerprint error: %v", err) + } + if fp == nil { + t.Fatalf("expected fingerprint to be collected") + 
} + + // Verify NO AI calls were made in background loop + analyzer.mu.Lock() + calls := analyzer.calls + analyzer.mu.Unlock() + if calls > 0 { + t.Fatalf("expected no AI calls in background loop (fingerprint-only), got %d", calls) + } + } + + runLoop(false) + runLoop(true) +} + +func TestService_DiscoverDockerContainersSkips(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + + service := NewService(store, nil, nil, DefaultConfig()) + service.discoverDockerContainers(context.Background(), []DockerHost{{AgentID: "host1"}}) + + service.SetAIAnalyzer(&stubAnalyzer{ + response: `{"service_type":"nginx","service_name":"Nginx","service_version":"1.2","category":"web_server","cli_access":"docker exec {container} nginx -v","facts":[],"config_paths":[],"data_paths":[],"ports":[],"confidence":0.9,"reasoning":"image"}`, + }) + + id := MakeResourceID(ResourceTypeDocker, "host1", "web") + if err := store.Save(&ResourceDiscovery{ID: id, ResourceType: ResourceTypeDocker}); err != nil { + t.Fatalf("Save error: %v", err) + } + service.cacheExpiry = time.Hour + service.discoverDockerContainers(context.Background(), []DockerHost{ + {AgentID: "host1", Containers: []DockerContainer{{Name: "web", Image: "nginx:latest"}}}, + }) + + badAnalyzer := &stubAnalyzer{response: "not json"} + if got := service.analyzeDockerContainer(context.Background(), badAnalyzer, DockerContainer{Name: "bad", Image: "bad"}, DockerHost{AgentID: "host1"}); got != nil { + t.Fatalf("expected nil for bad analysis") + } + + canceled, cancel := context.WithCancel(context.Background()) + cancel() + analyzer := &stubAnalyzer{response: `{"service_type":"nginx","service_name":"Nginx","service_version":"1.2","category":"web_server","cli_access":"docker exec {container} nginx -v","facts":[],"config_paths":[],"data_paths":[],"ports":[],"confidence":0.9,"reasoning":"image"}`} + service.SetAIAnalyzer(analyzer) + 
service.discoverDockerContainers(canceled, []DockerHost{ + {AgentID: "host1", Containers: []DockerContainer{{Name: "web2", Image: "nginx:latest"}}}, + }) + analyzer.mu.Lock() + calls := analyzer.calls + analyzer.mu.Unlock() + if calls != 0 { + t.Fatalf("expected analyzer not called on canceled context") + } + + errAnalyzer := errorAnalyzer{} + if got := service.analyzeDockerContainer(context.Background(), errAnalyzer, DockerContainer{Name: "err", Image: "err"}, DockerHost{AgentID: "host1"}); got != nil { + t.Fatalf("expected nil when analyzer returns error") + } + + storePath := filepath.Join(t.TempDir(), "file") + if err := os.WriteFile(storePath, []byte("x"), 0600); err != nil { + t.Fatalf("WriteFile error: %v", err) + } + service.store.dataDir = storePath + service.discoverDockerContainers(context.Background(), []DockerHost{ + {AgentID: "host1", Containers: []DockerContainer{{Name: "web3", Image: "nginx:latest"}}}, + }) +} + +func TestService_CollectFingerprintsRecover(t *testing.T) { + service := NewService(nil, nil, panicStateProvider{}, DefaultConfig()) + service.collectFingerprints(context.Background()) +} + +func TestService_DiscoverResource_SaveError(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + + badPath := filepath.Join(t.TempDir(), "file") + if err := os.WriteFile(badPath, []byte("x"), 0600); err != nil { + t.Fatalf("WriteFile error: %v", err) + } + store.dataDir = badPath + + service := NewService(store, nil, nil, DefaultConfig()) + service.SetAIAnalyzer(&stubAnalyzer{ + response: `{"service_type":"nginx","service_name":"Nginx","service_version":"1.2","category":"web_server","cli_access":"docker exec {container} nginx -v","facts":[],"config_paths":[],"data_paths":[],"ports":[],"confidence":0.9,"reasoning":"image"}`, + }) + + _, err = service.DiscoverResource(context.Background(), DiscoveryRequest{ + ResourceType: ResourceTypeDocker, + ResourceID: "web", + HostID: 
"host1", + Hostname: "host1", + Force: true, + }) + if err == nil || !strings.Contains(err.Error(), "failed to save discovery") { + t.Fatalf("expected save error, got %v", err) + } +} + +func TestService_DiscoverResource_ScanError(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + + scanner := NewDeepScanner(nil) + service := NewService(store, scanner, nil, DefaultConfig()) + service.SetAIAnalyzer(&stubAnalyzer{ + response: `{"service_type":"nginx","service_name":"Nginx","service_version":"1.2","category":"web_server","cli_access":"docker exec {container} nginx -v","facts":[],"config_paths":[],"data_paths":[],"ports":[],"confidence":0.9,"reasoning":"image"}`, + }) + + _, err = service.DiscoverResource(context.Background(), DiscoveryRequest{ + ResourceType: ResourceTypeDocker, + ResourceID: "web", + HostID: "host1", + Hostname: "host1", + Force: true, + }) + if err != nil { + t.Fatalf("expected scan error to be tolerated, got %v", err) + } +} + +func TestService_DiscoveryLoop_ContextDoneAtStart(t *testing.T) { + service := NewService(nil, nil, nil, DefaultConfig()) + service.initialDelay = time.Hour + service.stopCh = make(chan struct{}) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + service.discoveryLoop(ctx) +} + +func TestService_DiscoverResource_WithScanResult(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + + exec := &stubExecutor{ + agents: []ConnectedAgent{{AgentID: "host1", Hostname: "host1"}}, + } + scanner := NewDeepScanner(exec) + scanner.maxParallel = 1 + + state := StateSnapshot{ + DockerHosts: []DockerHost{ + { + AgentID: "host1", + Hostname: "host1", + Containers: []DockerContainer{ + {Name: "web", Image: "nginx:latest", Status: "running"}, + }, + }, + }, + } + + service := NewService(store, scanner, stubStateProvider{state: state}, DefaultConfig()) + 
service.SetAIAnalyzer(&stubAnalyzer{ + response: `{"service_type":"nginx","service_name":"Nginx","service_version":"1.2","category":"web_server","cli_access":"docker exec {container} nginx -v","facts":[],"config_paths":[],"data_paths":[],"ports":[{"port":80,"protocol":"tcp","process":"nginx","address":"0.0.0.0"}],"confidence":0.9,"reasoning":"image"}`, + }) + + existing := &ResourceDiscovery{ + ID: MakeResourceID(ResourceTypeDocker, "host1", "web"), + ResourceType: ResourceTypeDocker, + ResourceID: "web", + HostID: "host1", + Hostname: "host1", + UserNotes: "keep", + UserSecrets: map[string]string{"token": "secret"}, + DiscoveredAt: time.Now().Add(-2 * time.Hour), + } + if err := store.Save(existing); err != nil { + t.Fatalf("Save error: %v", err) + } + + found, err := service.DiscoverResource(context.Background(), DiscoveryRequest{ + ResourceType: ResourceTypeDocker, + ResourceID: "web", + HostID: "host1", + Hostname: "host1", + Force: true, + }) + if err != nil { + t.Fatalf("DiscoverResource error: %v", err) + } + if found.UserNotes != "keep" || found.UserSecrets["token"] != "secret" { + t.Fatalf("expected user fields preserved: %#v", found) + } + if len(found.RawCommandOutput) == 0 { + t.Fatalf("expected raw command output") + } + if found.DiscoveredAt.After(existing.DiscoveredAt) { + t.Fatalf("expected older discovered_at preserved") + } +} + +func TestParseDockerMounts(t *testing.T) { + tests := []struct { + name string + input string + expected []DockerBindMount + }{ + { + name: "empty input", + input: "", + expected: nil, + }, + { + name: "no_docker_mounts marker", + input: "no_docker_mounts", + expected: nil, + }, + { + name: "only done marker", + input: "docker_mounts_done", + expected: nil, + }, + { + name: "single container with bind mount", + input: "CONTAINER:homepage\n/home/user/homepage/config|/app/config|bind\ndocker_mounts_done", + expected: []DockerBindMount{ + {ContainerName: "homepage", Source: "/home/user/homepage/config", Destination: 
"/app/config", Type: "bind"}, + }, + }, + { + name: "single container with volume", + input: "CONTAINER:nginx\nnginx_data|/usr/share/nginx/html|volume\ndocker_mounts_done", + expected: []DockerBindMount{ + {ContainerName: "nginx", Source: "nginx_data", Destination: "/usr/share/nginx/html", Type: "volume"}, + }, + }, + { + name: "multiple containers", + input: "CONTAINER:homepage\n/home/user/config|/app/config|bind\nCONTAINER:watchtower\n/var/run/docker.sock|/var/run/docker.sock|bind\ndocker_mounts_done", + expected: []DockerBindMount{ + {ContainerName: "homepage", Source: "/home/user/config", Destination: "/app/config", Type: "bind"}, + {ContainerName: "watchtower", Source: "/var/run/docker.sock", Destination: "/var/run/docker.sock", Type: "bind"}, + }, + }, + { + name: "container with multiple mounts", + input: "CONTAINER:jellyfin\n/media/movies|/movies|bind\n/media/tv|/tv|bind\n/config/jellyfin|/config|bind\ndocker_mounts_done", + expected: []DockerBindMount{ + {ContainerName: "jellyfin", Source: "/media/movies", Destination: "/movies", Type: "bind"}, + {ContainerName: "jellyfin", Source: "/media/tv", Destination: "/tv", Type: "bind"}, + {ContainerName: "jellyfin", Source: "/config/jellyfin", Destination: "/config", Type: "bind"}, + }, + }, + { + name: "container with no mounts", + input: "CONTAINER:alpine\ndocker_mounts_done", + expected: nil, + }, + { + name: "filters out tmpfs", + input: "CONTAINER:app\n/data|/data|bind\n||tmpfs\ndocker_mounts_done", + expected: []DockerBindMount{ + {ContainerName: "app", Source: "/data", Destination: "/data", Type: "bind"}, + }, + }, + { + name: "mount without type defaults to included", + input: "CONTAINER:app\n/config|/app/config\ndocker_mounts_done", + expected: []DockerBindMount{ + {ContainerName: "app", Source: "/config", Destination: "/app/config", Type: ""}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := parseDockerMounts(tt.input) + if len(result) != len(tt.expected) { + 
t.Fatalf("expected %d mounts, got %d: %#v", len(tt.expected), len(result), result) + } + for i := range tt.expected { + if result[i].ContainerName != tt.expected[i].ContainerName { + t.Errorf("mount %d: expected container %q, got %q", i, tt.expected[i].ContainerName, result[i].ContainerName) + } + if result[i].Source != tt.expected[i].Source { + t.Errorf("mount %d: expected source %q, got %q", i, tt.expected[i].Source, result[i].Source) + } + if result[i].Destination != tt.expected[i].Destination { + t.Errorf("mount %d: expected destination %q, got %q", i, tt.expected[i].Destination, result[i].Destination) + } + if result[i].Type != tt.expected[i].Type { + t.Errorf("mount %d: expected type %q, got %q", i, tt.expected[i].Type, result[i].Type) + } + } + }) + } +} diff --git a/internal/servicediscovery/store.go b/internal/servicediscovery/store.go new file mode 100644 index 000000000..26b43b519 --- /dev/null +++ b/internal/servicediscovery/store.go @@ -0,0 +1,651 @@ +package servicediscovery + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "github.com/rcourtman/pulse-go-rewrite/internal/crypto" + "github.com/rs/zerolog/log" +) + +// CryptoManager interface for encryption/decryption. +type CryptoManager interface { + Encrypt(plaintext []byte) ([]byte, error) + Decrypt(ciphertext []byte) ([]byte, error) +} + +// Store provides encrypted per-resource storage for discovery data. 
+type Store struct { + mu sync.RWMutex + dataDir string + crypto CryptoManager + cache map[string]*ResourceDiscovery // In-memory cache + cacheTime map[string]time.Time // Cache timestamps + cacheTTL time.Duration + + // Fingerprint storage (in-memory with file persistence) + fingerprintDir string + fingerprints map[string]*ContainerFingerprint // resourceID -> fingerprint + fingerprintMu sync.RWMutex + lastFingerprintScan time.Time +} + +// For testing - allows injecting a mock crypto manager +var newCryptoManagerAt = crypto.NewCryptoManagerAt + +// For testing - allows injecting a mock marshaler. +var marshalDiscovery = json.Marshal + +// NewStore creates a new discovery store with automatic encryption. +func NewStore(dataDir string) (*Store, error) { + discoveryDir := filepath.Join(dataDir, "discovery") + if err := os.MkdirAll(discoveryDir, 0700); err != nil { + return nil, fmt.Errorf("failed to create discovery directory: %w", err) + } + + // Create fingerprint subdirectory + fingerprintDir := filepath.Join(discoveryDir, "fingerprints") + if err := os.MkdirAll(fingerprintDir, 0700); err != nil { + return nil, fmt.Errorf("failed to create fingerprint directory: %w", err) + } + + // Initialize crypto manager for encryption (uses same key as other Pulse secrets) + cryptoMgr, err := newCryptoManagerAt(dataDir) + if err != nil { + log.Warn().Err(err).Msg("Failed to initialize crypto for discovery store, data will be unencrypted") + } + + store := &Store{ + dataDir: discoveryDir, + fingerprintDir: fingerprintDir, + crypto: cryptoMgr, + cache: make(map[string]*ResourceDiscovery), + cacheTime: make(map[string]time.Time), + cacheTTL: 5 * time.Minute, + fingerprints: make(map[string]*ContainerFingerprint), + } + + // Load existing fingerprints from disk + store.loadFingerprints() + + return store, nil +} + +// getFilePath returns the file path for a resource ID. 
+func (s *Store) getFilePath(id string) string { + // Sanitize ID for filename: replace : with _ + safeID := strings.ReplaceAll(id, ":", "_") + safeID = strings.ReplaceAll(safeID, "/", "_") + return filepath.Join(s.dataDir, safeID+".enc") +} + +// Save persists a discovery to encrypted storage. +func (s *Store) Save(d *ResourceDiscovery) error { + s.mu.Lock() + defer s.mu.Unlock() + + if d.ID == "" { + return fmt.Errorf("discovery ID is required") + } + + // Update timestamp + d.UpdatedAt = time.Now() + if d.DiscoveredAt.IsZero() { + d.DiscoveredAt = d.UpdatedAt + } + + data, err := marshalDiscovery(d) + if err != nil { + return fmt.Errorf("failed to marshal discovery: %w", err) + } + + // Encrypt if crypto is available + if s.crypto != nil { + encrypted, err := s.crypto.Encrypt(data) + if err != nil { + return fmt.Errorf("failed to encrypt discovery: %w", err) + } + data = encrypted + } + + // Write atomically using tmp file + rename + filePath := s.getFilePath(d.ID) + tmpPath := filePath + ".tmp" + + if err := os.WriteFile(tmpPath, data, 0600); err != nil { + return fmt.Errorf("failed to write discovery file: %w", err) + } + + if err := os.Rename(tmpPath, filePath); err != nil { + _ = os.Remove(tmpPath) + return fmt.Errorf("failed to finalize discovery file: %w", err) + } + + // Update cache + s.cache[d.ID] = d + s.cacheTime[d.ID] = time.Now() + + log.Debug().Str("id", d.ID).Str("service", d.ServiceType).Msg("Discovery saved") + return nil +} + +// Get retrieves a discovery from storage. 
+func (s *Store) Get(id string) (*ResourceDiscovery, error) { + s.mu.RLock() + // Check cache first + if cached, ok := s.cache[id]; ok { + if cacheTime, hasTime := s.cacheTime[id]; hasTime { + if time.Since(cacheTime) < s.cacheTTL { + s.mu.RUnlock() + return cached, nil + } + } + } + s.mu.RUnlock() + + s.mu.Lock() + defer s.mu.Unlock() + + filePath := s.getFilePath(id) + data, err := os.ReadFile(filePath) + if err != nil { + if os.IsNotExist(err) { + return nil, nil // Not found is not an error + } + return nil, fmt.Errorf("failed to read discovery file: %w", err) + } + + // Decrypt if crypto is available + if s.crypto != nil { + decrypted, err := s.crypto.Decrypt(data) + if err != nil { + return nil, fmt.Errorf("failed to decrypt discovery: %w", err) + } + data = decrypted + } + + var discovery ResourceDiscovery + if err := json.Unmarshal(data, &discovery); err != nil { + return nil, fmt.Errorf("failed to unmarshal discovery: %w", err) + } + + // Update cache + s.cache[id] = &discovery + s.cacheTime[id] = time.Now() + + return &discovery, nil +} + +// GetByResource retrieves a discovery by resource type and ID. +func (s *Store) GetByResource(resourceType ResourceType, hostID, resourceID string) (*ResourceDiscovery, error) { + id := MakeResourceID(resourceType, hostID, resourceID) + return s.Get(id) +} + +// Delete removes a discovery from storage. +func (s *Store) Delete(id string) error { + s.mu.Lock() + defer s.mu.Unlock() + + filePath := s.getFilePath(id) + if err := os.Remove(filePath); err != nil { + if os.IsNotExist(err) { + return nil // Already deleted + } + return fmt.Errorf("failed to delete discovery file: %w", err) + } + + // Remove from cache + delete(s.cache, id) + delete(s.cacheTime, id) + + log.Debug().Str("id", id).Msg("Discovery deleted") + return nil +} + +// List returns all discoveries. 
+func (s *Store) List() ([]*ResourceDiscovery, error) { + s.mu.RLock() + defer s.mu.RUnlock() + + entries, err := os.ReadDir(s.dataDir) + if err != nil { + if os.IsNotExist(err) { + return []*ResourceDiscovery{}, nil + } + return nil, fmt.Errorf("failed to list discovery directory: %w", err) + } + + var discoveries []*ResourceDiscovery + for _, entry := range entries { + // Skip tmp files first to avoid reading partial writes. + if strings.HasSuffix(entry.Name(), ".tmp") { + continue + } + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".enc") { + continue + } + + data, err := os.ReadFile(filepath.Join(s.dataDir, entry.Name())) + if err != nil { + log.Warn().Err(err).Str("file", entry.Name()).Msg("Failed to read discovery file") + continue + } + + // Decrypt if crypto is available + if s.crypto != nil { + decrypted, err := s.crypto.Decrypt(data) + if err != nil { + log.Warn().Err(err).Str("file", entry.Name()).Msg("Failed to decrypt discovery") + continue + } + data = decrypted + } + + var discovery ResourceDiscovery + if err := json.Unmarshal(data, &discovery); err != nil { + log.Warn().Err(err).Str("file", entry.Name()).Msg("Failed to unmarshal discovery") + continue + } + + discoveries = append(discoveries, &discovery) + } + + return discoveries, nil +} + +// ListByType returns discoveries for a specific resource type. +func (s *Store) ListByType(resourceType ResourceType) ([]*ResourceDiscovery, error) { + all, err := s.List() + if err != nil { + return nil, err + } + + var filtered []*ResourceDiscovery + for _, d := range all { + if d.ResourceType == resourceType { + filtered = append(filtered, d) + } + } + return filtered, nil +} + +// ListByHost returns discoveries for a specific host. 
+func (s *Store) ListByHost(hostID string) ([]*ResourceDiscovery, error) { + all, err := s.List() + if err != nil { + return nil, err + } + + var filtered []*ResourceDiscovery + for _, d := range all { + if d.HostID == hostID { + filtered = append(filtered, d) + } + } + return filtered, nil +} + +// UpdateNotes updates just the user notes and secrets for a discovery. +func (s *Store) UpdateNotes(id string, notes string, secrets map[string]string) error { + discovery, err := s.Get(id) + if err != nil { + return err + } + if discovery == nil { + return fmt.Errorf("discovery not found: %s", id) + } + + discovery.UserNotes = notes + if secrets != nil { + discovery.UserSecrets = secrets + } + + return s.Save(discovery) +} + +// GetMultiple retrieves multiple discoveries by ID. +func (s *Store) GetMultiple(ids []string) ([]*ResourceDiscovery, error) { + var discoveries []*ResourceDiscovery + for _, id := range ids { + d, err := s.Get(id) + if err != nil { + log.Warn().Err(err).Str("id", id).Msg("Failed to get discovery") + continue + } + if d != nil { + discoveries = append(discoveries, d) + } + } + return discoveries, nil +} + +// ClearCache clears the in-memory cache. +func (s *Store) ClearCache() { + s.mu.Lock() + defer s.mu.Unlock() + s.cache = make(map[string]*ResourceDiscovery) + s.cacheTime = make(map[string]time.Time) +} + +// Exists checks if a discovery exists for the given ID. +func (s *Store) Exists(id string) bool { + s.mu.RLock() + if _, ok := s.cache[id]; ok { + s.mu.RUnlock() + return true + } + s.mu.RUnlock() + + filePath := s.getFilePath(id) + _, err := os.Stat(filePath) + return err == nil +} + +// GetAge returns how old the discovery is, or -1 if not found. +func (s *Store) GetAge(id string) time.Duration { + d, err := s.Get(id) + if err != nil || d == nil { + return -1 + } + return time.Since(d.UpdatedAt) +} + +// NeedsRefresh checks if a discovery needs to be refreshed. 
+func (s *Store) NeedsRefresh(id string, maxAge time.Duration) bool { + age := s.GetAge(id) + if age < 0 { + return true // Not found, needs discovery + } + return age > maxAge +} + +// --- Fingerprint Storage Methods --- + +// getFingerprintFilePath returns the file path for a fingerprint. +func (s *Store) getFingerprintFilePath(resourceID string) string { + // Sanitize ID for filename + safeID := strings.ReplaceAll(resourceID, ":", "_") + safeID = strings.ReplaceAll(safeID, "/", "_") + return filepath.Join(s.fingerprintDir, safeID+".json") +} + +// loadFingerprints loads all fingerprints from disk into memory. +func (s *Store) loadFingerprints() { + s.fingerprintMu.Lock() + defer s.fingerprintMu.Unlock() + + entries, err := os.ReadDir(s.fingerprintDir) + if err != nil { + if !os.IsNotExist(err) { + log.Warn().Err(err).Msg("Failed to read fingerprint directory") + } + return + } + + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".json") { + continue + } + + data, err := os.ReadFile(filepath.Join(s.fingerprintDir, entry.Name())) + if err != nil { + log.Warn().Err(err).Str("file", entry.Name()).Msg("Failed to read fingerprint file") + continue + } + + var fp ContainerFingerprint + if err := json.Unmarshal(data, &fp); err != nil { + log.Warn().Err(err).Str("file", entry.Name()).Msg("Failed to unmarshal fingerprint") + continue + } + + s.fingerprints[fp.ResourceID] = &fp + } + + log.Debug().Int("count", len(s.fingerprints)).Msg("Loaded fingerprints from disk") +} + +// SaveFingerprint stores a container fingerprint. 
+func (s *Store) SaveFingerprint(fp *ContainerFingerprint) error { + if fp == nil || fp.ResourceID == "" { + return fmt.Errorf("fingerprint or resource ID is required") + } + + s.fingerprintMu.Lock() + defer s.fingerprintMu.Unlock() + + // Update in-memory cache + s.fingerprints[fp.ResourceID] = fp + + // Persist to disk + data, err := json.Marshal(fp) + if err != nil { + return fmt.Errorf("failed to marshal fingerprint: %w", err) + } + + filePath := s.getFingerprintFilePath(fp.ResourceID) + tmpPath := filePath + ".tmp" + + if err := os.WriteFile(tmpPath, data, 0600); err != nil { + return fmt.Errorf("failed to write fingerprint file: %w", err) + } + + if err := os.Rename(tmpPath, filePath); err != nil { + _ = os.Remove(tmpPath) + return fmt.Errorf("failed to finalize fingerprint file: %w", err) + } + + return nil +} + +// GetFingerprint retrieves the last known fingerprint for a resource. +func (s *Store) GetFingerprint(resourceID string) (*ContainerFingerprint, error) { + s.fingerprintMu.RLock() + defer s.fingerprintMu.RUnlock() + + fp, ok := s.fingerprints[resourceID] + if !ok { + return nil, nil // Not found is not an error + } + return fp, nil +} + +// GetAllFingerprints returns all stored fingerprints. +func (s *Store) GetAllFingerprints() map[string]*ContainerFingerprint { + s.fingerprintMu.RLock() + defer s.fingerprintMu.RUnlock() + + result := make(map[string]*ContainerFingerprint, len(s.fingerprints)) + for k, v := range s.fingerprints { + result[k] = v + } + return result +} + +// GetChangedResources returns resource IDs where the fingerprint changed since last discovery. +// It compares the stored fingerprint hash against the discovery's fingerprint field. 
func (s *Store) GetChangedResources() ([]string, error) {
	// Snapshot the fingerprint map under its own lock so we do not hold
	// fingerprintMu while s.Get takes the store lock below.
	s.fingerprintMu.RLock()
	fingerprints := make(map[string]*ContainerFingerprint, len(s.fingerprints))
	for k, v := range s.fingerprints {
		fingerprints[k] = v
	}
	s.fingerprintMu.RUnlock()

	var changed []string
	for resourceID, fp := range fingerprints {
		// Build the full discovery ID
		// NOTE(review): the ID is always built with ResourceTypeDocker —
		// presumably fingerprints are only kept for plain docker containers;
		// confirm docker_vm/docker_lxc resources never land here, as their
		// discoveries would never match this ID.
		discoveryID := MakeResourceID(ResourceTypeDocker, fp.HostID, resourceID)

		// Get the discovery
		discovery, err := s.Get(discoveryID)
		if err != nil {
			// Unreadable discoveries are silently skipped, not reported.
			continue
		}

		// If no discovery exists, it needs discovery
		if discovery == nil {
			changed = append(changed, discoveryID)
			continue
		}

		// If fingerprint hash differs from discovery's stored fingerprint, it changed
		if discovery.Fingerprint != fp.Hash {
			changed = append(changed, discoveryID)
		}
	}

	return changed, nil
}

// GetStaleResources returns resources not discovered in maxAge duration.
// Staleness is measured from DiscoveredAt (set once, on first save), not
// UpdatedAt — so metadata-only saves do not make a resource look fresh.
func (s *Store) GetStaleResources(maxAge time.Duration) ([]string, error) {
	discoveries, err := s.List()
	if err != nil {
		return nil, err
	}

	var stale []string
	now := time.Now()
	for _, d := range discoveries {
		if now.Sub(d.DiscoveredAt) > maxAge {
			stale = append(stale, d.ID)
		}
	}

	return stale, nil
}

// SetLastFingerprintScan updates the timestamp of the last fingerprint scan.
func (s *Store) SetLastFingerprintScan(t time.Time) {
	s.fingerprintMu.Lock()
	defer s.fingerprintMu.Unlock()
	s.lastFingerprintScan = t
}

// GetLastFingerprintScan returns the timestamp of the last fingerprint scan.
func (s *Store) GetLastFingerprintScan() time.Time {
	s.fingerprintMu.RLock()
	defer s.fingerprintMu.RUnlock()
	return s.lastFingerprintScan
}

// GetFingerprintCount returns the number of stored fingerprints.
+func (s *Store) GetFingerprintCount() int { + s.fingerprintMu.RLock() + defer s.fingerprintMu.RUnlock() + return len(s.fingerprints) +} + +// CleanupOrphanedFingerprints removes fingerprints for resources that no longer exist. +// Pass in a set of current resource IDs (e.g., "docker:host1:nginx", "lxc:node1:101"). +// Returns the number of fingerprints removed. +func (s *Store) CleanupOrphanedFingerprints(currentResourceIDs map[string]bool) int { + s.fingerprintMu.Lock() + defer s.fingerprintMu.Unlock() + + removed := 0 + for fpID := range s.fingerprints { + if !currentResourceIDs[fpID] { + // Remove from memory + delete(s.fingerprints, fpID) + + // Remove from disk + filePath := s.getFingerprintFilePath(fpID) + if err := os.Remove(filePath); err != nil && !os.IsNotExist(err) { + log.Warn().Err(err).Str("id", fpID).Msg("Failed to remove orphaned fingerprint file") + } else { + log.Debug().Str("id", fpID).Msg("Removed orphaned fingerprint") + } + removed++ + } + } + + return removed +} + +// CleanupOrphanedDiscoveries removes discoveries for resources that no longer exist. +// Pass in a set of current resource IDs. +// Returns the number of discoveries removed. 
+func (s *Store) CleanupOrphanedDiscoveries(currentResourceIDs map[string]bool) int { + // List all discovery files + entries, err := os.ReadDir(s.dataDir) + if err != nil { + log.Warn().Err(err).Msg("Failed to read discovery directory for cleanup") + return 0 + } + + removed := 0 + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".enc") { + continue + } + + // Convert filename back to resource ID + // Filename format: type_host_id.enc (underscores replace colons and slashes) + baseName := strings.TrimSuffix(entry.Name(), ".enc") + resourceID := filenameToResourceID(baseName) + + if !currentResourceIDs[resourceID] { + filePath := filepath.Join(s.dataDir, entry.Name()) + if err := os.Remove(filePath); err != nil { + log.Warn().Err(err).Str("file", entry.Name()).Msg("Failed to remove orphaned discovery file") + } else { + log.Debug().Str("id", resourceID).Msg("Removed orphaned discovery") + removed++ + } + } + } + + return removed +} + +// filenameToResourceID converts a discovery filename back to a resource ID. +// Reverses the transformation done in getFilePath. 
+func filenameToResourceID(filename string) string { + // The filename uses underscores for colons and slashes + // We need to be smart about this - the format is type_host_resourceid + // First underscore separates type, rest could have underscores in host/resource names + + parts := strings.SplitN(filename, "_", 3) + if len(parts) < 3 { + return filename // Can't parse, return as-is + } + + resourceType := parts[0] + host := parts[1] + resourceID := parts[2] + + // For k8s, the resource ID might have been namespace/name which became namespace_name + // We convert back: k8s:cluster:namespace/name + if resourceType == "k8s" && strings.Contains(resourceID, "_") { + // Could be namespace_name, convert back to namespace/name + resourceID = strings.Replace(resourceID, "_", "/", 1) + } + + return resourceType + ":" + host + ":" + resourceID +} + +// ListDiscoveryIDs returns all discovery IDs currently stored. +func (s *Store) ListDiscoveryIDs() []string { + entries, err := os.ReadDir(s.dataDir) + if err != nil { + return nil + } + + var ids []string + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".enc") { + continue + } + baseName := strings.TrimSuffix(entry.Name(), ".enc") + ids = append(ids, filenameToResourceID(baseName)) + } + return ids +} diff --git a/internal/servicediscovery/store_test.go b/internal/servicediscovery/store_test.go new file mode 100644 index 000000000..374797af4 --- /dev/null +++ b/internal/servicediscovery/store_test.go @@ -0,0 +1,469 @@ +package servicediscovery + +import ( + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/rcourtman/pulse-go-rewrite/internal/crypto" +) + +type fakeCrypto struct{} + +func (fakeCrypto) Encrypt(plaintext []byte) ([]byte, error) { + out := make([]byte, len(plaintext)) + for i := range plaintext { + out[i] = plaintext[len(plaintext)-1-i] + } + return out, nil +} + +func (fakeCrypto) Decrypt(ciphertext []byte) ([]byte, error) { + 
return fakeCrypto{}.Encrypt(ciphertext) +} + +type errorCrypto struct{} + +func (errorCrypto) Encrypt(plaintext []byte) ([]byte, error) { + return nil, os.ErrInvalid +} + +func (errorCrypto) Decrypt(ciphertext []byte) ([]byte, error) { + return nil, os.ErrInvalid +} + +func TestStore_SaveGetListAndNotes(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + + d1 := &ResourceDiscovery{ + ID: MakeResourceID(ResourceTypeDocker, "host1", "nginx"), + ResourceType: ResourceTypeDocker, + ResourceID: "nginx", + HostID: "host1", + ServiceName: "Nginx", + } + if err := store.Save(d1); err != nil { + t.Fatalf("Save error: %v", err) + } + + got, err := store.Get(d1.ID) + if err != nil { + t.Fatalf("Get error: %v", err) + } + if got == nil || got.ServiceName != "Nginx" { + t.Fatalf("unexpected discovery: %#v", got) + } + if !store.Exists(d1.ID) { + t.Fatalf("expected discovery to exist") + } + + if err := store.UpdateNotes(d1.ID, "notes", map[string]string{"token": "abc"}); err != nil { + t.Fatalf("UpdateNotes error: %v", err) + } + updated, err := store.Get(d1.ID) + if err != nil { + t.Fatalf("Get updated error: %v", err) + } + if updated.UserNotes != "notes" || updated.UserSecrets["token"] != "abc" { + t.Fatalf("notes not updated: %#v", updated) + } + + d2 := &ResourceDiscovery{ + ID: MakeResourceID(ResourceTypeVM, "node1", "101"), + ResourceType: ResourceTypeVM, + ResourceID: "101", + HostID: "node1", + ServiceName: "VM", + } + if err := store.Save(d2); err != nil { + t.Fatalf("Save d2 error: %v", err) + } + + list, err := store.List() + if err != nil { + t.Fatalf("List error: %v", err) + } + if len(list) != 2 { + t.Fatalf("expected 2 discoveries, got %d", len(list)) + } + + byType, err := store.ListByType(ResourceTypeVM) + if err != nil { + t.Fatalf("ListByType error: %v", err) + } + if len(byType) != 1 || byType[0].ID != d2.ID { + t.Fatalf("unexpected ListByType: %#v", byType) + } + + byHost, 
err := store.ListByHost("host1") + if err != nil { + t.Fatalf("ListByHost error: %v", err) + } + if len(byHost) != 1 || byHost[0].ID != d1.ID { + t.Fatalf("unexpected ListByHost: %#v", byHost) + } + + summary := updated.ToSummary() + if summary.ID != d1.ID || !summary.HasUserNotes { + t.Fatalf("unexpected summary: %#v", summary) + } + + if err := store.Delete(d1.ID); err != nil { + t.Fatalf("Delete error: %v", err) + } + if store.Exists(d1.ID) { + t.Fatalf("expected discovery to be deleted") + } +} + +func TestStore_CryptoRoundTripAndPaths(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = fakeCrypto{} + + id := "docker:host1:app/name" + d := &ResourceDiscovery{ + ID: id, + ResourceType: ResourceTypeDocker, + ResourceID: "app/name", + HostID: "host1", + ServiceName: "App", + } + if err := store.Save(d); err != nil { + t.Fatalf("Save error: %v", err) + } + + path := store.getFilePath(id) + base := filepath.Base(path) + if strings.Contains(base, ":") || strings.Contains(base, "/") { + t.Fatalf("expected sanitized base filename, got %s", base) + } + + loaded, err := store.Get(id) + if err != nil { + t.Fatalf("Get error: %v", err) + } + if loaded == nil || loaded.ServiceName != "App" { + t.Fatalf("unexpected discovery: %#v", loaded) + } + + store.ClearCache() + if _, err := store.Get(id); err != nil { + t.Fatalf("Get with decrypt error: %v", err) + } + list, err := store.List() + if err != nil || len(list) != 1 { + t.Fatalf("List with decrypt error: %v len=%d", err, len(list)) + } +} + +func TestStore_NeedsRefreshAndGetMultiple(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + + if !store.NeedsRefresh("missing", time.Minute) { + t.Fatalf("expected missing discovery to need refresh") + } + + d := &ResourceDiscovery{ + ID: MakeResourceID(ResourceTypeHost, "host1", "host1"), + ResourceType: ResourceTypeHost, 
+ ResourceID: "host1", + HostID: "host1", + ServiceName: "Host", + } + if err := store.Save(d); err != nil { + t.Fatalf("Save error: %v", err) + } + + path := store.getFilePath(d.ID) + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("ReadFile error: %v", err) + } + var saved ResourceDiscovery + if err := json.Unmarshal(data, &saved); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + saved.UpdatedAt = time.Now().Add(-2 * time.Hour) + data, err = json.Marshal(&saved) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if err := os.WriteFile(path, data, 0600); err != nil { + t.Fatalf("WriteFile error: %v", err) + } + + store.ClearCache() + if !store.NeedsRefresh(d.ID, time.Minute) { + t.Fatalf("expected old discovery to need refresh") + } + + ids := []string{d.ID, "missing"} + multi, err := store.GetMultiple(ids) + if err != nil { + t.Fatalf("GetMultiple error: %v", err) + } + if len(multi) != 1 || multi[0].ID != d.ID { + t.Fatalf("unexpected GetMultiple: %#v", multi) + } +} + +func TestStore_ErrorsAndListSkips(t *testing.T) { + dir := t.TempDir() + store, err := NewStore(dir) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + + if err := store.Save(&ResourceDiscovery{}); err == nil { + t.Fatalf("expected error for empty ID") + } + + store.crypto = errorCrypto{} + if err := store.Save(&ResourceDiscovery{ID: "bad"}); err == nil { + t.Fatalf("expected encrypt error") + } + + store.crypto = nil + if _, err := store.Get("missing"); err != nil { + t.Fatalf("unexpected missing error: %v", err) + } + + d := &ResourceDiscovery{ + ID: MakeResourceID(ResourceTypeDocker, "host1", "web"), + ResourceType: ResourceTypeDocker, + ResourceID: "web", + HostID: "host1", + ServiceName: "Web", + UserSecrets: map[string]string{"token": "abc"}, + } + if err := store.Save(d); err != nil { + t.Fatalf("Save error: %v", err) + } + + // Corrupt file to force unmarshal error during List. 
+ badPath := filepath.Join(store.dataDir, "bad.enc") + if err := os.WriteFile(badPath, []byte("{bad"), 0600); err != nil { + t.Fatalf("WriteFile error: %v", err) + } + if err := os.WriteFile(filepath.Join(store.dataDir, "note.txt"), []byte("skip"), 0600); err != nil { + t.Fatalf("WriteFile error: %v", err) + } + if err := os.WriteFile(filepath.Join(store.dataDir, "skip.enc.tmp"), []byte("skip"), 0600); err != nil { + t.Fatalf("WriteFile error: %v", err) + } + if err := os.MkdirAll(filepath.Join(store.dataDir, "dir"), 0700); err != nil { + t.Fatalf("MkdirAll error: %v", err) + } + unreadable := filepath.Join(store.dataDir, "unreadable.enc") + if err := os.WriteFile(unreadable, []byte("nope"), 0000); err != nil { + t.Fatalf("WriteFile error: %v", err) + } + + list, err := store.List() + if err != nil { + t.Fatalf("List error: %v", err) + } + if len(list) != 1 { + t.Fatalf("expected 1 discovery, got %d", len(list)) + } + + store.crypto = errorCrypto{} + list, err = store.List() + if err != nil { + t.Fatalf("List with crypto error: %v", err) + } + if len(list) != 0 { + t.Fatalf("expected crypto errors to skip entries") + } + + store.crypto = errorCrypto{} + store.ClearCache() + if _, err := store.Get(d.ID); err == nil { + t.Fatalf("expected decrypt error") + } + + store.crypto = nil + if got, err := store.GetByResource(ResourceTypeDocker, "host1", "web"); err != nil || got == nil { + t.Fatalf("GetByResource error: %v", err) + } + + if err := store.UpdateNotes(d.ID, "notes-only", nil); err != nil { + t.Fatalf("UpdateNotes error: %v", err) + } + updated, err := store.Get(d.ID) + if err != nil || updated.UserSecrets == nil { + t.Fatalf("expected secrets to be preserved: %#v err=%v", updated, err) + } + + store.crypto = errorCrypto{} + store.ClearCache() + if err := store.UpdateNotes(d.ID, "notes", nil); err == nil { + t.Fatalf("expected update notes error with crypto failure") + } + if got, err := store.GetMultiple([]string{d.ID}); err != nil || len(got) != 0 { + 
t.Fatalf("expected GetMultiple to skip errors") + } + + if err := store.UpdateNotes("missing", "notes", nil); err == nil { + t.Fatalf("expected error for missing discovery") + } + + if err := store.Delete("missing"); err != nil { + t.Fatalf("unexpected delete error: %v", err) + } +} + +func TestStore_NewStoreError(t *testing.T) { + dir := t.TempDir() + file := filepath.Join(dir, "file") + if err := os.WriteFile(file, []byte("x"), 0600); err != nil { + t.Fatalf("WriteFile error: %v", err) + } + + if _, err := NewStore(file); err == nil { + t.Fatalf("expected error for file data dir") + } +} + +func TestStore_NewStoreCryptoFailure(t *testing.T) { + orig := newCryptoManagerAt + newCryptoManagerAt = func(dataDir string) (*crypto.CryptoManager, error) { + manager, err := crypto.NewCryptoManagerAt(dataDir) + if err != nil { + return nil, err + } + return manager, os.ErrInvalid + } + t.Cleanup(func() { + newCryptoManagerAt = orig + }) + + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + if store.crypto == nil { + t.Fatalf("expected crypto manager despite init warning") + } +} + +func TestStore_SaveMarshalError(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + + orig := marshalDiscovery + marshalDiscovery = func(any) ([]byte, error) { + return nil, os.ErrInvalid + } + t.Cleanup(func() { + marshalDiscovery = orig + }) + + if err := store.Save(&ResourceDiscovery{ID: "marshal"}); err == nil { + t.Fatalf("expected marshal error") + } +} + +func TestStore_SaveAndGetErrors(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + + id := MakeResourceID(ResourceTypeDocker, "host1", "web") + filePath := store.getFilePath(id) + if err := os.MkdirAll(filePath, 0700); err != nil { + t.Fatalf("MkdirAll error: %v", err) + } + if err := store.Save(&ResourceDiscovery{ID: id}); 
err == nil { + t.Fatalf("expected rename error") + } + + tmpFile := filepath.Join(t.TempDir(), "file") + if err := os.WriteFile(tmpFile, []byte("x"), 0600); err != nil { + t.Fatalf("WriteFile error: %v", err) + } + store.dataDir = tmpFile + if err := store.Save(&ResourceDiscovery{ID: "bad"}); err == nil { + t.Fatalf("expected write error") + } + + store.dataDir = t.TempDir() + store.crypto = nil + badPath := store.getFilePath("bad") + if err := os.WriteFile(badPath, []byte("{bad"), 0600); err != nil { + t.Fatalf("WriteFile error: %v", err) + } + if _, err := store.Get("bad"); err == nil { + t.Fatalf("expected unmarshal error") + } +} + +func TestStore_ListErrors(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + + store.dataDir = filepath.Join(t.TempDir(), "missing") + list, err := store.List() + if err != nil || len(list) != 0 { + t.Fatalf("expected empty list for missing dir") + } + + file := filepath.Join(t.TempDir(), "file") + if err := os.WriteFile(file, []byte("x"), 0600); err != nil { + t.Fatalf("WriteFile error: %v", err) + } + store.dataDir = file + if _, err := store.List(); err == nil { + t.Fatalf("expected list error for file path") + } + if _, err := store.ListByType(ResourceTypeDocker); err == nil { + t.Fatalf("expected list by type error") + } + if _, err := store.ListByHost("host1"); err == nil { + t.Fatalf("expected list by host error") + } +} + +func TestStore_DeleteError(t *testing.T) { + store, err := NewStore(t.TempDir()) + if err != nil { + t.Fatalf("NewStore error: %v", err) + } + store.crypto = nil + + id := MakeResourceID(ResourceTypeDocker, "host1", "dir") + filePath := store.getFilePath(id) + if err := os.MkdirAll(filePath, 0700); err != nil { + t.Fatalf("MkdirAll error: %v", err) + } + nested := filepath.Join(filePath, "nested") + if err := os.WriteFile(nested, []byte("x"), 0600); err != nil { + t.Fatalf("WriteFile error: %v", err) + } + if err := 
store.Delete(id); err == nil { + t.Fatalf("expected delete error for non-empty dir") + } +} diff --git a/internal/servicediscovery/tools_adapter.go b/internal/servicediscovery/tools_adapter.go new file mode 100644 index 000000000..143063a28 --- /dev/null +++ b/internal/servicediscovery/tools_adapter.go @@ -0,0 +1,226 @@ +package servicediscovery + +import ( + "context" + + "github.com/rcourtman/pulse-go-rewrite/internal/ai/tools" +) + +// ToolsAdapter wraps Service to implement tools.DiscoverySource +type ToolsAdapter struct { + service *Service +} + +// NewToolsAdapter creates a new adapter for the discovery service +func NewToolsAdapter(service *Service) *ToolsAdapter { + if service == nil { + return nil + } + return &ToolsAdapter{service: service} +} + +// GetDiscovery implements tools.DiscoverySource +func (a *ToolsAdapter) GetDiscovery(id string) (tools.DiscoverySourceData, error) { + discovery, err := a.service.GetDiscovery(id) + if err != nil { + return tools.DiscoverySourceData{}, err + } + if discovery == nil { + return tools.DiscoverySourceData{}, nil + } + return a.convertToSourceData(discovery), nil +} + +// GetDiscoveryByResource implements tools.DiscoverySource +func (a *ToolsAdapter) GetDiscoveryByResource(resourceType, hostID, resourceID string) (tools.DiscoverySourceData, error) { + discovery, err := a.service.GetDiscoveryByResource(ResourceType(resourceType), hostID, resourceID) + if err != nil { + return tools.DiscoverySourceData{}, err + } + if discovery == nil { + return tools.DiscoverySourceData{}, nil + } + return a.convertToSourceData(discovery), nil +} + +// ListDiscoveries implements tools.DiscoverySource +func (a *ToolsAdapter) ListDiscoveries() ([]tools.DiscoverySourceData, error) { + discoveries, err := a.service.ListDiscoveries() + if err != nil { + return nil, err + } + return a.convertList(discoveries), nil +} + +// ListDiscoveriesByType implements tools.DiscoverySource +func (a *ToolsAdapter) ListDiscoveriesByType(resourceType 
string) ([]tools.DiscoverySourceData, error) { + discoveries, err := a.service.ListDiscoveriesByType(ResourceType(resourceType)) + if err != nil { + return nil, err + } + return a.convertList(discoveries), nil +} + +// ListDiscoveriesByHost implements tools.DiscoverySource +func (a *ToolsAdapter) ListDiscoveriesByHost(hostID string) ([]tools.DiscoverySourceData, error) { + discoveries, err := a.service.ListDiscoveriesByHost(hostID) + if err != nil { + return nil, err + } + return a.convertList(discoveries), nil +} + +// FormatForAIContext implements tools.DiscoverySource +func (a *ToolsAdapter) FormatForAIContext(sourceData []tools.DiscoverySourceData) string { + // Convert back to ResourceDiscovery for formatting + discoveries := make([]*ResourceDiscovery, 0, len(sourceData)) + for _, sd := range sourceData { + discoveries = append(discoveries, a.convertFromSourceData(sd)) + } + return FormatForAIContext(discoveries) +} + +// TriggerDiscovery implements tools.DiscoverySource - initiates discovery for a resource +func (a *ToolsAdapter) TriggerDiscovery(ctx context.Context, resourceType, hostID, resourceID string) (tools.DiscoverySourceData, error) { + req := DiscoveryRequest{ + ResourceType: ResourceType(resourceType), + HostID: hostID, + ResourceID: resourceID, + Force: false, // Don't force if recently discovered + } + + discovery, err := a.service.DiscoverResource(ctx, req) + if err != nil { + return tools.DiscoverySourceData{}, err + } + if discovery == nil { + return tools.DiscoverySourceData{}, nil + } + return a.convertToSourceData(discovery), nil +} + +func (a *ToolsAdapter) convertToSourceData(d *ResourceDiscovery) tools.DiscoverySourceData { + facts := make([]tools.DiscoverySourceFact, 0, len(d.Facts)) + for _, f := range d.Facts { + facts = append(facts, tools.DiscoverySourceFact{ + Category: string(f.Category), + Key: f.Key, + Value: f.Value, + Source: f.Source, + Confidence: f.Confidence, + }) + } + + ports := make([]tools.DiscoverySourcePort, 0, 
len(d.Ports)) + for _, p := range d.Ports { + ports = append(ports, tools.DiscoverySourcePort{ + Port: p.Port, + Protocol: p.Protocol, + Process: p.Process, + Address: p.Address, + }) + } + + dockerMounts := make([]tools.DiscoverySourceDockerMount, 0, len(d.DockerMounts)) + for _, m := range d.DockerMounts { + dockerMounts = append(dockerMounts, tools.DiscoverySourceDockerMount{ + ContainerName: m.ContainerName, + Source: m.Source, + Destination: m.Destination, + Type: m.Type, + ReadOnly: m.ReadOnly, + }) + } + + return tools.DiscoverySourceData{ + ID: d.ID, + ResourceType: string(d.ResourceType), + ResourceID: d.ResourceID, + HostID: d.HostID, + Hostname: d.Hostname, + ServiceType: d.ServiceType, + ServiceName: d.ServiceName, + ServiceVersion: d.ServiceVersion, + Category: string(d.Category), + CLIAccess: d.CLIAccess, + Facts: facts, + ConfigPaths: d.ConfigPaths, + DataPaths: d.DataPaths, + LogPaths: d.LogPaths, + Ports: ports, + DockerMounts: dockerMounts, + UserNotes: d.UserNotes, + Confidence: d.Confidence, + AIReasoning: d.AIReasoning, + DiscoveredAt: d.DiscoveredAt, + UpdatedAt: d.UpdatedAt, + } +} + +func (a *ToolsAdapter) convertFromSourceData(sd tools.DiscoverySourceData) *ResourceDiscovery { + facts := make([]DiscoveryFact, 0, len(sd.Facts)) + for _, f := range sd.Facts { + facts = append(facts, DiscoveryFact{ + Category: FactCategory(f.Category), + Key: f.Key, + Value: f.Value, + Source: f.Source, + Confidence: f.Confidence, + }) + } + + ports := make([]PortInfo, 0, len(sd.Ports)) + for _, p := range sd.Ports { + ports = append(ports, PortInfo{ + Port: p.Port, + Protocol: p.Protocol, + Process: p.Process, + Address: p.Address, + }) + } + + dockerMounts := make([]DockerBindMount, 0, len(sd.DockerMounts)) + for _, m := range sd.DockerMounts { + dockerMounts = append(dockerMounts, DockerBindMount{ + ContainerName: m.ContainerName, + Source: m.Source, + Destination: m.Destination, + Type: m.Type, + ReadOnly: m.ReadOnly, + }) + } + + return 
&ResourceDiscovery{ + ID: sd.ID, + ResourceType: ResourceType(sd.ResourceType), + ResourceID: sd.ResourceID, + HostID: sd.HostID, + Hostname: sd.Hostname, + ServiceType: sd.ServiceType, + ServiceName: sd.ServiceName, + ServiceVersion: sd.ServiceVersion, + Category: ServiceCategory(sd.Category), + CLIAccess: sd.CLIAccess, + Facts: facts, + ConfigPaths: sd.ConfigPaths, + DataPaths: sd.DataPaths, + LogPaths: sd.LogPaths, + Ports: ports, + DockerMounts: dockerMounts, + UserNotes: sd.UserNotes, + Confidence: sd.Confidence, + AIReasoning: sd.AIReasoning, + DiscoveredAt: sd.DiscoveredAt, + UpdatedAt: sd.UpdatedAt, + } +} + +func (a *ToolsAdapter) convertList(discoveries []*ResourceDiscovery) []tools.DiscoverySourceData { + result := make([]tools.DiscoverySourceData, 0, len(discoveries)) + for _, d := range discoveries { + if d != nil { + result = append(result, a.convertToSourceData(d)) + } + } + return result +} diff --git a/internal/servicediscovery/types.go b/internal/servicediscovery/types.go new file mode 100644 index 000000000..7bc8b8362 --- /dev/null +++ b/internal/servicediscovery/types.go @@ -0,0 +1,298 @@ +// Package servicediscovery provides AI-powered infrastructure discovery capabilities. +// It discovers services, versions, configurations, and CLI access methods +// for VMs, LXCs, Docker containers, Kubernetes pods, and hosts. +package servicediscovery + +import ( + "fmt" + "time" +) + +// ResourceType identifies the type of infrastructure resource. +type ResourceType string + +const ( + ResourceTypeVM ResourceType = "vm" + ResourceTypeLXC ResourceType = "lxc" + ResourceTypeDocker ResourceType = "docker" + ResourceTypeK8s ResourceType = "k8s" + ResourceTypeHost ResourceType = "host" + ResourceTypeDockerVM ResourceType = "docker_vm" // Docker on a VM + ResourceTypeDockerLXC ResourceType = "docker_lxc" // Docker in an LXC +) + +// FactCategory categorizes discovery facts.
+type FactCategory string + +const ( + FactCategoryVersion FactCategory = "version" + FactCategoryConfig FactCategory = "config" + FactCategoryService FactCategory = "service" + FactCategoryPort FactCategory = "port" + FactCategoryHardware FactCategory = "hardware" + FactCategoryNetwork FactCategory = "network" + FactCategoryStorage FactCategory = "storage" + FactCategoryDependency FactCategory = "dependency" + FactCategorySecurity FactCategory = "security" +) + +// ServiceCategory categorizes the type of service discovered. +type ServiceCategory string + +const ( + CategoryDatabase ServiceCategory = "database" + CategoryWebServer ServiceCategory = "web_server" + CategoryCache ServiceCategory = "cache" + CategoryMessageQueue ServiceCategory = "message_queue" + CategoryMonitoring ServiceCategory = "monitoring" + CategoryBackup ServiceCategory = "backup" + CategoryNVR ServiceCategory = "nvr" + CategoryStorage ServiceCategory = "storage" + CategoryContainer ServiceCategory = "container" + CategoryVirtualizer ServiceCategory = "virtualizer" + CategoryNetwork ServiceCategory = "network" + CategorySecurity ServiceCategory = "security" + CategoryMedia ServiceCategory = "media" + CategoryHomeAuto ServiceCategory = "home_automation" + CategoryUnknown ServiceCategory = "unknown" +) + +// ResourceDiscovery is the main data model for discovered resource information. +type ResourceDiscovery struct { + // Identity + ID string `json:"id"` // Unique ID: "lxc:minipc:101" + ResourceType ResourceType `json:"resource_type"` // vm, lxc, docker, k8s, host + ResourceID string `json:"resource_id"` // 101, container-name, etc. 
+ HostID string `json:"host_id"` // Proxmox node name or host agent ID + Hostname string `json:"hostname"` // Human-readable host name + + // AI-discovered info + ServiceType string `json:"service_type"` // frigate, postgres, pbs + ServiceName string `json:"service_name"` // Human-readable name + ServiceVersion string `json:"service_version"` // v0.13.2 + Category ServiceCategory `json:"category"` // nvr, database, backup + CLIAccess string `json:"cli_access"` // pct exec 101 -- ... + + // Deep discovery facts + Facts []DiscoveryFact `json:"facts"` + ConfigPaths []string `json:"config_paths"` + DataPaths []string `json:"data_paths"` + LogPaths []string `json:"log_paths"` + Ports []PortInfo `json:"ports"` + DockerMounts []DockerBindMount `json:"docker_mounts,omitempty"` // Docker container bind mounts (source->dest) + + // User-added (also encrypted) + UserNotes string `json:"user_notes"` + UserSecrets map[string]string `json:"user_secrets"` // tokens, creds + + // Metadata + Confidence float64 `json:"confidence"` // 0-1 confidence score + AIReasoning string `json:"ai_reasoning"` // AI explanation + DiscoveredAt time.Time `json:"discovered_at"` // First discovery + UpdatedAt time.Time `json:"updated_at"` // Last update + ScanDuration int64 `json:"scan_duration"` // Scan duration in ms + + // Fingerprint tracking for just-in-time discovery + Fingerprint string `json:"fingerprint,omitempty"` // Hash when discovery was done + FingerprintedAt time.Time `json:"fingerprinted_at,omitempty"` // When fingerprint was captured + FingerprintSchemaVersion int `json:"fingerprint_schema_version,omitempty"` // Schema version when fingerprint was captured + CLIAccessVersion int `json:"cli_access_version,omitempty"` // Version of CLI access pattern format + + // Raw data for debugging/re-analysis + RawCommandOutput map[string]string `json:"raw_command_output,omitempty"` +} + +// DiscoveryFact represents a single discovered fact about a resource. 
+type DiscoveryFact struct { + Category FactCategory `json:"category"` // version, config, service, port + Key string `json:"key"` // e.g., "coral_tpu", "mqtt_broker" + Value string `json:"value"` // e.g., "/dev/apex_0", "mosquitto:1883" + Source string `json:"source"` // command that found this + Confidence float64 `json:"confidence"` // 0-1 confidence for this fact + DiscoveredAt time.Time `json:"discovered_at"` +} + +// PortInfo represents information about a listening port. +type PortInfo struct { + Port int `json:"port"` + Protocol string `json:"protocol"` // tcp, udp + Process string `json:"process"` // process name + Address string `json:"address"` // bind address +} + +// DockerBindMount represents a Docker bind mount with source and destination paths. +// This is critical for knowing where to actually edit files - the source path on the +// host filesystem, not the destination path inside the container. +type DockerBindMount struct { + ContainerName string `json:"container_name"` // Docker container name + Source string `json:"source"` // Host path (where to actually write files) + Destination string `json:"destination"` // Container path (what the service sees) + Type string `json:"type,omitempty"` // Mount type: bind, volume, tmpfs + ReadOnly bool `json:"read_only,omitempty"` // Whether mount is read-only +} + +// MakeResourceID creates a standardized resource ID. +func MakeResourceID(resourceType ResourceType, hostID, resourceID string) string { + return fmt.Sprintf("%s:%s:%s", resourceType, hostID, resourceID) +} + +// ParseResourceID parses a resource ID into its components. 
+func ParseResourceID(id string) (resourceType ResourceType, hostID, resourceID string, err error) { + var parts [3]string + count := 0 + start := 0 + for i, c := range id { + if c == ':' { + if count < 2 { + parts[count] = id[start:i] + count++ + start = i + 1 + } + } + } + if count == 2 { + parts[2] = id[start:] + return ResourceType(parts[0]), parts[1], parts[2], nil + } + return "", "", "", fmt.Errorf("invalid resource ID format: %s", id) +} + +// DiscoveryRequest represents a request to discover a resource. +type DiscoveryRequest struct { + ResourceType ResourceType `json:"resource_type"` + ResourceID string `json:"resource_id"` + HostID string `json:"host_id"` + Hostname string `json:"hostname"` + Force bool `json:"force"` // Force re-scan even if recent +} + +// DiscoveryStatus represents the status of a discovery scan. +type DiscoveryStatus string + +const ( + DiscoveryStatusPending DiscoveryStatus = "pending" + DiscoveryStatusRunning DiscoveryStatus = "running" + DiscoveryStatusCompleted DiscoveryStatus = "completed" + DiscoveryStatusFailed DiscoveryStatus = "failed" + DiscoveryStatusNotStarted DiscoveryStatus = "not_started" +) + +// DiscoveryProgress represents the progress of an ongoing discovery. +type DiscoveryProgress struct { + ResourceID string `json:"resource_id"` + Status DiscoveryStatus `json:"status"` + CurrentStep string `json:"current_step"` + CurrentCommand string `json:"current_command,omitempty"` + TotalSteps int `json:"total_steps"` + CompletedSteps int `json:"completed_steps"` + ElapsedMs int64 `json:"elapsed_ms,omitempty"` + PercentComplete float64 `json:"percent_complete,omitempty"` + StartedAt time.Time `json:"started_at"` + Error string `json:"error,omitempty"` +} + +// UpdateNotesRequest represents a request to update user notes. 
+type UpdateNotesRequest struct { + UserNotes string `json:"user_notes"` + UserSecrets map[string]string `json:"user_secrets,omitempty"` +} + +// DiscoverySummary provides a summary of discoveries for listing. +type DiscoverySummary struct { + ID string `json:"id"` + ResourceType ResourceType `json:"resource_type"` + ResourceID string `json:"resource_id"` + HostID string `json:"host_id"` + Hostname string `json:"hostname"` + ServiceType string `json:"service_type"` + ServiceName string `json:"service_name"` + ServiceVersion string `json:"service_version"` + Category ServiceCategory `json:"category"` + Confidence float64 `json:"confidence"` + HasUserNotes bool `json:"has_user_notes"` + UpdatedAt time.Time `json:"updated_at"` + Fingerprint string `json:"fingerprint,omitempty"` // Current fingerprint + NeedsDiscovery bool `json:"needs_discovery"` // True if fingerprint changed +} + +// ToSummary converts a full discovery to a summary. +func (d *ResourceDiscovery) ToSummary() DiscoverySummary { + return DiscoverySummary{ + ID: d.ID, + ResourceType: d.ResourceType, + ResourceID: d.ResourceID, + HostID: d.HostID, + Hostname: d.Hostname, + ServiceType: d.ServiceType, + ServiceName: d.ServiceName, + ServiceVersion: d.ServiceVersion, + Category: d.Category, + Confidence: d.Confidence, + HasUserNotes: d.UserNotes != "", + UpdatedAt: d.UpdatedAt, + Fingerprint: d.Fingerprint, + NeedsDiscovery: false, // Will be set by caller if fingerprint changed + } +} + +// AIAnalysisRequest is sent to the AI for analysis. +type AIAnalysisRequest struct { + ResourceType ResourceType `json:"resource_type"` + ResourceID string `json:"resource_id"` + HostID string `json:"host_id"` + Hostname string `json:"hostname"` + CommandOutputs map[string]string `json:"command_outputs"` + ExistingFacts []DiscoveryFact `json:"existing_facts,omitempty"` + Metadata map[string]any `json:"metadata,omitempty"` // Image, labels, etc. +} + +// AIAnalysisResponse is returned by the AI. 
+type AIAnalysisResponse struct { + ServiceType string `json:"service_type"` + ServiceName string `json:"service_name"` + ServiceVersion string `json:"service_version"` + Category ServiceCategory `json:"category"` + CLIAccess string `json:"cli_access"` + Facts []DiscoveryFact `json:"facts"` + ConfigPaths []string `json:"config_paths"` + DataPaths []string `json:"data_paths"` + LogPaths []string `json:"log_paths"` + Ports []PortInfo `json:"ports"` + Confidence float64 `json:"confidence"` + Reasoning string `json:"reasoning"` +} + +// FingerprintSchemaVersion is incremented when the fingerprint algorithm changes. +// This prevents mass rediscovery when we add new fields to the fingerprint hash. +// Old fingerprints with different schema versions are treated as "schema changed" +// rather than "container changed", allowing for more controlled migration. +// (The fingerprint itself is a ContainerFingerprint, defined below: key container +// metadata used for just-in-time discovery, so a scan runs only when something +// actually changed rather than on a fixed timer.) +const FingerprintSchemaVersion = 3 // v3: Removed IP addresses (DHCP churn caused false positives) + +// CLIAccessVersion is incremented when the CLI access pattern format changes. +// When a discovery has an older version, its CLIAccess field is regenerated +// to use the new instructional format.
+const CLIAccessVersion = 2 // v2: Changed from shell commands to pulse_control instructions + +type ContainerFingerprint struct { + ResourceID string `json:"resource_id"` + HostID string `json:"host_id"` + Hash string `json:"hash"` // SHA256 of metadata (truncated to 16 chars) + SchemaVersion int `json:"schema_version"` // Version of fingerprint algorithm + GeneratedAt time.Time `json:"generated_at"` + + // Components that went into the hash (for debugging) + ImageID string `json:"image_id,omitempty"` + ImageName string `json:"image_name,omitempty"` + Ports []string `json:"ports,omitempty"` + MountPaths []string `json:"mount_paths,omitempty"` + EnvKeys []string `json:"env_keys,omitempty"` // Keys only, not values (security) + CreatedAt string `json:"created_at,omitempty"` // Container creation time +} + +// IsSchemaOutdated returns true if this fingerprint was created with an older schema. +func (fp *ContainerFingerprint) IsSchemaOutdated() bool { + return fp.SchemaVersion < FingerprintSchemaVersion +} diff --git a/internal/servicediscovery/types_test.go b/internal/servicediscovery/types_test.go new file mode 100644 index 000000000..34646a9ac --- /dev/null +++ b/internal/servicediscovery/types_test.go @@ -0,0 +1,22 @@ +package servicediscovery + +import "testing" + +func TestResourceIDHelpers(t *testing.T) { + id := MakeResourceID(ResourceTypeDocker, "host1", "app") + if id != "docker:host1:app" { + t.Fatalf("unexpected id: %s", id) + } + + rt, host, res, err := ParseResourceID(id) + if err != nil { + t.Fatalf("ParseResourceID error: %v", err) + } + if rt != ResourceTypeDocker || host != "host1" || res != "app" { + t.Fatalf("unexpected parse result: %s %s %s", rt, host, res) + } + + if _, _, _, err := ParseResourceID("invalid"); err == nil { + t.Fatalf("expected parse error for invalid id") + } +} diff --git a/internal/websocket/hub_tenant_test.go b/internal/websocket/hub_tenant_test.go index 085e29a17..cd3e0ed21 100644 --- a/internal/websocket/hub_tenant_test.go 
+++ b/internal/websocket/hub_tenant_test.go @@ -83,6 +83,8 @@ func TestHub_Setters_Coverage(t *testing.T) { func TestHub_DispatchToTenantClients(t *testing.T) { // This tests the internal logic of iterating clients hub := NewHub(nil) + go hub.Run() + defer hub.Stop() // Create a mock client client := &Client{ @@ -93,11 +95,9 @@ func TestHub_DispatchToTenantClients(t *testing.T) { } // Manually register (simulating register channel) - hub.clients[client] = true hub.register <- client // Allow registration to process - go hub.Run() time.Sleep(50 * time.Millisecond) // Now broadcast to org1 (internal method)