diff --git a/internal/ai/tools/adapters.go b/internal/ai/tools/adapters.go index 5c07f8e8a..70fc7ca06 100644 --- a/internal/ai/tools/adapters.go +++ b/internal/ai/tools/adapters.go @@ -1,11 +1,15 @@ package tools import ( + "context" "fmt" + "sort" + "strings" "time" "github.com/rcourtman/pulse-go-rewrite/internal/alerts" "github.com/rcourtman/pulse-go-rewrite/internal/models" + "github.com/rcourtman/pulse-go-rewrite/pkg/proxmox" ) // StateGetter provides access to the current infrastructure state @@ -88,6 +92,128 @@ func (a *StorageMCPAdapter) GetCephClusters() []models.CephCluster { return state.CephClusters } +// StorageConfigSource provides storage configuration data with context. +type StorageConfigSource interface { + GetStorageConfig(ctx context.Context, instance string) (map[string][]proxmox.Storage, error) +} + +// StorageConfigMCPAdapter adapts monitoring storage config access to MCP StorageConfigProvider interface. +type StorageConfigMCPAdapter struct { + source StorageConfigSource + timeout time.Duration +} + +// NewStorageConfigMCPAdapter creates a new adapter for storage config data. +func NewStorageConfigMCPAdapter(source StorageConfigSource) *StorageConfigMCPAdapter { + if source == nil { + return nil + } + return &StorageConfigMCPAdapter{ + source: source, + timeout: 5 * time.Second, + } +} + +// GetStorageConfig implements mcp.StorageConfigProvider. +func (a *StorageConfigMCPAdapter) GetStorageConfig(instance string) ([]StorageConfigSummary, error) { + if a == nil || a.source == nil { + return nil, fmt.Errorf("storage config source not available") + } + ctx, cancel := context.WithTimeout(context.Background(), a.timeout) + defer cancel() + + storageByInstance, err := a.source.GetStorageConfig(ctx, instance) + if err != nil { + return nil, err + } + + result := make([]StorageConfigSummary, 0) + seen := make(map[string]bool) + for inst, storages := range storageByInstance { + for _, storage := range storages { + key := inst + ":" + storage.Storage + if seen[key] { + continue + } + seen[key] = true + entry := StorageConfigSummary{ + ID: storage.Storage, + Name: storage.Storage, + Instance: inst, + Type: storage.Type, + Content: storage.Content, + Nodes: parseStorageConfigNodes(storage.Nodes), + Path: storage.Path, + Shared: storage.Shared == 1, + Enabled: storage.Enabled == 1, + Active: storage.Active == 1, + } + result = append(result, entry) + } + } + + sort.Slice(result, func(i, j int) bool { + if result[i].Instance != result[j].Instance { + return result[i].Instance < result[j].Instance + } + return result[i].ID < result[j].ID + }) + + return result, nil +} + +func parseStorageConfigNodes(nodes string) []string { + nodes = strings.TrimSpace(nodes) + if nodes == "" { + return nil + } + parts := strings.Split(nodes, ",") + result := make([]string, 0, len(parts)) + for _, part := range parts { + node := strings.TrimSpace(part) + if node == "" { + continue + } + result = append(result, node) + } + if len(result) == 0 { + return nil + } + return result +} + +// GuestConfigSource provides guest configuration data with context. +type GuestConfigSource interface { + GetGuestConfig(ctx context.Context, guestType, instance, node string, vmid int) (map[string]interface{}, error) +} + +// GuestConfigMCPAdapter adapts monitoring config access to MCP GuestConfigProvider interface. +type GuestConfigMCPAdapter struct { + source GuestConfigSource + timeout time.Duration +} + +// NewGuestConfigMCPAdapter creates a new adapter for guest config data. 
+func NewGuestConfigMCPAdapter(source GuestConfigSource) *GuestConfigMCPAdapter { + if source == nil { + return nil + } + return &GuestConfigMCPAdapter{ + source: source, + timeout: 5 * time.Second, + } +} + +// GetGuestConfig implements mcp.GuestConfigProvider. +func (a *GuestConfigMCPAdapter) GetGuestConfig(guestType, instance, node string, vmid int) (map[string]interface{}, error) { + if a == nil || a.source == nil { + return nil, fmt.Errorf("guest config source not available") + } + ctx, cancel := context.WithTimeout(context.Background(), a.timeout) + defer cancel() + return a.source.GetGuestConfig(ctx, guestType, instance, node, vmid) +} + // BackupMCPAdapter adapts the monitor state to MCP BackupProvider interface type BackupMCPAdapter struct { stateGetter StateGetter @@ -746,6 +872,8 @@ type DiscoverySource interface { ListDiscoveriesByType(resourceType string) ([]DiscoverySourceData, error) ListDiscoveriesByHost(hostID string) ([]DiscoverySourceData, error) FormatForAIContext(discoveries []DiscoverySourceData) string + // TriggerDiscovery initiates discovery for a resource, returning discovered data + TriggerDiscovery(ctx context.Context, resourceType, hostID, resourceID string) (DiscoverySourceData, error) } // DiscoverySourceData represents discovery data from the source @@ -763,6 +891,9 @@ type DiscoverySourceData struct { Facts []DiscoverySourceFact ConfigPaths []string DataPaths []string + LogPaths []string + Ports []DiscoverySourcePort + DockerMounts []DiscoverySourceDockerMount // Docker bind mounts (for LXCs/VMs running Docker) UserNotes string Confidence float64 AIReasoning string @@ -770,15 +901,33 @@ type DiscoverySourceData struct { UpdatedAt time.Time } -// DiscoverySourceFact represents a fact from the source -type DiscoverySourceFact struct { - Category string - Key string - Value string - Source string +// DiscoverySourceDockerMount represents a Docker bind mount from the source +type DiscoverySourceDockerMount struct { + ContainerName string // Docker container name + Source string // Host path (where to actually write files) + Destination string // Container path (what the service sees) + Type string // Mount type: bind, volume, tmpfs + ReadOnly bool // Whether mount is read-only } -// DiscoveryMCPAdapter adapts aidiscovery.Service to MCP DiscoveryProvider interface +// DiscoverySourcePort represents a port from the source +type DiscoverySourcePort struct { + Port int + Protocol string + Process string + Address string +} + +// DiscoverySourceFact represents a fact from the source +type DiscoverySourceFact struct { + Category string + Key string + Value string + Source string + Confidence float64 // 0-1 confidence for this fact +} + +// DiscoveryMCPAdapter adapts servicediscovery.Service to MCP DiscoveryProvider interface type DiscoveryMCPAdapter struct { source DiscoverySource } @@ -876,10 +1025,30 @@ func (a *DiscoveryMCPAdapter) FormatForAIContext(discoveries []*ResourceDiscover facts := make([]DiscoverySourceFact, 0, len(d.Facts)) for _, f := range d.Facts { facts = append(facts, DiscoverySourceFact{ - Category: f.Category, - Key: f.Key, - Value: f.Value, - Source: f.Source, + Category: f.Category, + Key: f.Key, + Value: f.Value, + Source: f.Source, + Confidence: f.Confidence, + }) + } + ports := make([]DiscoverySourcePort, 0, len(d.Ports)) + for _, p := range d.Ports { + ports = append(ports, DiscoverySourcePort{ + Port: p.Port, + Protocol: p.Protocol, + Process: p.Process, + Address: p.Address, + }) + } + dockerMounts := make([]DiscoverySourceDockerMount, 0, 
len(d.BindMounts)) + for _, m := range d.BindMounts { + dockerMounts = append(dockerMounts, DiscoverySourceDockerMount{ + ContainerName: m.ContainerName, + Source: m.Source, + Destination: m.Destination, + Type: m.Type, + ReadOnly: m.ReadOnly, }) } sourceData = append(sourceData, DiscoverySourceData{ @@ -896,6 +1065,8 @@ func (a *DiscoveryMCPAdapter) FormatForAIContext(discoveries []*ResourceDiscover Facts: facts, ConfigPaths: d.ConfigPaths, DataPaths: d.DataPaths, + Ports: ports, + DockerMounts: dockerMounts, UserNotes: d.UserNotes, Confidence: d.Confidence, AIReasoning: d.AIReasoning, @@ -907,6 +1078,20 @@ func (a *DiscoveryMCPAdapter) FormatForAIContext(discoveries []*ResourceDiscover return a.source.FormatForAIContext(sourceData) } +// TriggerDiscovery implements tools.DiscoveryProvider +func (a *DiscoveryMCPAdapter) TriggerDiscovery(ctx context.Context, resourceType, hostID, resourceID string) (*ResourceDiscoveryInfo, error) { + if a.source == nil { + return nil, fmt.Errorf("discovery source not available") + } + + data, err := a.source.TriggerDiscovery(ctx, resourceType, hostID, resourceID) + if err != nil { + return nil, err + } + + return a.convertToInfo(data), nil +} + func (a *DiscoveryMCPAdapter) convertToInfo(data DiscoverySourceData) *ResourceDiscoveryInfo { if data.ID == "" { return nil @@ -915,10 +1100,33 @@ func (a *DiscoveryMCPAdapter) convertToInfo(data DiscoverySourceData) *ResourceD facts := make([]DiscoveryFact, 0, len(data.Facts)) for _, f := range data.Facts { facts = append(facts, DiscoveryFact{ - Category: f.Category, - Key: f.Key, - Value: f.Value, - Source: f.Source, + Category: f.Category, + Key: f.Key, + Value: f.Value, + Source: f.Source, + Confidence: f.Confidence, + }) + } + + ports := make([]DiscoveryPortInfo, 0, len(data.Ports)) + for _, p := range data.Ports { + ports = append(ports, DiscoveryPortInfo{ + Port: p.Port, + Protocol: p.Protocol, + Process: p.Process, + Address: p.Address, + }) + } + + // Convert DockerMounts to BindMounts + bindMounts := make([]DiscoveryMount, 0, len(data.DockerMounts)) + for _, m := range data.DockerMounts { + bindMounts = append(bindMounts, DiscoveryMount{ + ContainerName: m.ContainerName, + Source: m.Source, + Destination: m.Destination, + Type: m.Type, + ReadOnly: m.ReadOnly, }) } @@ -936,6 +1144,9 @@ func (a *DiscoveryMCPAdapter) convertToInfo(data DiscoverySourceData) *ResourceD Facts: facts, ConfigPaths: data.ConfigPaths, DataPaths: data.DataPaths, + LogPaths: data.LogPaths, + Ports: ports, + BindMounts: bindMounts, UserNotes: data.UserNotes, Confidence: data.Confidence, AIReasoning: data.AIReasoning, diff --git a/internal/ai/tools/control_run_command_test.go b/internal/ai/tools/control_run_command_test.go index fa8713ddc..2df6c2608 100644 --- a/internal/ai/tools/control_run_command_test.go +++ b/internal/ai/tools/control_run_command_test.go @@ -6,8 +6,10 @@ import ( "github.com/rcourtman/pulse-go-rewrite/internal/agentexec" "github.com/rcourtman/pulse-go-rewrite/internal/ai/approval" + "github.com/rcourtman/pulse-go-rewrite/internal/models" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" ) func TestPulseToolExecutor_ExecuteRunCommand(t *testing.T) { @@ -65,7 +67,8 @@ func TestPulseToolExecutor_ExecuteRunCommand(t *testing.T) { {AgentID: "agent1", Hostname: "node1"}, }).Twice() agentSrv.On("ExecuteCommand", mock.Anything, "agent1", mock.MatchedBy(func(payload agentexec.ExecuteCommandPayload) bool { - return payload.Command == "uptime" && payload.TargetType 
== "host" && payload.TargetID == "host1" + // For direct host targets, TargetID is empty - resolveTargetForCommand returns "" for host type + return payload.Command == "uptime" && payload.TargetType == "host" && payload.TargetID == "" })).Return(&agentexec.CommandResultPayload{ Stdout: "ok", ExitCode: 0, @@ -85,6 +88,107 @@ func TestPulseToolExecutor_ExecuteRunCommand(t *testing.T) { }) } +func TestPulseToolExecutor_RunCommandLXCRouting(t *testing.T) { + ctx := context.Background() + + t.Run("LXCCommandRoutedCorrectly", func(t *testing.T) { + // Test that commands targeting LXCs are routed with correct target type/ID + // The agent handles sh -c wrapping, so tool just sends raw command + agents := []agentexec.ConnectedAgent{{AgentID: "proxmox-agent", Hostname: "delly"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + mockAgent.On("ExecuteCommand", mock.Anything, "proxmox-agent", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + // Tool sends raw command, agent will wrap in sh -c + return cmd.TargetType == "container" && + cmd.TargetID == "108" && + cmd.Command == "grep pattern /var/log/*.log" + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "matched line", + }, nil) + + state := models.StateSnapshot{ + Containers: []models.Container{ + {VMID: 108, Name: "jellyfin", Node: "delly"}, + }, + } + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeRunCommand(ctx, map[string]interface{}{ + "command": "grep pattern /var/log/*.log", + "target_host": "jellyfin", + }) + require.NoError(t, err) + assert.Contains(t, result.Content[0].Text, "Command completed successfully") + mockAgent.AssertExpectations(t) + }) + + t.Run("VMCommandRoutedCorrectly", func(t *testing.T) { + // Test that commands targeting VMs are routed with correct target type/ID + agents := []agentexec.ConnectedAgent{{AgentID: "proxmox-agent", Hostname: "delly"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + mockAgent.On("ExecuteCommand", mock.Anything, "proxmox-agent", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + return cmd.TargetType == "vm" && + cmd.TargetID == "100" && + cmd.Command == "ls /tmp/*.txt" + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "result", + }, nil) + + state := models.StateSnapshot{ + VMs: []models.VM{ + {VMID: 100, Name: "test-vm", Node: "delly"}, + }, + } + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeRunCommand(ctx, map[string]interface{}{ + "command": "ls /tmp/*.txt", + "target_host": "test-vm", + }) + require.NoError(t, err) + assert.Contains(t, result.Content[0].Text, "Command completed successfully") + mockAgent.AssertExpectations(t) + }) + + t.Run("DirectHostRoutedCorrectly", func(t *testing.T) { + // Direct host commands have target type "host" + agents := []agentexec.ConnectedAgent{{AgentID: "host-agent", Hostname: "tower"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + mockAgent.On("ExecuteCommand", mock.Anything, "host-agent", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + return cmd.TargetType == "host" && + cmd.Command == "ls /tmp/*.txt" + })).Return(&agentexec.CommandResultPayload{ + 
ExitCode: 0, + Stdout: "files", + }, nil) + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: models.StateSnapshot{}}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeRunCommand(ctx, map[string]interface{}{ + "command": "ls /tmp/*.txt", + "target_host": "tower", + }) + require.NoError(t, err) + assert.Contains(t, result.Content[0].Text, "Command completed successfully") + mockAgent.AssertExpectations(t) + }) +} + func TestPulseToolExecutor_FindAgentForCommand(t *testing.T) { t.Run("NoAgentServer", func(t *testing.T) { exec := NewPulseToolExecutor(ExecutorConfig{}) diff --git a/internal/ai/tools/control_test.go b/internal/ai/tools/control_test.go index 4db58ef7b..12d86528d 100644 --- a/internal/ai/tools/control_test.go +++ b/internal/ai/tools/control_test.go @@ -34,7 +34,10 @@ func TestPulseToolExecutor_ExecuteListBackups(t *testing.T) { backupProv.On("GetBackups").Return(expectedBackups) backupProv.On("GetPBSInstances").Return([]models.PBSInstance{}) - result, err := exec.ExecuteTool(context.Background(), "pulse_list_backups", map[string]interface{}{}) + // Use consolidated pulse_storage tool with type: "backups" + result, err := exec.ExecuteTool(context.Background(), "pulse_storage", map[string]interface{}{ + "type": "backups", + }) assert.NoError(t, err) assert.False(t, result.IsError) } @@ -66,7 +69,9 @@ func TestPulseToolExecutor_ExecuteControlGuest(t *testing.T) { ExitCode: 0, }, nil) - result, err := exec.ExecuteTool(context.Background(), "pulse_control_guest", map[string]interface{}{ + // Use consolidated pulse_control tool with type: "guest" + result, err := exec.ExecuteTool(context.Background(), "pulse_control", map[string]interface{}{ + "type": "guest", "guest_id": "100", "action": "stop", }) @@ -107,9 +112,11 @@ func TestPulseToolExecutor_ExecuteControlDocker(t *testing.T) { ExitCode: 0, }, nil) - result, err := exec.ExecuteTool(context.Background(), "pulse_control_docker", map[string]interface{}{ + // Use consolidated pulse_docker tool with action: "control" + result, err := exec.ExecuteTool(context.Background(), "pulse_docker", map[string]interface{}{ + "action": "control", "container": "nginx", - "action": "restart", + "operation": "restart", }) assert.NoError(t, err) assert.Contains(t, result.Content[0].Text, "Successfully executed 'docker restart'") diff --git a/internal/ai/tools/data_types.go b/internal/ai/tools/data_types.go index 7dc92a8c2..bdc794f69 100644 --- a/internal/ai/tools/data_types.go +++ b/internal/ai/tools/data_types.go @@ -147,14 +147,16 @@ type ResourceSearchResponse struct { // ResourceMatch is a compact match result for pulse_search_resources type ResourceMatch struct { - Type string `json:"type"` // "node", "vm", "container", "docker", "docker_host" - ID string `json:"id,omitempty"` - Name string `json:"name"` - Status string `json:"status,omitempty"` - Node string `json:"node,omitempty"` - Host string `json:"host,omitempty"` - VMID int `json:"vmid,omitempty"` - Image string `json:"image,omitempty"` + Type string `json:"type"` // "node", "vm", "container", "docker", "docker_host" + ID string `json:"id,omitempty"` + Name string `json:"name"` + Status string `json:"status,omitempty"` + Node string `json:"node,omitempty"` // Proxmox node this resource is on + NodeHasAgent bool `json:"node_has_agent,omitempty"` // True if the Proxmox node has a connected agent + Host string `json:"host,omitempty"` // Docker host for docker containers + VMID int `json:"vmid,omitempty"` 
+ Image string `json:"image,omitempty"` + AgentConnected bool `json:"agent_connected,omitempty"` // True if this specific resource has a connected agent } // NodeSummary is a summarized node for list responses @@ -324,6 +326,35 @@ type ResourceResponse struct { UpdateAvailable bool `json:"update_available,omitempty"` } +// GuestConfigResponse is returned by pulse_get_guest_config. +type GuestConfigResponse struct { + GuestType string `json:"guest_type"` + VMID int `json:"vmid"` + Name string `json:"name,omitempty"` + Node string `json:"node,omitempty"` + Instance string `json:"instance,omitempty"` + Hostname string `json:"hostname,omitempty"` + OSType string `json:"os_type,omitempty"` + Onboot *bool `json:"onboot,omitempty"` + RootFS string `json:"rootfs,omitempty"` + Mounts []GuestMountConfig `json:"mounts,omitempty"` + Disks []GuestDiskConfig `json:"disks,omitempty"` + Raw map[string]string `json:"raw,omitempty"` +} + +// GuestMountConfig summarizes a container mount. +type GuestMountConfig struct { + Key string `json:"key"` + Source string `json:"source"` + Mountpoint string `json:"mountpoint,omitempty"` +} + +// GuestDiskConfig summarizes a VM disk definition. +type GuestDiskConfig struct { + Key string `json:"key"` + Value string `json:"value"` +} + // ResourceCPU describes CPU usage type ResourceCPU struct { Percent float64 `json:"percent"` @@ -476,12 +507,37 @@ type StorageResponse struct { Pagination *PaginationInfo `json:"pagination,omitempty"` } +// StorageConfigResponse is returned by pulse_get_storage_config +type StorageConfigResponse struct { + Storages []StorageConfigSummary `json:"storages,omitempty"` +} + +// StorageConfigSummary is a summarized storage config entry +type StorageConfigSummary struct { + ID string `json:"id"` + Name string `json:"name"` + Instance string `json:"instance,omitempty"` + Type string `json:"type,omitempty"` + Content string `json:"content,omitempty"` + Nodes []string `json:"nodes,omitempty"` + Path string `json:"path,omitempty"` + Shared bool `json:"shared"` + Enabled bool `json:"enabled"` + Active bool `json:"active"` +} + // StoragePoolSummary is a summarized storage pool type StoragePoolSummary struct { ID string `json:"id"` Name string `json:"name"` + Node string `json:"node,omitempty"` + Instance string `json:"instance,omitempty"` + Nodes []string `json:"nodes,omitempty"` Type string `json:"type"` Status string `json:"status"` + Enabled bool `json:"enabled"` + Active bool `json:"active"` + Path string `json:"path,omitempty"` UsagePercent float64 `json:"usage_percent"` UsedGB float64 `json:"used_gb"` TotalGB float64 `json:"total_gb"` diff --git a/internal/ai/tools/executor.go b/internal/ai/tools/executor.go index 6905ea632..e5596c742 100644 --- a/internal/ai/tools/executor.go +++ b/internal/ai/tools/executor.go @@ -80,6 +80,16 @@ type StorageProvider interface { GetCephClusters() []models.CephCluster } +// StorageConfigProvider provides storage configuration data. +type StorageConfigProvider interface { + GetStorageConfig(instance string) ([]StorageConfigSummary, error) +} + +// GuestConfigProvider provides guest configuration data (VM/LXC). 
+type GuestConfigProvider interface { + GetGuestConfig(guestType, instance, node string, vmid int) (map[string]interface{}, error) +} + // DiskHealthProvider provides disk health information from host agents type DiskHealthProvider interface { GetHosts() []models.Host @@ -101,36 +111,148 @@ type DiscoveryProvider interface { ListDiscoveriesByType(resourceType string) ([]*ResourceDiscoveryInfo, error) ListDiscoveriesByHost(hostID string) ([]*ResourceDiscoveryInfo, error) FormatForAIContext(discoveries []*ResourceDiscoveryInfo) string + // TriggerDiscovery initiates discovery for a resource and returns the result + TriggerDiscovery(ctx context.Context, resourceType, hostID, resourceID string) (*ResourceDiscoveryInfo, error) } +// ResolvedResourceInfo contains the minimal information needed for tool validation. +// This is an interface to avoid import cycles with the chat package. +type ResolvedResourceInfo interface { + GetResourceID() string + GetResourceType() string + GetTargetHost() string + GetAgentID() string + GetAdapter() string + GetVMID() int + GetNode() string + GetAllowedActions() []string + // New structured identity methods + GetProviderUID() string + GetKind() string + GetAliases() []string +} + +// ResourceRegistration contains all fields needed to register a discovered resource. +// This structured approach replaces the long parameter list for clarity. +type ResourceRegistration struct { + // Identity + Kind string // Resource type: "node", "vm", "lxc", "docker_container", etc. + ProviderUID string // Stable provider ID (container ID, VMID, pod UID) + Name string // Primary display name + Aliases []string // Additional names that resolve to this resource + + // Scope + HostUID string + HostName string + ParentUID string + ParentKind string + ClusterUID string + Namespace string + + // Legacy fields (for backwards compatibility) + VMID int + Node string + LocationChain []string + + // Executor paths + Executors []ExecutorRegistration +} + +// ExecutorRegistration describes how an executor can reach a resource. +type ExecutorRegistration struct { + ExecutorID string + Adapter string + Actions []string + Priority int +} + +// ResolvedContextProvider provides session-scoped resource resolution. +// Query and discovery tools add resources; action tools validate against them. +// This interface is implemented by the chat package's ResolvedContext. +type ResolvedContextProvider interface { + // AddResolvedResource adds a resource that was found via query/discovery. + // Uses the new structured registration format. + AddResolvedResource(reg ResourceRegistration) + + // GetResolvedResourceByID retrieves a resource by its canonical ID (kind:provider_uid) + GetResolvedResourceByID(resourceID string) (ResolvedResourceInfo, bool) + + // GetResolvedResourceByAlias retrieves a resource by any of its aliases + GetResolvedResourceByAlias(alias string) (ResolvedResourceInfo, bool) + + // ValidateResourceForAction checks if a resource can perform an action + // Returns the resource if valid, error if not found or action not allowed + ValidateResourceForAction(resourceID, action string) (ResolvedResourceInfo, error) + + // HasAnyResources returns true if at least one resource has been discovered + HasAnyResources() bool + + // WasRecentlyAccessed checks if a resource was accessed within the given time window. + // Used for routing validation to distinguish "this turn" from "session-wide" context. 
+ WasRecentlyAccessed(resourceID string, window time.Duration) bool + + // GetRecentlyAccessedResources returns resource IDs accessed within the given time window. + GetRecentlyAccessedResources(window time.Duration) []string + + // MarkExplicitAccess marks a resource as recently accessed, indicating user intent. + // Call this for single-resource operations (get, explicit select) but NOT for bulk + // operations (list, search) to avoid poisoning routing validation. + MarkExplicitAccess(resourceID string) +} + +// RecentAccessWindow is the time window used to determine "recently referenced" resources. +// Resources accessed within this window are considered to be from the current turn/exchange. +const RecentAccessWindow = 30 * time.Second + // ResourceDiscoveryInfo represents discovered information about a resource type ResourceDiscoveryInfo struct { - ID string `json:"id"` - ResourceType string `json:"resource_type"` - ResourceID string `json:"resource_id"` - HostID string `json:"host_id"` - Hostname string `json:"hostname"` - ServiceType string `json:"service_type"` - ServiceName string `json:"service_name"` - ServiceVersion string `json:"service_version"` - Category string `json:"category"` - CLIAccess string `json:"cli_access"` - Facts []DiscoveryFact `json:"facts"` - ConfigPaths []string `json:"config_paths"` - DataPaths []string `json:"data_paths"` - UserNotes string `json:"user_notes,omitempty"` - Confidence float64 `json:"confidence"` - AIReasoning string `json:"ai_reasoning,omitempty"` - DiscoveredAt time.Time `json:"discovered_at"` - UpdatedAt time.Time `json:"updated_at"` + ID string `json:"id"` + ResourceType string `json:"resource_type"` + ResourceID string `json:"resource_id"` + HostID string `json:"host_id"` + Hostname string `json:"hostname"` + ServiceType string `json:"service_type"` + ServiceName string `json:"service_name"` + ServiceVersion string `json:"service_version"` + Category string `json:"category"` + CLIAccess string `json:"cli_access"` + Facts []DiscoveryFact `json:"facts"` + ConfigPaths []string `json:"config_paths"` + DataPaths []string `json:"data_paths"` + LogPaths []string `json:"log_paths,omitempty"` // Log file paths or commands (e.g., journalctl) + Ports []DiscoveryPortInfo `json:"ports"` + BindMounts []DiscoveryMount `json:"bind_mounts,omitempty"` // For Docker: host->container path mappings + UserNotes string `json:"user_notes,omitempty"` + Confidence float64 `json:"confidence"` + AIReasoning string `json:"ai_reasoning,omitempty"` + DiscoveredAt time.Time `json:"discovered_at"` + UpdatedAt time.Time `json:"updated_at"` +} + +// DiscoveryPortInfo represents a listening port discovered on a resource +type DiscoveryPortInfo struct { + Port int `json:"port"` + Protocol string `json:"protocol"` + Process string `json:"process,omitempty"` + Address string `json:"address,omitempty"` +} + +// DiscoveryMount represents a bind mount (host path -> container path) +type DiscoveryMount struct { + ContainerName string `json:"container_name,omitempty"` // Docker container name (for Docker inside LXC/VM) + Source string `json:"source"` // Host path (where to actually write files) + Destination string `json:"destination"` // Container path (what the service sees) + Type string `json:"type,omitempty"` // Mount type: bind, volume, tmpfs + ReadOnly bool `json:"read_only,omitempty"` } // DiscoveryFact represents a discovered fact about a resource type DiscoveryFact struct { - Category string `json:"category"` - Key string `json:"key"` - Value string `json:"value"` - 
Source string `json:"source,omitempty"` + Category string `json:"category"` + Key string `json:"key"` + Value string `json:"value"` + Source string `json:"source,omitempty"` + Confidence float64 `json:"confidence,omitempty"` // 0-1 confidence for this fact } // ControlLevel represents the AI's permission level for infrastructure control @@ -160,10 +282,12 @@ type ExecutorConfig struct { FindingsProvider FindingsProvider // Optional providers - infrastructure - BackupProvider BackupProvider - StorageProvider StorageProvider - DiskHealthProvider DiskHealthProvider - UpdatesProvider UpdatesProvider + BackupProvider BackupProvider + StorageProvider StorageProvider + StorageConfigProvider StorageConfigProvider + GuestConfigProvider GuestConfigProvider + DiskHealthProvider DiskHealthProvider + UpdatesProvider UpdatesProvider // Optional providers - management MetadataUpdater MetadataUpdater @@ -199,10 +323,12 @@ type PulseToolExecutor struct { findingsProvider FindingsProvider // Infrastructure context providers - backupProvider BackupProvider - storageProvider StorageProvider - diskHealthProvider DiskHealthProvider - updatesProvider UpdatesProvider + backupProvider BackupProvider + storageProvider StorageProvider + storageConfigProvider StorageConfigProvider + guestConfigProvider GuestConfigProvider + diskHealthProvider DiskHealthProvider + updatesProvider UpdatesProvider // Management providers metadataUpdater MetadataUpdater @@ -227,10 +353,34 @@ type PulseToolExecutor struct { targetID string isAutonomous bool + // Session-scoped resolved context for resource validation + // This is set per-session by the agentic loop before tool execution + resolvedContext ResolvedContextProvider + + // Telemetry callback for recording metrics + // This is optional - if nil, no telemetry is recorded + telemetryCallback TelemetryCallback + // Tool registry registry *ToolRegistry } +// TelemetryCallback is called when the executor needs to record telemetry. +// This allows the chat layer to handle metrics without import cycles. +type TelemetryCallback interface { + // RecordStrictResolutionBlock records when strict resolution blocks an action + RecordStrictResolutionBlock(tool, action string) + // RecordAutoRecoveryAttempt records an auto-recovery attempt + RecordAutoRecoveryAttempt(errorCode, tool string) + // RecordAutoRecoverySuccess records a successful auto-recovery + RecordAutoRecoverySuccess(errorCode, tool string) + // RecordRoutingMismatchBlock records when routing validation blocks an operation + // that targeted a parent host when a child resource was recently referenced. 
+ // targetKind: "node" (the kind being targeted) + // childKind: "lxc", "vm", "docker_container" (the kind of the more specific resource) + RecordRoutingMismatchBlock(tool, targetKind, childKind string) +} + // NewPulseToolExecutor creates a new Pulse tool executor with the given configuration func NewPulseToolExecutor(cfg ExecutorConfig) *PulseToolExecutor { e := &PulseToolExecutor{ @@ -244,6 +394,8 @@ func NewPulseToolExecutor(cfg ExecutorConfig) *PulseToolExecutor { findingsProvider: cfg.FindingsProvider, backupProvider: cfg.BackupProvider, storageProvider: cfg.StorageProvider, + storageConfigProvider: cfg.StorageConfigProvider, + guestConfigProvider: cfg.GuestConfigProvider, diskHealthProvider: cfg.DiskHealthProvider, updatesProvider: cfg.UpdatesProvider, metadataUpdater: cfg.MetadataUpdater, @@ -329,6 +481,16 @@ func (e *PulseToolExecutor) SetStorageProvider(provider StorageProvider) { e.storageProvider = provider } +// SetStorageConfigProvider sets the storage config provider +func (e *PulseToolExecutor) SetStorageConfigProvider(provider StorageConfigProvider) { + e.storageConfigProvider = provider +} + +// SetGuestConfigProvider sets the guest config provider +func (e *PulseToolExecutor) SetGuestConfigProvider(provider GuestConfigProvider) { + e.guestConfigProvider = provider +} + // SetDiskHealthProvider sets the disk health provider func (e *PulseToolExecutor) SetDiskHealthProvider(provider DiskHealthProvider) { e.diskHealthProvider = provider @@ -364,11 +526,27 @@ func (e *PulseToolExecutor) SetKnowledgeStoreProvider(provider KnowledgeStorePro e.knowledgeStoreProvider = provider } -// SetDiscoveryProvider sets the discovery provider for AI-powered discovery +// SetDiscoveryProvider sets the discovery provider for infrastructure discovery func (e *PulseToolExecutor) SetDiscoveryProvider(provider DiscoveryProvider) { e.discoveryProvider = provider } +// SetResolvedContext sets the session-scoped resolved context for resource validation. +// This should be called by the agentic loop before executing tools for a session. 
+func (e *PulseToolExecutor) SetResolvedContext(ctx ResolvedContextProvider) { + e.resolvedContext = ctx +} + +// SetTelemetryCallback sets the telemetry callback for recording metrics +func (e *PulseToolExecutor) SetTelemetryCallback(cb TelemetryCallback) { + e.telemetryCallback = cb +} + +// GetResolvedContext returns the current resolved context (may be nil) +func (e *PulseToolExecutor) GetResolvedContext() ResolvedContextProvider { + return e.resolvedContext +} + // ListTools returns the list of available tools func (e *PulseToolExecutor) ListTools() []Tool { tools := e.registry.ListTools(e.controlLevel) @@ -387,50 +565,31 @@ func (e *PulseToolExecutor) ListTools() []Tool { func (e *PulseToolExecutor) isToolAvailable(name string) bool { switch name { - case "pulse_get_capabilities", "pulse_get_url_content", "pulse_get_agent_scope": - return true - case "pulse_run_command": + // Consolidated tools - check based on primary requirements + case "pulse_query": + return e.stateProvider != nil + case "pulse_metrics": + return e.stateProvider != nil || e.metricsHistory != nil || e.baselineProvider != nil || e.patternProvider != nil + case "pulse_storage": + return e.stateProvider != nil || e.storageProvider != nil || e.backupProvider != nil || e.storageConfigProvider != nil || e.diskHealthProvider != nil + case "pulse_docker": + return e.stateProvider != nil || e.updatesProvider != nil + case "pulse_kubernetes": + return e.stateProvider != nil + case "pulse_alerts": + return e.alertProvider != nil || e.findingsProvider != nil || e.findingsManager != nil || e.stateProvider != nil + case "pulse_read": return e.agentServer != nil - case "pulse_control_guest", "pulse_control_docker": + case "pulse_control": return e.agentServer != nil && e.stateProvider != nil - case "pulse_set_agent_scope": - return e.agentProfileManager != nil - case "pulse_set_resource_url": - return e.metadataUpdater != nil - case "pulse_get_metrics": - return e.metricsHistory != nil - case "pulse_get_baselines": - return e.baselineProvider != nil - case "pulse_get_patterns": - return e.patternProvider != nil - case "pulse_list_alerts": - return e.alertProvider != nil - case "pulse_list_findings": - return e.findingsProvider != nil - case "pulse_resolve_finding", "pulse_dismiss_finding": - return e.findingsManager != nil - case "pulse_list_backups": - return e.backupProvider != nil - case "pulse_list_storage": - return e.storageProvider != nil - case "pulse_get_disk_health": - return e.diskHealthProvider != nil || e.storageProvider != nil - case "pulse_get_host_raid_status", "pulse_get_host_ceph_details": - return e.diskHealthProvider != nil - case "pulse_list_docker_updates", "pulse_check_docker_updates": - return e.updatesProvider != nil - case "pulse_update_docker_container": - return e.updatesProvider != nil && e.stateProvider != nil - case "pulse_get_incident_window": - return e.incidentRecorderProvider != nil - case "pulse_correlate_events": - return e.eventCorrelatorProvider != nil - case "pulse_get_relationship_graph": - return e.topologyProvider != nil - case "pulse_remember", "pulse_recall": - return e.knowledgeStoreProvider != nil - case "pulse_get_discovery", "pulse_list_discoveries": + case "pulse_file_edit": + return e.agentServer != nil + case "pulse_discovery": return e.discoveryProvider != nil + case "pulse_knowledge": + return e.knowledgeStoreProvider != nil || e.incidentRecorderProvider != nil || e.eventCorrelatorProvider != nil || e.topologyProvider != nil + case "pulse_pmg": + return e.stateProvider != nil 
 	default:
 		return e.stateProvider != nil
 	}
 }
@@ -448,30 +607,44 @@ func (e *PulseToolExecutor) ExecuteTool(ctx context.Context, name string, args m
 
 // registerTools registers all available tools
 func (e *PulseToolExecutor) registerTools() {
-	// Query tools (always available)
+	// Consolidated tools (49 tools -> 12 tools)
+	// See plan at /Users/rcourtman/.claude/plans/atomic-wobbling-rose.md
+
+	// pulse_query - search, get, config, topology, list, health
 	e.registerQueryTools()
 
-	// Kubernetes tools (always available)
+	// pulse_metrics - performance, temperatures, network, diskio, disks, baselines, patterns
+	e.registerMetricsTools()
+
+	// pulse_storage - pools, config, backups, snapshots, ceph, replication, pbs_jobs, raid, disk_health, resource_disks
+	e.registerStorageTools()
+
+	// pulse_docker - control, updates, check_updates, update, services, tasks, swarm
+	e.registerDockerTools()
+
+	// pulse_kubernetes - clusters, nodes, pods, deployments
 	e.registerKubernetesTools()
 
-	// Patrol context tools (always available)
-	e.registerPatrolTools()
+	// pulse_alerts - list, findings, resolved, resolve, dismiss
+	e.registerAlertsTools()
 
-	// Infrastructure tools (always available)
-	e.registerInfrastructureTools()
+	// pulse_read - read-only operations (exec, file, find, tail, logs)
+	// This is ALWAYS classified as ToolKindRead and never triggers VERIFYING
+	e.registerReadTools()
 
-	// PMG (Mail Gateway) tools (always available)
-	e.registerPMGTools()
+	// pulse_control - guest control, run commands (requires control permission)
+	// NOTE: For read-only command execution, use pulse_read instead
+	e.registerControlToolsConsolidated()
 
-	// Profile tools - read operations always available
-	e.registerProfileTools()
+	// pulse_file_edit - read, append, write files (requires control permission)
+	e.registerFileTools()
 
-	// Intelligence tools (incident analysis, knowledge management)
-	e.registerIntelligenceTools()
+	// pulse_discovery - get, list discoveries
+	e.registerDiscoveryToolsConsolidated()
 
-	// Discovery tools (AI-powered infrastructure discovery)
-	e.registerDiscoveryTools()
+	// pulse_knowledge - remember, recall, incidents, correlate, relationships
+	e.registerKnowledgeTools()
 
-	// Control tools (conditional on control level)
-	e.registerControlTools()
+	// pulse_pmg - status, mail_stats, queues, spam
+	e.registerPMGToolsConsolidated()
 }
diff --git a/internal/ai/tools/executor_setters_test.go b/internal/ai/tools/executor_setters_test.go
index 7ff955782..469882d9e 100644
--- a/internal/ai/tools/executor_setters_test.go
+++ b/internal/ai/tools/executor_setters_test.go
@@ -28,6 +28,12 @@ func (s *stubAgentProfileManager) GetAgentScope(ctx context.Context, agentID str
 	return &AgentScope{AgentID: agentID}, nil
 }
 
+type stubStorageConfigProvider struct{}
+
+func (s *stubStorageConfigProvider) GetStorageConfig(instance string) ([]StorageConfigSummary, error) {
+	return nil, nil
+}
+
 func TestPulseToolExecutor_Setters(t *testing.T) {
 	exec := NewPulseToolExecutor(ExecutorConfig{})
 
@@ -78,6 +84,10 @@ func TestPulseToolExecutor_Setters(t *testing.T) {
 	exec.SetStorageProvider(storageProvider)
 	assert.Equal(t, storageProvider, exec.storageProvider)
 
+	storageConfigProvider := &stubStorageConfigProvider{}
+	exec.SetStorageConfigProvider(storageConfigProvider)
+	assert.Equal(t, storageConfigProvider, exec.storageConfigProvider)
+
 	diskHealthProvider := &mockDiskHealthProvider{}
 	exec.SetDiskHealthProvider(diskHealthProvider)
 	assert.Equal(t, diskHealthProvider, exec.diskHealthProvider)
@@ -94,23 +104,31 @@ 
func TestPulseToolExecutor_Setters(t *testing.T) { func TestPulseToolExecutor_ListTools(t *testing.T) { exec := NewPulseToolExecutor(ExecutorConfig{}) tools := exec.ListTools() - assert.True(t, containsTool(tools, "pulse_get_capabilities")) - assert.False(t, containsTool(tools, "pulse_get_topology")) + // pulse_query requires state provider, so it should not be available without one + assert.False(t, containsTool(tools, "pulse_query")) execWithState := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{}}) stateTools := execWithState.ListTools() - assert.True(t, containsTool(stateTools, "pulse_get_topology")) + // With state provider, pulse_query should be available + assert.True(t, containsTool(stateTools, "pulse_query")) } func TestPulseToolExecutor_IsToolAvailable(t *testing.T) { exec := NewPulseToolExecutor(ExecutorConfig{}) - assert.False(t, exec.isToolAvailable("pulse_get_metrics")) - assert.False(t, exec.isToolAvailable("pulse_set_agent_scope")) + // pulse_metrics requires metrics provider or state provider + assert.False(t, exec.isToolAvailable("pulse_metrics")) + // pulse_query requires state provider + assert.False(t, exec.isToolAvailable("pulse_query")) - exec.SetMetricsHistory(&mockMetricsHistoryProvider{}) - exec.SetAgentProfileManager(&stubAgentProfileManager{}) - assert.True(t, exec.isToolAvailable("pulse_get_metrics")) - assert.True(t, exec.isToolAvailable("pulse_set_agent_scope")) + // Create new executor with state provider and metrics history + execWithProviders := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{}, + MetricsHistory: &mockMetricsHistoryProvider{}, + }) + // Now pulse_metrics should be available with metrics history + assert.True(t, execWithProviders.isToolAvailable("pulse_metrics")) + // And pulse_query should be available with state provider + assert.True(t, execWithProviders.isToolAvailable("pulse_query")) } func TestToolRegistry_ListTools(t *testing.T) { diff --git a/internal/ai/tools/file_docker_test.go b/internal/ai/tools/file_docker_test.go new file mode 100644 index 000000000..e1cece46f --- /dev/null +++ b/internal/ai/tools/file_docker_test.go @@ -0,0 +1,427 @@ +package tools + +import ( + "context" + "encoding/base64" + "encoding/json" + "strings" + "testing" + + "github.com/rcourtman/pulse-go-rewrite/internal/agentexec" + "github.com/rcourtman/pulse-go-rewrite/internal/models" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +func TestExecuteFileEditDockerContainerValidation(t *testing.T) { + ctx := context.Background() + + t.Run("InvalidDockerContainerName", func(t *testing.T) { + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: models.StateSnapshot{}}}) + result, err := exec.executeFileEdit(ctx, map[string]interface{}{ + "action": "read", + "path": "/config/test.json", + "target_host": "tower", + "docker_container": "my container", // space is invalid + }) + require.NoError(t, err) + assert.True(t, result.IsError) + assert.Contains(t, result.Content[0].Text, "invalid character") + }) + + t.Run("ValidDockerContainerName", func(t *testing.T) { + // This should pass validation but fail on agent lookup + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: models.StateSnapshot{}}}) + result, err := exec.executeFileEdit(ctx, map[string]interface{}{ + "action": "read", + "path": "/config/test.json", + "target_host": "tower", + "docker_container": "my-container_v1.2", + 
}) + require.NoError(t, err) + // Should fail with "no agent" not "invalid character" + assert.NotContains(t, result.Content[0].Text, "invalid character") + }) +} + +func TestExecuteFileReadDocker(t *testing.T) { + ctx := context.Background() + + t.Run("ReadFromDockerContainer", func(t *testing.T) { + agents := []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "tower"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + mockAgent.On("ExecuteCommand", mock.Anything, "agent-1", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + // Should wrap with docker exec + return strings.Contains(cmd.Command, "docker exec") && + strings.Contains(cmd.Command, "jellyfin") && + strings.Contains(cmd.Command, "cat") && + strings.Contains(cmd.Command, "/config/settings.json") + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: `{"setting": "value"}`, + }, nil) + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: models.StateSnapshot{}}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeFileRead(ctx, "/config/settings.json", "tower", "jellyfin") + require.NoError(t, err) + + var resp map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(result.Content[0].Text), &resp)) + assert.True(t, resp["success"].(bool)) + assert.Equal(t, "/config/settings.json", resp["path"]) + assert.Equal(t, "jellyfin", resp["docker_container"]) + assert.Equal(t, `{"setting": "value"}`, resp["content"]) + mockAgent.AssertExpectations(t) + }) + + t.Run("ReadFromHostWithoutDocker", func(t *testing.T) { + agents := []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "tower"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + mockAgent.On("ExecuteCommand", mock.Anything, "agent-1", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + // Should NOT wrap with docker exec + return !strings.Contains(cmd.Command, "docker exec") && + strings.Contains(cmd.Command, "cat") && + strings.Contains(cmd.Command, "/etc/hostname") + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "tower", + }, nil) + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: models.StateSnapshot{}}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeFileRead(ctx, "/etc/hostname", "tower", "") // empty docker_container + require.NoError(t, err) + + var resp map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(result.Content[0].Text), &resp)) + assert.True(t, resp["success"].(bool)) + assert.Nil(t, resp["docker_container"]) // should not be in response + mockAgent.AssertExpectations(t) + }) + + t.Run("DockerContainerNotFound", func(t *testing.T) { + agents := []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "tower"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + mockAgent.On("ExecuteCommand", mock.Anything, "agent-1", mock.Anything).Return(&agentexec.CommandResultPayload{ + ExitCode: 1, + Stderr: "Error: No such container: nonexistent", + }, nil) + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: models.StateSnapshot{}}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeFileRead(ctx, "/config/test.json", "tower", "nonexistent") + require.NoError(t, err) + assert.Contains(t, 
result.Content[0].Text, "Failed to read file from container 'nonexistent'") + assert.Contains(t, result.Content[0].Text, "No such container") + mockAgent.AssertExpectations(t) + }) +} + +func TestExecuteFileWriteDocker(t *testing.T) { + ctx := context.Background() + + t.Run("WriteToDockerContainer", func(t *testing.T) { + content := `{"new": "config"}` + encodedContent := base64.StdEncoding.EncodeToString([]byte(content)) + + agents := []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "tower"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + mockAgent.On("ExecuteCommand", mock.Anything, "agent-1", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + // Should wrap with docker exec and use base64 + return strings.Contains(cmd.Command, "docker exec") && + strings.Contains(cmd.Command, "nginx") && + strings.Contains(cmd.Command, "sh -c") && + strings.Contains(cmd.Command, encodedContent) && + strings.Contains(cmd.Command, "base64 -d") && + strings.Contains(cmd.Command, "/etc/nginx/nginx.conf") + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "", + }, nil) + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: models.StateSnapshot{}}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeFileWrite(ctx, "/etc/nginx/nginx.conf", content, "tower", "nginx", map[string]interface{}{}) + require.NoError(t, err) + + var resp map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(result.Content[0].Text), &resp)) + assert.True(t, resp["success"].(bool)) + assert.Equal(t, "write", resp["action"]) + assert.Equal(t, "nginx", resp["docker_container"]) + assert.Equal(t, float64(len(content)), resp["bytes_written"]) + mockAgent.AssertExpectations(t) + }) + + t.Run("WriteControlledRequiresApproval", func(t *testing.T) { + agents := []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "tower"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: models.StateSnapshot{}}, + AgentServer: mockAgent, + ControlLevel: ControlLevelControlled, + }) + result, err := exec.executeFileWrite(ctx, "/config/test.json", "test", "tower", "mycontainer", map[string]interface{}{}) + require.NoError(t, err) + assert.Contains(t, result.Content[0].Text, "APPROVAL_REQUIRED") + assert.Contains(t, result.Content[0].Text, "container: mycontainer") + }) +} + +func TestExecuteFileAppendDocker(t *testing.T) { + ctx := context.Background() + + t.Run("AppendToDockerContainer", func(t *testing.T) { + content := "\nnew line" + encodedContent := base64.StdEncoding.EncodeToString([]byte(content)) + + agents := []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "tower"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + mockAgent.On("ExecuteCommand", mock.Anything, "agent-1", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + // Should use >> for append + return strings.Contains(cmd.Command, "docker exec") && + strings.Contains(cmd.Command, "logcontainer") && + strings.Contains(cmd.Command, encodedContent) && + strings.Contains(cmd.Command, ">>") && + strings.Contains(cmd.Command, "/var/log/app.log") + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "", + }, nil) + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: 
models.StateSnapshot{}}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeFileAppend(ctx, "/var/log/app.log", content, "tower", "logcontainer", map[string]interface{}{}) + require.NoError(t, err) + + var resp map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(result.Content[0].Text), &resp)) + assert.True(t, resp["success"].(bool)) + assert.Equal(t, "append", resp["action"]) + assert.Equal(t, "logcontainer", resp["docker_container"]) + mockAgent.AssertExpectations(t) + }) +} + +func TestExecuteFileWriteLXCVMTargets(t *testing.T) { + ctx := context.Background() + + t.Run("WriteToLXCRoutedCorrectly", func(t *testing.T) { + // Test that file writes to LXC are routed with correct target type/ID + // Agent handles sh -c wrapping, so tool sends raw pipeline command + content := "test content" + encodedContent := base64.StdEncoding.EncodeToString([]byte(content)) + + agents := []agentexec.ConnectedAgent{{AgentID: "proxmox-agent", Hostname: "delly"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + mockAgent.On("ExecuteCommand", mock.Anything, "proxmox-agent", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + // Tool sends raw pipeline, agent wraps in sh -c for LXC + return cmd.TargetType == "container" && + cmd.TargetID == "141" && + strings.Contains(cmd.Command, encodedContent) && + strings.Contains(cmd.Command, "| base64 -d >") && + !strings.Contains(cmd.Command, "docker exec") + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "", + }, nil) + + state := models.StateSnapshot{ + Containers: []models.Container{ + {VMID: 141, Name: "homepage-docker", Node: "delly"}, + }, + } + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeFileWrite(ctx, "/opt/test/config.yaml", content, "homepage-docker", "", map[string]interface{}{}) + require.NoError(t, err) + + var resp map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(result.Content[0].Text), &resp)) + assert.True(t, resp["success"].(bool)) + assert.Equal(t, "write", resp["action"]) + assert.Nil(t, resp["docker_container"]) // No Docker container + mockAgent.AssertExpectations(t) + }) + + t.Run("WriteToVMRoutedCorrectly", func(t *testing.T) { + // Test that file writes to VMs are routed with correct target type/ID + content := "vm config" + encodedContent := base64.StdEncoding.EncodeToString([]byte(content)) + + agents := []agentexec.ConnectedAgent{{AgentID: "proxmox-agent", Hostname: "delly"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + mockAgent.On("ExecuteCommand", mock.Anything, "proxmox-agent", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + return cmd.TargetType == "vm" && + cmd.TargetID == "100" && + strings.Contains(cmd.Command, encodedContent) + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "", + }, nil) + + state := models.StateSnapshot{ + VMs: []models.VM{ + {VMID: 100, Name: "test-vm", Node: "delly"}, + }, + } + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeFileWrite(ctx, "/etc/test.conf", content, "test-vm", "", map[string]interface{}{}) + require.NoError(t, err) + + var resp map[string]interface{} + 
require.NoError(t, json.Unmarshal([]byte(result.Content[0].Text), &resp)) + assert.True(t, resp["success"].(bool)) + mockAgent.AssertExpectations(t) + }) + + t.Run("WriteToDirectHost", func(t *testing.T) { + // Direct host writes use raw pipeline command + content := "host config" + encodedContent := base64.StdEncoding.EncodeToString([]byte(content)) + + agents := []agentexec.ConnectedAgent{{AgentID: "host-agent", Hostname: "tower"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + mockAgent.On("ExecuteCommand", mock.Anything, "host-agent", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + return cmd.TargetType == "host" && + strings.Contains(cmd.Command, encodedContent) && + strings.Contains(cmd.Command, "| base64 -d >") + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "", + }, nil) + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: models.StateSnapshot{}}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeFileWrite(ctx, "/tmp/test.txt", content, "tower", "", map[string]interface{}{}) + require.NoError(t, err) + + var resp map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(result.Content[0].Text), &resp)) + assert.True(t, resp["success"].(bool)) + mockAgent.AssertExpectations(t) + }) + + t.Run("AppendToLXCRoutedCorrectly", func(t *testing.T) { + // Append operations to LXC are routed with correct target type/ID + content := "\nnew line" + encodedContent := base64.StdEncoding.EncodeToString([]byte(content)) + + agents := []agentexec.ConnectedAgent{{AgentID: "proxmox-agent", Hostname: "delly"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + mockAgent.On("ExecuteCommand", mock.Anything, "proxmox-agent", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + return cmd.TargetType == "container" && + cmd.TargetID == "141" && + strings.Contains(cmd.Command, encodedContent) && + strings.Contains(cmd.Command, ">>") // append uses >> + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "", + }, nil) + + state := models.StateSnapshot{ + Containers: []models.Container{ + {VMID: 141, Name: "homepage-docker", Node: "delly"}, + }, + } + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeFileAppend(ctx, "/var/log/app.log", content, "homepage-docker", "", map[string]interface{}{}) + require.NoError(t, err) + + var resp map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(result.Content[0].Text), &resp)) + assert.True(t, resp["success"].(bool)) + assert.Equal(t, "append", resp["action"]) + mockAgent.AssertExpectations(t) + }) +} + +func TestExecuteFileEditDockerNestedRouting(t *testing.T) { + ctx := context.Background() + + t.Run("DockerInsideLXC", func(t *testing.T) { + // Test case: Docker running inside an LXC container + // target_host="homepage-docker" (LXC), docker_container="nginx" + // Command should route through Proxmox node agent with LXC target type + + agents := []agentexec.ConnectedAgent{{AgentID: "proxmox-agent", Hostname: "pve-node"}} + mockAgent := &mockAgentServer{} + mockAgent.On("GetConnectedAgents").Return(agents) + mockAgent.On("ExecuteCommand", mock.Anything, "proxmox-agent", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + // Should have container target type 
for LXC routing + // and command should include docker exec + return cmd.TargetType == "container" && + cmd.TargetID == "141" && + strings.Contains(cmd.Command, "docker exec") && + strings.Contains(cmd.Command, "nginx") + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "file content", + }, nil) + + state := models.StateSnapshot{ + Containers: []models.Container{ + {VMID: 141, Name: "homepage-docker", Node: "pve-node"}, + }, + } + + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeFileRead(ctx, "/config/test.json", "homepage-docker", "nginx") + require.NoError(t, err) + + var resp map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(result.Content[0].Text), &resp)) + assert.True(t, resp["success"].(bool)) + assert.Equal(t, "nginx", resp["docker_container"]) + mockAgent.AssertExpectations(t) + }) +} diff --git a/internal/ai/tools/infrastructure_test.go b/internal/ai/tools/infrastructure_test.go index b77953e6d..f2fd4c922 100644 --- a/internal/ai/tools/infrastructure_test.go +++ b/internal/ai/tools/infrastructure_test.go @@ -20,7 +20,10 @@ func TestExecuteGetDiskHealth(t *testing.T) { } diskHealthProv.On("GetHosts").Return(expectedHosts) - result, err := exec.ExecuteTool(context.Background(), "pulse_get_disk_health", map[string]interface{}{}) + // Use consolidated pulse_storage tool with type: "disk_health" + result, err := exec.ExecuteTool(context.Background(), "pulse_storage", map[string]interface{}{ + "type": "disk_health", + }) assert.NoError(t, err) assert.False(t, result.IsError) } @@ -41,7 +44,10 @@ func TestExecuteGetTemperatures(t *testing.T) { } stateProv.On("GetState").Return(state) - result, err := exec.ExecuteTool(context.Background(), "pulse_get_temperatures", map[string]interface{}{}) + // Use consolidated pulse_metrics tool with type: "temperatures" + result, err := exec.ExecuteTool(context.Background(), "pulse_metrics", map[string]interface{}{ + "type": "temperatures", + }) assert.NoError(t, err) assert.False(t, result.IsError) } diff --git a/internal/ai/tools/kubernetes_control_test.go b/internal/ai/tools/kubernetes_control_test.go new file mode 100644 index 000000000..e1318d282 --- /dev/null +++ b/internal/ai/tools/kubernetes_control_test.go @@ -0,0 +1,523 @@ +package tools + +import ( + "context" + "testing" + + "github.com/rcourtman/pulse-go-rewrite/internal/agentexec" + "github.com/rcourtman/pulse-go-rewrite/internal/models" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +func TestValidateKubernetesResourceID(t *testing.T) { + tests := []struct { + name string + value string + wantErr bool + }{ + {"valid simple", "nginx", false}, + {"valid with dash", "my-app", false}, + {"valid with dot", "my.app", false}, + {"valid with numbers", "app123", false}, + {"valid complex", "my-app-v1.2.3", false}, + {"empty", "", true}, + {"uppercase", "MyApp", true}, + {"underscore", "my_app", true}, + {"space", "my app", true}, + {"special char", "my@app", true}, + {"too long", string(make([]byte, 254)), true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validateKubernetesResourceID(tt.value) + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestFindAgentForKubernetesCluster(t *testing.T) { + t.Run("NoStateProvider", func(t *testing.T) { + exec := 
NewPulseToolExecutor(ExecutorConfig{}) + agentID, cluster, err := exec.findAgentForKubernetesCluster("test") + assert.Error(t, err) + assert.Empty(t, agentID) + assert.Nil(t, cluster) + assert.Contains(t, err.Error(), "state provider not available") + }) + + t.Run("ClusterNotFound", func(t *testing.T) { + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: state}}) + agentID, cluster, err := exec.findAgentForKubernetesCluster("nonexistent") + assert.Error(t, err) + assert.Empty(t, agentID) + assert.Nil(t, cluster) + assert.Contains(t, err.Error(), "not found") + }) + + t.Run("ClusterNoAgent", func(t *testing.T) { + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", AgentID: ""}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: state}}) + agentID, cluster, err := exec.findAgentForKubernetesCluster("cluster-1") + assert.Error(t, err) + assert.Empty(t, agentID) + assert.Nil(t, cluster) + assert.Contains(t, err.Error(), "no agent configured") + }) + + t.Run("FoundByID", func(t *testing.T) { + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", AgentID: "agent-1"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: state}}) + agentID, cluster, err := exec.findAgentForKubernetesCluster("c1") + assert.NoError(t, err) + assert.Equal(t, "agent-1", agentID) + assert.NotNil(t, cluster) + assert.Equal(t, "cluster-1", cluster.Name) + }) + + t.Run("FoundByDisplayName", func(t *testing.T) { + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", DisplayName: "Production", AgentID: "agent-1"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: state}}) + agentID, _, err := exec.findAgentForKubernetesCluster("Production") + assert.NoError(t, err) + assert.Equal(t, "agent-1", agentID) + }) + + t.Run("FoundByCustomDisplayName", func(t *testing.T) { + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", CustomDisplayName: "My Cluster", AgentID: "agent-1"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: state}}) + agentID, _, err := exec.findAgentForKubernetesCluster("My Cluster") + assert.NoError(t, err) + assert.Equal(t, "agent-1", agentID) + }) +} + +func TestExecuteKubernetesScale(t *testing.T) { + ctx := context.Background() + + t.Run("MissingCluster", func(t *testing.T) { + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: models.StateSnapshot{}}}) + result, err := exec.executeKubernetesScale(ctx, map[string]interface{}{}) + require.NoError(t, err) + assert.True(t, result.IsError) + assert.Contains(t, result.Content[0].Text, "cluster is required") + }) + + t.Run("MissingDeployment", func(t *testing.T) { + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: models.StateSnapshot{}}}) + result, err := exec.executeKubernetesScale(ctx, map[string]interface{}{ + "cluster": "test", + }) + require.NoError(t, err) + assert.True(t, result.IsError) + assert.Contains(t, result.Content[0].Text, "deployment is required") + }) + + t.Run("MissingReplicas", func(t *testing.T) { + 
exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: models.StateSnapshot{}}}) + result, err := exec.executeKubernetesScale(ctx, map[string]interface{}{ + "cluster": "test", + "deployment": "nginx", + }) + require.NoError(t, err) + assert.True(t, result.IsError) + assert.Contains(t, result.Content[0].Text, "replicas is required") + }) + + t.Run("InvalidNamespace", func(t *testing.T) { + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: models.StateSnapshot{}}}) + result, err := exec.executeKubernetesScale(ctx, map[string]interface{}{ + "cluster": "test", + "deployment": "nginx", + "replicas": 3, + "namespace": "Invalid_NS", + }) + require.NoError(t, err) + assert.True(t, result.IsError) + assert.Contains(t, result.Content[0].Text, "invalid namespace") + }) + + t.Run("ReadOnlyMode", func(t *testing.T) { + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", AgentID: "agent-1"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + ControlLevel: ControlLevelReadOnly, + }) + result, err := exec.executeKubernetesScale(ctx, map[string]interface{}{ + "cluster": "cluster-1", + "deployment": "nginx", + "replicas": 3, + }) + require.NoError(t, err) + assert.Contains(t, result.Content[0].Text, "not available in read-only mode") + }) + + t.Run("ControlledRequiresApproval", func(t *testing.T) { + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", AgentID: "agent-1", DisplayName: "Cluster One"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + ControlLevel: ControlLevelControlled, + }) + result, err := exec.executeKubernetesScale(ctx, map[string]interface{}{ + "cluster": "cluster-1", + "deployment": "nginx", + "replicas": 3, + }) + require.NoError(t, err) + assert.Contains(t, result.Content[0].Text, "APPROVAL_REQUIRED") + assert.Contains(t, result.Content[0].Text, "scale") + }) + + t.Run("ExecuteSuccess", func(t *testing.T) { + mockAgent := &mockAgentServer{ + agents: []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "k8s-host"}}, + } + mockAgent.On("ExecuteCommand", mock.Anything, "agent-1", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + return cmd.Command == "kubectl -n default scale deployment nginx --replicas=3" && + cmd.TargetType == "host" + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "deployment.apps/nginx scaled", + }, nil) + + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", AgentID: "agent-1"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeKubernetesScale(ctx, map[string]interface{}{ + "cluster": "cluster-1", + "deployment": "nginx", + "replicas": 3, + }) + require.NoError(t, err) + assert.Contains(t, result.Content[0].Text, "Successfully scaled") + assert.Contains(t, result.Content[0].Text, "nginx") + mockAgent.AssertExpectations(t) + }) +} + +func TestExecuteKubernetesRestart(t *testing.T) { + ctx := context.Background() + + t.Run("MissingDeployment", func(t *testing.T) { + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: models.StateSnapshot{}}}) + result, err := 
exec.executeKubernetesRestart(ctx, map[string]interface{}{ + "cluster": "test", + }) + require.NoError(t, err) + assert.True(t, result.IsError) + assert.Contains(t, result.Content[0].Text, "deployment is required") + }) + + t.Run("ExecuteSuccess", func(t *testing.T) { + mockAgent := &mockAgentServer{ + agents: []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "k8s-host"}}, + } + mockAgent.On("ExecuteCommand", mock.Anything, "agent-1", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + return cmd.Command == "kubectl -n default rollout restart deployment/nginx" + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "deployment.apps/nginx restarted", + }, nil) + + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", AgentID: "agent-1"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeKubernetesRestart(ctx, map[string]interface{}{ + "cluster": "cluster-1", + "deployment": "nginx", + }) + require.NoError(t, err) + assert.Contains(t, result.Content[0].Text, "Successfully initiated rollout restart") + mockAgent.AssertExpectations(t) + }) +} + +func TestExecuteKubernetesDeletePod(t *testing.T) { + ctx := context.Background() + + t.Run("MissingPod", func(t *testing.T) { + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: models.StateSnapshot{}}}) + result, err := exec.executeKubernetesDeletePod(ctx, map[string]interface{}{ + "cluster": "test", + }) + require.NoError(t, err) + assert.True(t, result.IsError) + assert.Contains(t, result.Content[0].Text, "pod is required") + }) + + t.Run("ExecuteSuccess", func(t *testing.T) { + mockAgent := &mockAgentServer{ + agents: []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "k8s-host"}}, + } + mockAgent.On("ExecuteCommand", mock.Anything, "agent-1", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + return cmd.Command == "kubectl -n default delete pod nginx-abc123" + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "pod \"nginx-abc123\" deleted", + }, nil) + + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", AgentID: "agent-1"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeKubernetesDeletePod(ctx, map[string]interface{}{ + "cluster": "cluster-1", + "pod": "nginx-abc123", + }) + require.NoError(t, err) + assert.Contains(t, result.Content[0].Text, "Successfully deleted pod") + mockAgent.AssertExpectations(t) + }) +} + +func TestExecuteKubernetesExec(t *testing.T) { + ctx := context.Background() + + t.Run("MissingCommand", func(t *testing.T) { + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: models.StateSnapshot{}}}) + result, err := exec.executeKubernetesExec(ctx, map[string]interface{}{ + "cluster": "test", + "pod": "nginx", + }) + require.NoError(t, err) + assert.True(t, result.IsError) + assert.Contains(t, result.Content[0].Text, "command is required") + }) + + t.Run("ExecuteWithoutContainer", func(t *testing.T) { + mockAgent := &mockAgentServer{ + agents: []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "k8s-host"}}, + } + mockAgent.On("ExecuteCommand", 
mock.Anything, "agent-1", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + return cmd.Command == "kubectl -n default exec nginx-pod -- cat /etc/nginx/nginx.conf" + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "server { listen 80; }", + }, nil) + + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", AgentID: "agent-1"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeKubernetesExec(ctx, map[string]interface{}{ + "cluster": "cluster-1", + "pod": "nginx-pod", + "command": "cat /etc/nginx/nginx.conf", + }) + require.NoError(t, err) + assert.Contains(t, result.Content[0].Text, "Command executed") + assert.Contains(t, result.Content[0].Text, "server { listen 80; }") + mockAgent.AssertExpectations(t) + }) + + t.Run("ExecuteWithContainer", func(t *testing.T) { + mockAgent := &mockAgentServer{ + agents: []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "k8s-host"}}, + } + mockAgent.On("ExecuteCommand", mock.Anything, "agent-1", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + return cmd.Command == "kubectl -n kube-system exec coredns-pod -c coredns -- cat /etc/coredns/Corefile" + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: ".:53 { forward . /etc/resolv.conf }", + }, nil) + + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", AgentID: "agent-1"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeKubernetesExec(ctx, map[string]interface{}{ + "cluster": "cluster-1", + "namespace": "kube-system", + "pod": "coredns-pod", + "container": "coredns", + "command": "cat /etc/coredns/Corefile", + }) + require.NoError(t, err) + assert.Contains(t, result.Content[0].Text, "Command executed") + mockAgent.AssertExpectations(t) + }) +} + +func TestExecuteKubernetesLogs(t *testing.T) { + ctx := context.Background() + + t.Run("MissingPod", func(t *testing.T) { + exec := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{state: models.StateSnapshot{}}}) + result, err := exec.executeKubernetesLogs(ctx, map[string]interface{}{ + "cluster": "test", + }) + require.NoError(t, err) + assert.True(t, result.IsError) + assert.Contains(t, result.Content[0].Text, "pod is required") + }) + + t.Run("LogsNoApprovalNeeded", func(t *testing.T) { + // Logs should work even in controlled mode without approval + mockAgent := &mockAgentServer{ + agents: []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "k8s-host"}}, + } + mockAgent.On("ExecuteCommand", mock.Anything, "agent-1", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + return cmd.Command == "kubectl -n default logs nginx-pod --tail=50" + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "2024-01-01 10:00:00 Request received\n2024-01-01 10:00:01 Response sent", + }, nil) + + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", AgentID: "agent-1"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelControlled, // Even in controlled 
mode + }) + result, err := exec.executeKubernetesLogs(ctx, map[string]interface{}{ + "cluster": "cluster-1", + "pod": "nginx-pod", + "lines": 50, + }) + require.NoError(t, err) + // Should NOT require approval since logs is read-only + assert.NotContains(t, result.Content[0].Text, "APPROVAL_REQUIRED") + assert.Contains(t, result.Content[0].Text, "Logs from pod") + mockAgent.AssertExpectations(t) + }) + + t.Run("LogsWithContainer", func(t *testing.T) { + mockAgent := &mockAgentServer{ + agents: []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "k8s-host"}}, + } + mockAgent.On("ExecuteCommand", mock.Anything, "agent-1", mock.MatchedBy(func(cmd agentexec.ExecuteCommandPayload) bool { + return cmd.Command == "kubectl -n default logs nginx-pod -c sidecar --tail=100" + })).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "Sidecar logs here", + }, nil) + + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", AgentID: "agent-1"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeKubernetesLogs(ctx, map[string]interface{}{ + "cluster": "cluster-1", + "pod": "nginx-pod", + "container": "sidecar", + }) + require.NoError(t, err) + assert.Contains(t, result.Content[0].Text, "Logs from pod") + mockAgent.AssertExpectations(t) + }) + + t.Run("EmptyLogs", func(t *testing.T) { + mockAgent := &mockAgentServer{ + agents: []agentexec.ConnectedAgent{{AgentID: "agent-1", Hostname: "k8s-host"}}, + } + mockAgent.On("ExecuteCommand", mock.Anything, "agent-1", mock.Anything).Return(&agentexec.CommandResultPayload{ + ExitCode: 0, + Stdout: "", + }, nil) + + state := models.StateSnapshot{ + KubernetesClusters: []models.KubernetesCluster{ + {ID: "c1", Name: "cluster-1", AgentID: "agent-1"}, + }, + } + exec := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + AgentServer: mockAgent, + ControlLevel: ControlLevelAutonomous, + }) + result, err := exec.executeKubernetesLogs(ctx, map[string]interface{}{ + "cluster": "cluster-1", + "pod": "nginx-pod", + }) + require.NoError(t, err) + assert.Contains(t, result.Content[0].Text, "No logs found") + mockAgent.AssertExpectations(t) + }) +} + +func TestFormatKubernetesApprovalNeeded(t *testing.T) { + result := formatKubernetesApprovalNeeded("scale", "nginx", "default", "production", "kubectl scale...", "approval-123") + assert.Contains(t, result, "APPROVAL_REQUIRED") + assert.Contains(t, result, "scale") + assert.Contains(t, result, "nginx") + assert.Contains(t, result, "default") + assert.Contains(t, result, "production") + assert.Contains(t, result, "approval-123") +} diff --git a/internal/ai/tools/protocol.go b/internal/ai/tools/protocol.go index ec5d072b4..20049cc32 100644 --- a/internal/ai/tools/protocol.go +++ b/internal/ai/tools/protocol.go @@ -206,6 +206,82 @@ type PromptMessage struct { Content Content `json:"content"` } +// ToolResponse is a consistent envelope for tool results. +// All tool results should use this structure for predictable parsing. 
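+// For example (derived from the json tags below; illustrative only), a success
+// marshals as {"ok":true,"data":{...}} and a policy block as
+// {"ok":false,"error":{"code":"POLICY_BLOCKED","message":"...","blocked":true}}.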
+type ToolResponse struct { + OK bool `json:"ok"` // true if tool succeeded + Data interface{} `json:"data,omitempty"` // result data if ok=true + Error *ToolError `json:"error,omitempty"` // error details if ok=false + Meta map[string]interface{} `json:"meta,omitempty"` // optional metadata +} + +// ToolError provides consistent error structure for tool failures. +// Use Blocked=true for policy/validation blocks, Failed=true for runtime errors. +type ToolError struct { + Code string `json:"code"` // Error code (e.g., "STRICT_RESOLUTION", "NOT_FOUND") + Message string `json:"message"` // Human-readable message + Blocked bool `json:"blocked,omitempty"` // True if blocked by policy/validation + Failed bool `json:"failed,omitempty"` // True if runtime failure + Retryable bool `json:"retryable,omitempty"` // True if auto-retry might succeed + Details map[string]interface{} `json:"details,omitempty"` // Additional context +} + +// Common error codes +const ( + ErrCodeStrictResolution = "STRICT_RESOLUTION" + ErrCodeNotFound = "NOT_FOUND" + ErrCodeActionNotAllowed = "ACTION_NOT_ALLOWED" + ErrCodePolicyBlocked = "POLICY_BLOCKED" + ErrCodeApprovalRequired = "APPROVAL_REQUIRED" + ErrCodeInvalidInput = "INVALID_INPUT" + ErrCodeExecutionFailed = "EXECUTION_FAILED" + ErrCodeNoAgent = "NO_AGENT" +) + +// NewToolSuccess creates a successful tool response +func NewToolSuccess(data interface{}) ToolResponse { + return ToolResponse{ + OK: true, + Data: data, + } +} + +// NewToolSuccessWithMeta creates a successful tool response with metadata +func NewToolSuccessWithMeta(data interface{}, meta map[string]interface{}) ToolResponse { + return ToolResponse{ + OK: true, + Data: data, + Meta: meta, + } +} + +// NewToolBlockedError creates a policy/validation blocked error +func NewToolBlockedError(code, message string, details map[string]interface{}) ToolResponse { + return ToolResponse{ + OK: false, + Error: &ToolError{ + Code: code, + Message: message, + Blocked: true, + Details: details, + }, + } +} + +// NewToolFailedError creates a runtime failure error +func NewToolFailedError(code, message string, retryable bool, details map[string]interface{}) ToolResponse { + return ToolResponse{ + OK: false, + Error: &ToolError{ + Code: code, + Message: message, + Failed: true, + Retryable: retryable, + Details: details, + }, + } +} + // Helper functions // NewTextContent creates a text content object @@ -244,3 +320,16 @@ func NewJSONResult(data interface{}) CallToolResult { IsError: false, } } + +// NewToolResponseResult creates a CallToolResult from a ToolResponse +// This provides the consistent envelope while maintaining MCP protocol compatibility +func NewToolResponseResult(resp ToolResponse) CallToolResult { + b, err := json.Marshal(resp) + if err != nil { + return NewErrorResult(err) + } + return CallToolResult{ + Content: []Content{NewTextContent(string(b))}, + IsError: !resp.OK, + } +} diff --git a/internal/ai/tools/tools_alerts.go b/internal/ai/tools/tools_alerts.go new file mode 100644 index 000000000..0be52ec2c --- /dev/null +++ b/internal/ai/tools/tools_alerts.go @@ -0,0 +1,114 @@ +package tools + +import ( + "context" + "fmt" +) + +// registerAlertsTools registers the consolidated pulse_alerts tool +func (e *PulseToolExecutor) registerAlertsTools() { + e.registry.Register(RegisteredTool{ + Definition: Tool{ + Name: "pulse_alerts", + Description: `Manage alerts and AI patrol findings. + +Actions: +- list: List active threshold alerts (CPU > 80%, disk full, etc.) 
+- findings: List AI patrol findings (detected issues) +- resolved: List recently resolved alerts +- resolve: Mark a finding as resolved +- dismiss: Dismiss a finding as not an issue + +Examples: +- List critical alerts: action="list", severity="critical" +- List all findings: action="findings" +- List resolved: action="resolved" +- Resolve finding: action="resolve", finding_id="abc123", resolution_note="Fixed by restarting service" +- Dismiss finding: action="dismiss", finding_id="abc123", reason="expected_behavior", note="This is normal during maintenance"`, + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]PropertySchema{ + "action": { + Type: "string", + Description: "Alert action to perform", + Enum: []string{"list", "findings", "resolved", "resolve", "dismiss"}, + }, + "severity": { + Type: "string", + Description: "Filter by severity: critical, warning, info (for list, findings)", + Enum: []string{"critical", "warning", "info"}, + }, + "resource_type": { + Type: "string", + Description: "Filter by resource type: vm, container, node, docker (for findings)", + }, + "resource_id": { + Type: "string", + Description: "Filter by resource ID (for findings)", + }, + "finding_id": { + Type: "string", + Description: "Finding ID (for resolve, dismiss)", + }, + "resolution_note": { + Type: "string", + Description: "Resolution note (for resolve action)", + }, + "note": { + Type: "string", + Description: "Explanation note (for dismiss action)", + }, + "reason": { + Type: "string", + Description: "Dismissal reason: not_an_issue, expected_behavior, will_fix_later", + Enum: []string{"not_an_issue", "expected_behavior", "will_fix_later"}, + }, + "include_dismissed": { + Type: "boolean", + Description: "Include previously dismissed findings (for findings)", + }, + "type": { + Type: "string", + Description: "Filter resolved alerts by type", + }, + "level": { + Type: "string", + Description: "Filter resolved alerts by level: critical, warning", + }, + "limit": { + Type: "integer", + Description: "Maximum number of results (default: 100)", + }, + "offset": { + Type: "integer", + Description: "Number of results to skip", + }, + }, + Required: []string{"action"}, + }, + }, + Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { + return exec.executeAlerts(ctx, args) + }, + }) +} + +// executeAlerts routes to the appropriate alerts handler based on action +// All handler functions are implemented in tools_patrol.go +func (e *PulseToolExecutor) executeAlerts(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + action, _ := args["action"].(string) + switch action { + case "list": + return e.executeListAlerts(ctx, args) + case "findings": + return e.executeListFindings(ctx, args) + case "resolved": + return e.executeListResolvedAlerts(ctx, args) + case "resolve": + return e.executeResolveFinding(ctx, args) + case "dismiss": + return e.executeDismissFinding(ctx, args) + default: + return NewErrorResult(fmt.Errorf("unknown action: %s. Use: list, findings, resolved, resolve, dismiss", action)), nil + } +} diff --git a/internal/ai/tools/tools_control.go b/internal/ai/tools/tools_control.go index 59e0da3d2..5f4690983 100644 --- a/internal/ai/tools/tools_control.go +++ b/internal/ai/tools/tools_control.go @@ -21,17 +21,11 @@ func (e *PulseToolExecutor) registerControlTools() { Name: "pulse_run_command", Description: `Execute a shell command on infrastructure via a connected agent. 
-Returns: Command output (stdout/stderr) and exit code. Exit code 0 = success. +This tool has built-in user approval - just call it directly when requested. +Prefer query tools first. If multiple agents exist and target is unclear, ask which host. -Use when: User explicitly asks to run a command, or monitoring data is insufficient for a targeted diagnosis. - -Prefer: Pulse monitoring tools (pulse_list_infrastructure, pulse_search_resources, pulse_get_topology, pulse_list_alerts, pulse_get_metrics) before running commands. - -Scope: Target a single host/agent. If multiple agents are connected and target_host is unclear, ask the user to choose. - -Do NOT use for: Checking if something is running (use pulse_get_topology), or starting/stopping VMs/containers (use pulse_control_guest or pulse_control_docker). - -Note: Commands run on the HOST, not inside VMs/containers. To run inside an LXC, use: pct exec -- `, +Routing: target_host can be a Proxmox host (delly), an LXC name (homepage-docker), or a VM name. +Commands targeting LXCs/VMs are automatically routed through the Proxmox host agent.`, InputSchema: InputSchema{ Type: "object", Properties: map[string]PropertySchema{ @@ -60,17 +54,12 @@ Note: Commands run on the HOST, not inside VMs/containers. To run inside an LXC, e.registry.Register(RegisteredTool{ Definition: Tool{ Name: "pulse_control_guest", - Description: `Start, stop, or restart Proxmox VMs and LXC containers. + Description: `Start, stop, restart, or delete Proxmox VMs and LXC containers. -Returns: Success message with VM/container name, or error if failed. - -Use when: User asks to start, stop, restart, or shutdown a VM or LXC container. - -Prefer: Use pulse_get_topology or pulse_search_resources to confirm the guest and node before control actions. - -Do NOT use for: Docker containers (use pulse_control_docker), or checking status (use pulse_get_topology). - -Note: These are LXC containers managed by Proxmox, NOT Docker containers. Uses 'pct' commands internally.`, +This tool has built-in user approval - just call it directly when requested. +Use pulse_search_resources to find the guest first if needed. +For Docker containers, use pulse_control_docker instead. +Delete requires the guest to be stopped first.`, InputSchema: InputSchema{ Type: "object", Properties: map[string]PropertySchema{ @@ -80,8 +69,8 @@ Note: These are LXC containers managed by Proxmox, NOT Docker containers. 
Uses ' }, "action": { Type: "string", - Description: "start, stop (immediate), shutdown (graceful), or restart", - Enum: []string{"start", "stop", "shutdown", "restart"}, + Description: "start, stop (immediate), shutdown (graceful), restart, or delete (permanent removal - guest must be stopped first)", + Enum: []string{"start", "stop", "shutdown", "restart", "delete"}, }, "force": { Type: "boolean", @@ -147,6 +136,32 @@ func (e *PulseToolExecutor) executeRunCommand(ctx context.Context, args map[stri return NewErrorResult(fmt.Errorf("command is required")), nil } + // Validate resource is in resolved context + // Uses command risk classification: read-only commands bypass strict mode + // With PULSE_STRICT_RESOLUTION=true, write commands are blocked on undiscovered resources + if targetHost != "" { + validation := e.validateResolvedResourceForExec(targetHost, command, true) + if validation.IsBlocked() { + // Hard validation failure - return consistent error envelope + return NewToolResponseResult(validation.StrictError.ToToolResponse()), nil + } + if validation.ErrorMsg != "" { + // Soft validation - log warning but allow operation + log.Warn(). + Str("target", targetHost). + Str("command", command). + Str("validation_error", validation.ErrorMsg). + Msg("[Control] Target resource not in resolved context - may indicate model hallucination") + } + + // Validate routing context - block if targeting a Proxmox host when child resources exist + // This prevents accidentally executing commands on the host when user meant to target an LXC/VM + routingResult := e.validateRoutingContext(targetHost) + if routingResult.IsBlocked() { + return NewToolResponseResult(routingResult.RoutingError.ToToolResponse()), nil + } + } + // Note: Control level read_only check is now centralized in registry.Execute() // Check if this is a pre-approved execution (agentic loop re-executing after user approval) @@ -197,20 +212,35 @@ func (e *PulseToolExecutor) executeRunCommand(ctx context.Context, args map[stri return NewErrorResult(fmt.Errorf("no agent server available")), nil } - agentID := e.findAgentForCommand(runOnHost, targetHost) - if agentID == "" { + // Resolve target to the correct agent and routing info (with full provenance) + // If targetHost is an LXC/VM name, this routes to the Proxmox host agent + // with the correct TargetType and TargetID for pct exec / qm guest exec + routing := e.resolveTargetForCommandFull(targetHost) + if routing.AgentID == "" { + if targetHost != "" { + if routing.TargetType == "container" || routing.TargetType == "vm" { + return NewErrorResult(fmt.Errorf("'%s' is a %s but no agent is available on its Proxmox host. Install Pulse Unified Agent on the Proxmox node.", targetHost, routing.TargetType)), nil + } + return NewErrorResult(fmt.Errorf("no agent available for target '%s'. Specify a valid hostname with a connected agent.", targetHost)), nil + } return NewErrorResult(fmt.Errorf("no agent available for target")), nil } - targetType := "container" - if runOnHost { - targetType = "host" - } + log.Debug(). + Str("target_host", targetHost). + Str("agent_id", routing.AgentID). + Str("agent_host", routing.AgentHostname). + Str("resolved_kind", routing.ResolvedKind). + Str("resolved_node", routing.ResolvedNode). + Str("transport", routing.Transport). + Str("target_type", routing.TargetType). + Str("target_id", routing.TargetID). 
+ Msg("[pulse_control] Routing command execution") - result, err := e.agentServer.ExecuteCommand(ctx, agentID, agentexec.ExecuteCommandPayload{ + result, err := e.agentServer.ExecuteCommand(ctx, routing.AgentID, agentexec.ExecuteCommandPayload{ Command: command, - TargetType: targetType, - TargetID: e.targetID, + TargetType: routing.TargetType, + TargetID: routing.TargetID, }) if err != nil { return NewErrorResult(err), nil @@ -224,11 +254,12 @@ func (e *PulseToolExecutor) executeRunCommand(ctx context.Context, args map[stri return NewTextResult(fmt.Sprintf("Command failed (exit code %d):\n%s", result.ExitCode, output)), nil } - // Success - include guidance to prevent unnecessary verification + // Success - always show output explicitly to prevent LLM hallucination + // When output is empty, we must be explicit about it so the LLM doesn't fabricate results if output == "" { - return NewTextResult("✓ Command completed successfully (exit code 0). No verification needed."), nil + return NewTextResult("Command completed successfully (exit code 0).\n\nOutput:\n(no output)"), nil } - return NewTextResult(fmt.Sprintf("✓ Command completed successfully (exit code 0). No verification needed.\n%s", output)), nil + return NewTextResult(fmt.Sprintf("Command completed successfully (exit code 0).\n\nOutput:\n%s", output)), nil } func (e *PulseToolExecutor) executeControlGuest(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { @@ -243,9 +274,25 @@ func (e *PulseToolExecutor) executeControlGuest(ctx context.Context, args map[st return NewErrorResult(fmt.Errorf("action is required")), nil } - validActions := map[string]bool{"start": true, "stop": true, "shutdown": true, "restart": true} + validActions := map[string]bool{"start": true, "stop": true, "shutdown": true, "restart": true, "delete": true} if !validActions[action] { - return NewErrorResult(fmt.Errorf("invalid action: %s. Use start, stop, shutdown, or restart", action)), nil + return NewErrorResult(fmt.Errorf("invalid action: %s. Use start, stop, shutdown, restart, or delete", action)), nil + } + + // Validate resource is in resolved context + // With PULSE_STRICT_RESOLUTION=true, this blocks execution on undiscovered resources + validation := e.validateResolvedResource(guestID, action, true) + if validation.IsBlocked() { + // Hard validation failure - return consistent error envelope + return NewToolResponseResult(validation.StrictError.ToToolResponse()), nil + } + if validation.ErrorMsg != "" { + // Soft validation - log warning but allow operation + log.Warn(). + Str("guest_id", guestID). + Str("action", action). + Str("validation_error", validation.ErrorMsg). + Msg("[ControlGuest] Guest not in resolved context - may indicate model hallucination") } // Note: Control level read_only check is now centralized in registry.Execute() @@ -269,6 +316,11 @@ func (e *PulseToolExecutor) executeControlGuest(ctx context.Context, args map[st cmdTool = "qm" } + // For delete action, verify guest is stopped first + if action == "delete" && guest.Status != "stopped" { + return NewTextResult(fmt.Sprintf("Cannot delete %s (VMID %d) - it is currently %s. 
Stop it first, then try deleting again.", guest.Name, guest.VMID, guest.Status)), nil + } + var command string switch action { case "start": @@ -279,6 +331,9 @@ func (e *PulseToolExecutor) executeControlGuest(ctx context.Context, args map[st command = fmt.Sprintf("%s shutdown %d", cmdTool, guest.VMID) case "restart": command = fmt.Sprintf("%s reboot %d", cmdTool, guest.VMID) + case "delete": + // Delete uses 'destroy' subcommand with --purge to also remove associated storage + command = fmt.Sprintf("%s destroy %d --purge", cmdTool, guest.VMID) } if force && action == "stop" { @@ -355,6 +410,23 @@ func (e *PulseToolExecutor) executeControlDocker(ctx context.Context, args map[s return NewErrorResult(fmt.Errorf("invalid action: %s. Use start, stop, or restart", action)), nil } + // Validate resource is in resolved context + // With PULSE_STRICT_RESOLUTION=true, this blocks execution on undiscovered resources + validation := e.validateResolvedResource(containerName, action, true) + if validation.IsBlocked() { + // Hard validation failure - return consistent error envelope + return NewToolResponseResult(validation.StrictError.ToToolResponse()), nil + } + if validation.ErrorMsg != "" { + // Soft validation - log warning but allow operation + log.Warn(). + Str("container", containerName). + Str("action", action). + Str("host", hostName). + Str("validation_error", validation.ErrorMsg). + Msg("[ControlDocker] Container not in resolved context - may indicate model hallucination") + } + // Note: Control level read_only check is now centralized in registry.Execute() // Check if this is a pre-approved execution (agentic loop re-executing after user approval) @@ -392,15 +464,30 @@ func (e *PulseToolExecutor) executeControlDocker(ctx context.Context, args map[s return NewErrorResult(fmt.Errorf("no agent server available")), nil } - agentID := e.findAgentForDockerHost(dockerHost) - if agentID == "" { + // Resolve the Docker host to the correct agent and routing info (with full provenance) + routing := e.resolveDockerHostRoutingFull(dockerHost) + if routing.AgentID == "" { + if routing.TargetType == "container" || routing.TargetType == "vm" { + return NewTextResult(fmt.Sprintf("Docker host '%s' is a %s but no agent is available on its Proxmox host. Install Pulse Unified Agent on the Proxmox node.", dockerHost.Hostname, routing.TargetType)), nil + } return NewTextResult(fmt.Sprintf("No agent available on Docker host '%s'. Install Pulse Unified Agent on the host to enable control.", dockerHost.Hostname)), nil } - result, err := e.agentServer.ExecuteCommand(ctx, agentID, agentexec.ExecuteCommandPayload{ + log.Debug(). + Str("docker_host", dockerHost.Hostname). + Str("agent_id", routing.AgentID). + Str("agent_host", routing.AgentHostname). + Str("resolved_kind", routing.ResolvedKind). + Str("resolved_node", routing.ResolvedNode). + Str("transport", routing.Transport). + Str("target_type", routing.TargetType). + Str("target_id", routing.TargetID). 
+ Msg("[pulse_control docker] Routing docker command execution") + + result, err := e.agentServer.ExecuteCommand(ctx, routing.AgentID, agentexec.ExecuteCommandPayload{ Command: command, - TargetType: "host", - TargetID: "", + TargetType: routing.TargetType, + TargetID: routing.TargetID, }) if err != nil { return NewErrorResult(err), nil @@ -420,29 +507,194 @@ func (e *PulseToolExecutor) executeControlDocker(ctx context.Context, args map[s // Helper methods for control tools -func (e *PulseToolExecutor) findAgentForCommand(runOnHost bool, targetHost string) string { +// CommandRoutingResult contains full routing information for command execution. +// This provides the provenance needed to verify where commands actually run. +type CommandRoutingResult struct { + // Routing info for agent + AgentID string // The agent that will execute the command + TargetType string // "host", "container", or "vm" + TargetID string // VMID for LXC/VM, empty for host + + // Provenance info + AgentHostname string // Hostname of the agent + ResolvedKind string // What kind of resource we resolved to: "node", "lxc", "vm", "docker", "host" + ResolvedNode string // Proxmox node name (if applicable) + Transport string // How command will be executed: "direct", "pct_exec", "qm_guest_exec" +} + +// resolveTargetForCommandFull resolves a target_host to full routing info including provenance. +// Use this for write operations where you need to verify execution context. +// +// CRITICAL ORDERING: Topology resolution (state.ResolveResource) happens FIRST. +// Agent hostname matching is a FALLBACK only when the state doesn't know the resource. +// This prevents the "hostname collision" bug where an agent with hostname matching an LXC name +// causes commands to execute on the node instead of inside the LXC via pct exec. +func (e *PulseToolExecutor) resolveTargetForCommandFull(targetHost string) CommandRoutingResult { + result := CommandRoutingResult{ + TargetType: "host", + Transport: "direct", + } + if e.agentServer == nil { - return "" + return result } agents := e.agentServer.GetConnectedAgents() if len(agents) == 0 { - return "" + return result } - if targetHost != "" { - for _, agent := range agents { - if agent.Hostname == targetHost || agent.AgentID == targetHost { - return agent.AgentID + if targetHost == "" { + // No target_host specified - require exactly one agent or fail + if len(agents) > 1 { + return result + } + result.AgentID = agents[0].AgentID + result.AgentHostname = agents[0].Hostname + result.ResolvedKind = "host" + return result + } + + // STEP 1: Consult topology (state) FIRST — this is authoritative. + // If the state knows about this resource, use topology-based routing. + // This prevents hostname collisions from masquerading as host targets. 
+ if e.stateProvider != nil { + state := e.stateProvider.GetState() + loc := state.ResolveResource(targetHost) + + if loc.Found { + // Route based on resource type + switch loc.ResourceType { + case "node": + // Direct Proxmox node + nodeAgentID := e.findAgentForNode(loc.Node) + result.AgentID = nodeAgentID + result.ResolvedKind = "node" + result.ResolvedNode = loc.Node + for _, agent := range agents { + if agent.AgentID == nodeAgentID { + result.AgentHostname = agent.Hostname + break + } + } + return result + + case "lxc": + // LXC container - route through Proxmox node agent via pct exec + nodeAgentID := e.findAgentForNode(loc.Node) + result.ResolvedKind = "lxc" + result.ResolvedNode = loc.Node + result.TargetType = "container" + result.TargetID = fmt.Sprintf("%d", loc.VMID) + result.Transport = "pct_exec" + if nodeAgentID != "" { + result.AgentID = nodeAgentID + for _, agent := range agents { + if agent.AgentID == nodeAgentID { + result.AgentHostname = agent.Hostname + break + } + } + } + return result + + case "vm": + // VM - route through Proxmox node agent via qm guest exec + nodeAgentID := e.findAgentForNode(loc.Node) + result.ResolvedKind = "vm" + result.ResolvedNode = loc.Node + result.TargetType = "vm" + result.TargetID = fmt.Sprintf("%d", loc.VMID) + result.Transport = "qm_guest_exec" + if nodeAgentID != "" { + result.AgentID = nodeAgentID + for _, agent := range agents { + if agent.AgentID == nodeAgentID { + result.AgentHostname = agent.Hostname + break + } + } + } + return result + + case "docker", "dockerhost": + // Docker container or Docker host + result.ResolvedKind = loc.ResourceType + result.ResolvedNode = loc.Node + + if loc.DockerHostType == "lxc" { + nodeAgentID := e.findAgentForNode(loc.Node) + result.TargetType = "container" + result.TargetID = fmt.Sprintf("%d", loc.DockerHostVMID) + result.Transport = "pct_exec" + if nodeAgentID != "" { + result.AgentID = nodeAgentID + for _, agent := range agents { + if agent.AgentID == nodeAgentID { + result.AgentHostname = agent.Hostname + break + } + } + } + return result + } + if loc.DockerHostType == "vm" { + nodeAgentID := e.findAgentForNode(loc.Node) + result.TargetType = "vm" + result.TargetID = fmt.Sprintf("%d", loc.DockerHostVMID) + result.Transport = "qm_guest_exec" + if nodeAgentID != "" { + result.AgentID = nodeAgentID + for _, agent := range agents { + if agent.AgentID == nodeAgentID { + result.AgentHostname = agent.Hostname + break + } + } + } + return result + } + // Standalone Docker host - find agent directly + for _, agent := range agents { + if agent.Hostname == loc.TargetHost || agent.AgentID == loc.TargetHost { + result.AgentID = agent.AgentID + result.AgentHostname = agent.Hostname + return result + } + } } } } - if targetHost == "" && len(agents) > 1 { - return "" + // STEP 2: FALLBACK — agent hostname match. + // Only used when the state doesn't know about this resource at all. + // This handles standalone hosts without Proxmox topology. + for _, agent := range agents { + if agent.Hostname == targetHost || agent.AgentID == targetHost { + result.AgentID = agent.AgentID + result.AgentHostname = agent.Hostname + result.ResolvedKind = "host" + return result + } } - return agents[0].AgentID + return result +} + +// resolveTargetForCommand resolves a target_host to the correct agent and routing info. +// Uses the authoritative ResolveResource function from models.StateSnapshot. 
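+// For example, resolveTargetForCommand("homepage-docker") yields
+// ("proxmox-agent", "container", "141") when that LXC is known to the state
+// (names and IDs as in the routing tests; illustrative only).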
+// Returns: agentID, targetType ("host", "container", or "vm"), targetID (vmid for LXC/VM) +// +// CRITICAL ORDERING: Same as resolveTargetForCommandFull — topology first, agent fallback second. +func (e *PulseToolExecutor) resolveTargetForCommand(targetHost string) (agentID string, targetType string, targetID string) { + // Delegate to the full resolver and extract the triple + r := e.resolveTargetForCommandFull(targetHost) + return r.AgentID, r.TargetType, r.TargetID +} + +func (e *PulseToolExecutor) findAgentForCommand(runOnHost bool, targetHost string) string { + agentID, _, _ := e.resolveTargetForCommand(targetHost) + return agentID } func (e *PulseToolExecutor) resolveGuest(guestID string) (*GuestInfo, error) { @@ -627,6 +879,96 @@ func (e *PulseToolExecutor) getAgentHostnameForDockerHost(dockerHost *models.Doc return dockerHost.Hostname } +// resolveDockerHostRoutingFull resolves a Docker host to the correct agent and routing info +// with full provenance metadata. If the Docker host is actually an LXC or VM, it routes +// through the Proxmox host agent with the correct TargetType and TargetID so commands +// are executed inside the guest. +func (e *PulseToolExecutor) resolveDockerHostRoutingFull(dockerHost *models.DockerHost) CommandRoutingResult { + result := CommandRoutingResult{ + TargetType: "host", + Transport: "direct", + } + + if e.agentServer == nil { + return result + } + + // STEP 1: Check topology — is the Docker host actually an LXC or VM? + if e.stateProvider != nil { + state := e.stateProvider.GetState() + + // Check LXCs + for _, ct := range state.Containers { + if ct.Name == dockerHost.Hostname { + result.ResolvedKind = "lxc" + result.ResolvedNode = ct.Node + result.TargetType = "container" + result.TargetID = fmt.Sprintf("%d", ct.VMID) + result.Transport = "pct_exec" + nodeAgentID := e.findAgentForNode(ct.Node) + if nodeAgentID != "" { + result.AgentID = nodeAgentID + result.AgentHostname = ct.Node + log.Debug(). + Str("docker_host", dockerHost.Hostname). + Str("node", ct.Node). + Int("vmid", ct.VMID). + Str("agent", nodeAgentID). + Str("transport", result.Transport). + Msg("Resolved Docker host as LXC, routing through Proxmox agent") + } + return result + } + } + + // Check VMs + for _, vm := range state.VMs { + if vm.Name == dockerHost.Hostname { + result.ResolvedKind = "vm" + result.ResolvedNode = vm.Node + result.TargetType = "vm" + result.TargetID = fmt.Sprintf("%d", vm.VMID) + result.Transport = "qm_guest_exec" + nodeAgentID := e.findAgentForNode(vm.Node) + if nodeAgentID != "" { + result.AgentID = nodeAgentID + result.AgentHostname = vm.Node + log.Debug(). + Str("docker_host", dockerHost.Hostname). + Str("node", vm.Node). + Int("vmid", vm.VMID). + Str("agent", nodeAgentID). + Str("transport", result.Transport). + Msg("Resolved Docker host as VM, routing through Proxmox agent") + } + return result + } + } + } + + // STEP 2: Docker host is not an LXC/VM — use direct agent routing + agentID := e.findAgentForDockerHost(dockerHost) + result.AgentID = agentID + result.ResolvedKind = "dockerhost" + if agentID != "" { + // Try to get agent hostname + agents := e.agentServer.GetConnectedAgents() + for _, a := range agents { + if a.AgentID == agentID { + result.AgentHostname = a.Hostname + break + } + } + } + return result +} + +// resolveDockerHostRouting delegates to resolveDockerHostRoutingFull for backwards compatibility. 
+func (e *PulseToolExecutor) resolveDockerHostRouting(dockerHost *models.DockerHost) (agentID string, targetType string, targetID string) { + r := e.resolveDockerHostRoutingFull(dockerHost) + return r.AgentID, r.TargetType, r.TargetID +} + // createApprovalRecord creates an approval record in the store and returns the approval ID. // Returns empty string if store is not available (approvals will still work, just without persistence). func createApprovalRecord(command, targetType, targetID, targetName, context string) string { diff --git a/internal/ai/tools/tools_control_consolidated.go b/internal/ai/tools/tools_control_consolidated.go new file mode 100644 index 000000000..9d2b9d618 --- /dev/null +++ b/internal/ai/tools/tools_control_consolidated.go @@ -0,0 +1,96 @@ +package tools + +import ( + "context" + "fmt" +) + +// registerControlToolsConsolidated registers the consolidated pulse_control tool +func (e *PulseToolExecutor) registerControlToolsConsolidated() { + e.registry.Register(RegisteredTool{ + Definition: Tool{ + Name: "pulse_control", + Description: `Control Proxmox VMs/LXC containers or execute WRITE commands on infrastructure. + +IMPORTANT: For READ operations (grep, cat, tail, logs, ps, status checks), use pulse_read instead. +This tool is for WRITE operations that modify state. + +Types: +- guest: Start, stop, restart, shutdown, or delete VMs and LXC containers +- command: Execute commands that MODIFY state (restart services, write files, etc.) + +USE pulse_control FOR: +- Guest control: start/stop/restart/delete VMs and LXCs +- Service management: systemctl restart, service start/stop +- Package management: apt install, yum update +- File modification: echo > file, sed -i, rm, mv, cp + +DO NOT use pulse_control for: +- Reading logs → use pulse_read action=exec or action=logs +- Checking status → use pulse_read action=exec +- Reading files → use pulse_read action=file +- Finding files → use pulse_read action=find + +Examples: +- Restart VM: type="guest", guest_id="101", action="restart" +- Restart service: type="command", command="systemctl restart nginx", target_host="webserver" + +For Docker container control, use pulse_docker. 
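+Delete example: type="guest", guest_id="141", action="delete".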
+Note: Delete requires the guest to be stopped first.`,
+			InputSchema: InputSchema{
+				Type: "object",
+				Properties: map[string]PropertySchema{
+					"type": {
+						Type:        "string",
+						Description: "Control type: guest or command",
+						Enum:        []string{"guest", "command"},
+					},
+					"guest_id": {
+						Type:        "string",
+						Description: "For guest: VMID or name",
+					},
+					"action": {
+						Type:        "string",
+						Description: "For guest: start, stop, shutdown, restart, delete",
+						Enum:        []string{"start", "stop", "shutdown", "restart", "delete"},
+					},
+					"command": {
+						Type:        "string",
+						Description: "For command type: the shell command to execute",
+					},
+					"target_host": {
+						Type:        "string",
+						Description: "For command type: hostname to run command on",
+					},
+					"run_on_host": {
+						Type:        "boolean",
+						Description: "For command type: run on host (default true)",
+					},
+					"force": {
+						Type:        "boolean",
+						Description: "For guest stop: force stop without graceful shutdown",
+					},
+				},
+				Required: []string{"type"},
+			},
+		},
+		Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) {
+			return exec.executeControl(ctx, args)
+		},
+		RequireControl: true,
+	})
+}
+
+// executeControl routes to the appropriate control handler based on type.
+// Handler functions are implemented in tools_control.go.
+func (e *PulseToolExecutor) executeControl(ctx context.Context, args map[string]interface{}) (CallToolResult, error) {
+	controlType, _ := args["type"].(string)
+	switch controlType {
+	case "guest":
+		return e.executeControlGuest(ctx, args)
+	case "command":
+		return e.executeRunCommand(ctx, args)
+	default:
+		return NewErrorResult(fmt.Errorf("unknown type: %s. Use: guest, command", controlType)), nil
+	}
+}
diff --git a/internal/ai/tools/tools_discovery.go b/internal/ai/tools/tools_discovery.go
index 64fdab37f..36547a45f 100644
--- a/internal/ai/tools/tools_discovery.go
+++ b/internal/ai/tools/tools_discovery.go
@@ -3,9 +3,147 @@ package tools
 import (
 	"context"
 	"fmt"
+	"strconv"
 	"strings"
 )
 
+// CommandContext describes how to run commands on a resource.
+// This helps the AI understand what commands to use with pulse_control.
+type CommandContext struct {
+	// How to run commands: "direct" (agent on resource), "via_host" (agent on parent host)
+	Method string `json:"method"`
+	// The target_host value to use with pulse_control
+	TargetHost string `json:"target_host"`
+	// Example command pattern (what to pass to pulse_control)
+	Example string `json:"example"`
+	// For containers running inside this resource (e.g., Docker in LXC)
+	NestedExample string `json:"nested_example,omitempty"`
+}
+
+// getCLIAccessPattern returns context about the resource type.
+// Does NOT prescribe how to access - the AI should determine that based on available agents.
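+// For example, getCLIAccessPattern("lxc", "delly", "141") returns
+// "LXC container on Proxmox node 'delly' (VMID 141)" (host/VMID values illustrative).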
+func getCLIAccessPattern(resourceType, hostID, resourceID string) string { + switch resourceType { + case "lxc": + return fmt.Sprintf("LXC container on Proxmox node '%s' (VMID %s)", hostID, resourceID) + case "vm": + return fmt.Sprintf("VM on Proxmox node '%s' (VMID %s)", hostID, resourceID) + case "docker": + return fmt.Sprintf("Docker container '%s' on host '%s'", resourceID, hostID) + case "host": + return fmt.Sprintf("Host '%s'", hostID) + default: + return "" + } +} + +// commonServicePaths contains typical log/config paths for well-known services +// These are fallbacks when discovery doesn't find specific paths +var commonServicePaths = map[string]struct { + LogPaths []string + ConfigPaths []string + DataPaths []string +}{ + "jellyfin": { + LogPaths: []string{"/var/log/jellyfin/", "/config/log/"}, + ConfigPaths: []string{"/etc/jellyfin/", "/config/"}, + DataPaths: []string{"/var/lib/jellyfin/", "/config/data/"}, + }, + "plex": { + LogPaths: []string{"/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Logs/"}, + ConfigPaths: []string{"/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/"}, + DataPaths: []string{"/var/lib/plexmediaserver/"}, + }, + "sonarr": { + LogPaths: []string{"/config/logs/"}, + ConfigPaths: []string{"/config/"}, + DataPaths: []string{"/config/"}, + }, + "radarr": { + LogPaths: []string{"/config/logs/"}, + ConfigPaths: []string{"/config/"}, + DataPaths: []string{"/config/"}, + }, + "prowlarr": { + LogPaths: []string{"/config/logs/"}, + ConfigPaths: []string{"/config/"}, + DataPaths: []string{"/config/"}, + }, + "lidarr": { + LogPaths: []string{"/config/logs/"}, + ConfigPaths: []string{"/config/"}, + DataPaths: []string{"/config/"}, + }, + "postgresql": { + LogPaths: []string{"/var/log/postgresql/", "/var/lib/postgresql/data/log/"}, + ConfigPaths: []string{"/etc/postgresql/", "/var/lib/postgresql/data/"}, + DataPaths: []string{"/var/lib/postgresql/data/"}, + }, + "mysql": { + LogPaths: []string{"/var/log/mysql/", "/var/lib/mysql/"}, + ConfigPaths: []string{"/etc/mysql/"}, + DataPaths: []string{"/var/lib/mysql/"}, + }, + "mariadb": { + LogPaths: []string{"/var/log/mysql/", "/var/lib/mysql/"}, + ConfigPaths: []string{"/etc/mysql/"}, + DataPaths: []string{"/var/lib/mysql/"}, + }, + "nginx": { + LogPaths: []string{"/var/log/nginx/"}, + ConfigPaths: []string{"/etc/nginx/"}, + DataPaths: []string{"/var/www/"}, + }, + "homeassistant": { + LogPaths: []string{"/config/home-assistant.log"}, + ConfigPaths: []string{"/config/"}, + DataPaths: []string{"/config/"}, + }, + "frigate": { + LogPaths: []string{"/config/logs/"}, + ConfigPaths: []string{"/config/"}, + DataPaths: []string{"/media/frigate/"}, + }, + "redis": { + LogPaths: []string{"/var/log/redis/"}, + ConfigPaths: []string{"/etc/redis/"}, + DataPaths: []string{"/var/lib/redis/"}, + }, + "mongodb": { + LogPaths: []string{"/var/log/mongodb/"}, + ConfigPaths: []string{"/etc/mongod.conf"}, + DataPaths: []string{"/var/lib/mongodb/"}, + }, + "grafana": { + LogPaths: []string{"/var/log/grafana/"}, + ConfigPaths: []string{"/etc/grafana/"}, + DataPaths: []string{"/var/lib/grafana/"}, + }, + "prometheus": { + LogPaths: []string{"/var/log/prometheus/"}, + ConfigPaths: []string{"/etc/prometheus/"}, + DataPaths: []string{"/var/lib/prometheus/"}, + }, + "influxdb": { + LogPaths: []string{"/var/log/influxdb/"}, + ConfigPaths: []string{"/etc/influxdb/"}, + DataPaths: []string{"/var/lib/influxdb/"}, + }, +} + +// getCommonServicePaths returns fallback paths for a service type +func 
getCommonServicePaths(serviceType string) (logPaths, configPaths, dataPaths []string) {
+	// Normalize the service type (lowercase); the substring match below also
+	// tolerates version suffixes such as "postgresql15"
+	normalized := strings.ToLower(serviceType)
+	// Try to match against known services
+	for key, paths := range commonServicePaths {
+		if strings.Contains(normalized, key) {
+			return paths.LogPaths, paths.ConfigPaths, paths.DataPaths
+		}
+	}
+	return nil, nil, nil
+}
+
 // registerDiscoveryTools registers AI-powered infrastructure discovery tools
 func (e *PulseToolExecutor) registerDiscoveryTools() {
 	e.registry.Register(RegisteredTool{
@@ -40,10 +178,10 @@ This information is critical for proposing correct remediation commands that mat
 				},
 				"host_id": {
 					Type:        "string",
-					Description: "Optional: Host/node ID where the resource runs (required for Docker containers)",
+					Description: "Node/host where the resource runs. For VM/LXC: the PVE node from 'node' field. For Docker: the Docker host. For host type: same as resource_id.",
 				},
 			},
-			Required: []string{"resource_type", "resource_id"},
+			Required: []string{"resource_type", "resource_id", "host_id"},
 		},
 	},
 	Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) {
@@ -93,9 +231,9 @@ Filters:
 	})
 }
 
-func (e *PulseToolExecutor) executeGetDiscovery(_ context.Context, args map[string]interface{}) (CallToolResult, error) {
+func (e *PulseToolExecutor) executeGetDiscovery(ctx context.Context, args map[string]interface{}) (CallToolResult, error) {
 	if e.discoveryProvider == nil {
-		return NewTextResult("Discovery service not available. Run a discovery scan first."), nil
+		return NewTextResult("Discovery service not available."), nil
 	}
 
 	resourceType, _ := args["resource_type"].(string)
@@ -108,21 +246,105 @@ func (e *PulseToolExecutor) executeGetDiscovery(_ context.Context, args map[stri
 	if resourceID == "" {
 		return NewErrorResult(fmt.Errorf("resource_id is required")), nil
 	}
+	if hostID == "" {
+		return NewErrorResult(fmt.Errorf("host_id is required - use the 'node' field from search or get_resource results")), nil
+	}
 
+	// For LXC and VM types, resourceID should be a numeric VMID.
+	// If a name was passed, try to resolve it to a VMID.
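+	// Illustrative example (names hypothetical): resource_type="lxc", resource_id="jellyfin",
+	// host_id="pve1" scans state.Containers for a case-insensitive name match on that node
+	// and rewrites resourceID to the container's VMID, e.g. "104".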
+ if (resourceType == "lxc" || resourceType == "vm") && e.stateProvider != nil { + if _, err := strconv.Atoi(resourceID); err != nil { + // Not a number - try to resolve the name to a VMID + state := e.stateProvider.GetState() + resolved := false + + if resourceType == "lxc" { + for _, c := range state.Containers { + if strings.EqualFold(c.Name, resourceID) && c.Node == hostID { + resourceID = fmt.Sprintf("%d", c.VMID) + resolved = true + break + } + } + } else if resourceType == "vm" { + for _, vm := range state.VMs { + if strings.EqualFold(vm.Name, resourceID) && vm.Node == hostID { + resourceID = fmt.Sprintf("%d", vm.VMID) + resolved = true + break + } + } + } + + if !resolved { + return NewErrorResult(fmt.Errorf("could not resolve resource name '%s' to a VMID on host '%s'", resourceID, hostID)), nil + } + } + } + + // First try to get existing discovery discovery, err := e.discoveryProvider.GetDiscoveryByResource(resourceType, hostID, resourceID) if err != nil { return NewErrorResult(fmt.Errorf("failed to get discovery: %w", err)), nil } + // Compute CLI access pattern (always useful, even if discovery fails) + cliAccess := getCLIAccessPattern(resourceType, hostID, resourceID) + + // If no discovery exists, trigger one if discovery == nil { + discovery, err = e.discoveryProvider.TriggerDiscovery(ctx, resourceType, hostID, resourceID) + if err != nil { + // Even on failure, provide cli_access so AI can investigate manually + return NewJSONResult(map[string]interface{}{ + "found": false, + "resource_type": resourceType, + "resource_id": resourceID, + "host_id": hostID, + "cli_access": cliAccess, + "message": fmt.Sprintf("Discovery failed: %v", err), + "hint": "Use pulse_control with type='command' to investigate. Try checking /var/log/ for logs.", + }), nil + } + } + + if discovery == nil { + // No discovery but provide cli_access for manual investigation return NewJSONResult(map[string]interface{}{ "found": false, "resource_type": resourceType, "resource_id": resourceID, - "message": "No discovery data found for this resource. Run a discovery scan to gather information.", + "host_id": hostID, + "cli_access": cliAccess, + "message": "Discovery returned no data. The resource may not be accessible.", + "hint": "Use pulse_control with type='command' to investigate. 
Try listing /var/log/ or checking running processes.", }), nil } + // Use fallback cli_access if discovery didn't provide one + responseCLIAccess := discovery.CLIAccess + if responseCLIAccess == "" { + responseCLIAccess = cliAccess + } + + // Use fallback paths for known services if discovery didn't find specific ones + responseConfigPaths := discovery.ConfigPaths + responseDataPaths := discovery.DataPaths + var responseLogPaths []string + + if discovery.ServiceType != "" { + fallbackLogPaths, fallbackConfigPaths, fallbackDataPaths := getCommonServicePaths(discovery.ServiceType) + if len(responseConfigPaths) == 0 && len(fallbackConfigPaths) > 0 { + responseConfigPaths = fallbackConfigPaths + } + if len(responseDataPaths) == 0 && len(fallbackDataPaths) > 0 { + responseDataPaths = fallbackDataPaths + } + if len(fallbackLogPaths) > 0 { + responseLogPaths = fallbackLogPaths + } + } + // Return the discovery information response := map[string]interface{}{ "found": true, @@ -135,14 +357,19 @@ func (e *PulseToolExecutor) executeGetDiscovery(_ context.Context, args map[stri "service_name": discovery.ServiceName, "service_version": discovery.ServiceVersion, "category": discovery.Category, - "cli_access": discovery.CLIAccess, - "config_paths": discovery.ConfigPaths, - "data_paths": discovery.DataPaths, + "cli_access": responseCLIAccess, + "config_paths": responseConfigPaths, + "data_paths": responseDataPaths, "confidence": discovery.Confidence, "discovered_at": discovery.DiscoveredAt, "updated_at": discovery.UpdatedAt, } + // Add log paths if we have them (from fallback or discovery) + if len(responseLogPaths) > 0 { + response["log_paths"] = responseLogPaths + } + // Add facts if present if len(discovery.Facts) > 0 { facts := make([]map[string]string, 0, len(discovery.Facts)) @@ -166,6 +393,25 @@ func (e *PulseToolExecutor) executeGetDiscovery(_ context.Context, args map[stri response["ai_reasoning"] = discovery.AIReasoning } + // Add listening ports if present + if len(discovery.Ports) > 0 { + ports := make([]map[string]interface{}, 0, len(discovery.Ports)) + for _, p := range discovery.Ports { + port := map[string]interface{}{ + "port": p.Port, + "protocol": p.Protocol, + } + if p.Process != "" { + port["process"] = p.Process + } + if p.Address != "" { + port["address"] = p.Address + } + ports = append(ports, port) + } + response["ports"] = ports + } + return NewJSONResult(response), nil } @@ -236,6 +482,11 @@ func (e *PulseToolExecutor) executeListDiscoveries(_ context.Context, args map[s result["facts_count"] = len(d.Facts) } + // Add ports count + if len(d.Ports) > 0 { + result["ports_count"] = len(d.Ports) + } + results = append(results, result) } diff --git a/internal/ai/tools/tools_discovery_consolidated.go b/internal/ai/tools/tools_discovery_consolidated.go new file mode 100644 index 000000000..325738712 --- /dev/null +++ b/internal/ai/tools/tools_discovery_consolidated.go @@ -0,0 +1,87 @@ +package tools + +import ( + "context" + "fmt" +) + +// registerDiscoveryToolsConsolidated registers the consolidated pulse_discovery tool +func (e *PulseToolExecutor) registerDiscoveryToolsConsolidated() { + e.registry.Register(RegisteredTool{ + Definition: Tool{ + Name: "pulse_discovery", + Description: `Get deep AI-discovered information about services (log paths, config locations, service details). + +Actions: +- get: Trigger discovery and get detailed info for a specific resource. Use this when you need deep context about a container/VM (where logs are, config paths, service details). 
Requires resource_type, resource_id, and host_id - use pulse_query action="search" first if you don't know these. +- list: Search existing discoveries only. Will NOT find resources that haven't been discovered yet. Use action="get" to trigger discovery for new resources. + +Workflow for investigating applications: +1. Use pulse_query action="search" to find the resource by name +2. Use pulse_discovery action="get" with the resource details to get deep context (log paths, config locations) +3. Use pulse_control type="command" to run commands (check logs, query app state, etc.) + +Examples: +- Trigger discovery: action="get", resource_type="docker", resource_id="jellyfin", host_id="docker-host-1" +- Search existing: action="list", service_type="postgresql"`, + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]PropertySchema{ + "action": { + Type: "string", + Description: "Discovery action: get or list", + Enum: []string{"get", "list"}, + }, + "resource_type": { + Type: "string", + Description: "For get: resource type (vm, lxc, docker, host)", + Enum: []string{"vm", "lxc", "docker", "host"}, + }, + "resource_id": { + Type: "string", + Description: "For get: resource identifier (VMID, container name, hostname)", + }, + "host_id": { + Type: "string", + Description: "For get: node/host where resource runs", + }, + "type": { + Type: "string", + Description: "For list: filter by resource type", + Enum: []string{"vm", "lxc", "docker", "host"}, + }, + "host": { + Type: "string", + Description: "For list: filter by host/node ID", + }, + "service_type": { + Type: "string", + Description: "For list: filter by service type (e.g., frigate, postgresql)", + }, + "limit": { + Type: "integer", + Description: "For list: maximum results (default: 50)", + }, + }, + Required: []string{"action"}, + }, + }, + Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { + return exec.executeDiscovery(ctx, args) + }, + }) +} + +// executeDiscovery routes to the appropriate discovery handler based on action +// Handler functions are implemented in tools_discovery.go +func (e *PulseToolExecutor) executeDiscovery(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + action, _ := args["action"].(string) + switch action { + case "get": + return e.executeGetDiscovery(ctx, args) + case "list": + return e.executeListDiscoveries(ctx, args) + default: + return NewErrorResult(fmt.Errorf("unknown action: %s. Use: get, list", action)), nil + } +} diff --git a/internal/ai/tools/tools_docker.go b/internal/ai/tools/tools_docker.go new file mode 100644 index 000000000..6b6f6fb6b --- /dev/null +++ b/internal/ai/tools/tools_docker.go @@ -0,0 +1,201 @@ +package tools + +import ( + "context" + "fmt" + + "github.com/rcourtman/pulse-go-rewrite/internal/agentexec" + "github.com/rs/zerolog/log" +) + +// registerDockerTools registers the consolidated pulse_docker tool +func (e *PulseToolExecutor) registerDockerTools() { + e.registry.Register(RegisteredTool{ + Definition: Tool{ + Name: "pulse_docker", + Description: `Manage Docker containers, updates, and Swarm services. 
+ +Actions: +- control: Start, stop, or restart containers +- updates: List containers with pending image updates +- check_updates: Trigger update check on a host +- update: Update a container to latest image (requires control permission) +- services: List Docker Swarm services +- tasks: List Docker Swarm tasks +- swarm: Get Swarm cluster status + +To check Docker container logs or run commands inside containers, use pulse_control with type="command": + command="docker logs jellyfin --tail 100" + command="docker exec jellyfin cat /config/log/log.txt" + +Examples: +- Restart container: action="control", container="nginx", operation="restart" +- List updates: action="updates", host="Tower" +- Update container: action="update", container="nginx", host="Tower"`, + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]PropertySchema{ + "action": { + Type: "string", + Description: "Docker action to perform", + Enum: []string{"control", "updates", "check_updates", "update", "services", "tasks", "swarm"}, + }, + "container": { + Type: "string", + Description: "Container name or ID (for control, update)", + }, + "host": { + Type: "string", + Description: "Docker host name or ID", + }, + "operation": { + Type: "string", + Description: "Control operation: start, stop, restart (for action: control)", + Enum: []string{"start", "stop", "restart"}, + }, + "service": { + Type: "string", + Description: "Filter by service name or ID (for tasks)", + }, + "stack": { + Type: "string", + Description: "Filter by stack name (for services)", + }, + }, + Required: []string{"action"}, + }, + }, + Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { + return exec.executeDocker(ctx, args) + }, + }) +} + +// executeDocker routes to the appropriate docker handler based on action +func (e *PulseToolExecutor) executeDocker(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + action, _ := args["action"].(string) + switch action { + case "control": + return e.executeDockerControl(ctx, args) + case "updates": + // Uses existing function from tools_infrastructure.go + return e.executeListDockerUpdates(ctx, args) + case "check_updates": + // Uses existing function from tools_infrastructure.go + return e.executeCheckDockerUpdates(ctx, args) + case "update": + // Uses existing function from tools_infrastructure.go + return e.executeUpdateDockerContainer(ctx, args) + case "services": + // Uses existing function from tools_infrastructure.go + return e.executeListDockerServices(ctx, args) + case "tasks": + // Uses existing function from tools_infrastructure.go + return e.executeListDockerTasks(ctx, args) + case "swarm": + // Uses existing function from tools_infrastructure.go + return e.executeGetSwarmStatus(ctx, args) + default: + return NewErrorResult(fmt.Errorf("unknown action: %s. 
Use: control, updates, check_updates, update, services, tasks, swarm", action)), nil + } +} + +// executeDockerControl handles start/stop/restart of Docker containers +// This is a new consolidated handler that merges pulse_control_docker functionality +func (e *PulseToolExecutor) executeDockerControl(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + containerName, _ := args["container"].(string) + hostName, _ := args["host"].(string) + operation, _ := args["operation"].(string) + + if containerName == "" { + return NewErrorResult(fmt.Errorf("container name is required")), nil + } + if operation == "" { + return NewErrorResult(fmt.Errorf("operation is required (start, stop, restart)")), nil + } + + validOperations := map[string]bool{"start": true, "stop": true, "restart": true} + if !validOperations[operation] { + return NewErrorResult(fmt.Errorf("invalid operation: %s. Use start, stop, or restart", operation)), nil + } + + // Check if read-only mode + if e.controlLevel == ControlLevelReadOnly { + return NewTextResult("Docker control actions are not available in read-only mode."), nil + } + + // Check if this is a pre-approved execution + preApproved := isPreApproved(args) + + container, dockerHost, err := e.resolveDockerContainer(containerName, hostName) + if err != nil { + return NewTextResult(fmt.Sprintf("Could not find Docker container '%s': %v", containerName, err)), nil + } + + command := fmt.Sprintf("docker %s %s", operation, container.Name) + + // Get the agent hostname for approval records + agentHostname := e.getAgentHostnameForDockerHost(dockerHost) + + // Skip approval checks if pre-approved + if !preApproved && e.policy != nil { + decision := e.policy.Evaluate(command) + if decision == agentexec.PolicyBlock { + return NewTextResult(formatPolicyBlocked(command, "This command is blocked by security policy")), nil + } + if decision == agentexec.PolicyRequireApproval && !e.isAutonomous { + approvalID := createApprovalRecord(command, "docker", container.Name, agentHostname, fmt.Sprintf("%s Docker container %s", operation, container.Name)) + return NewTextResult(formatDockerApprovalNeeded(container.Name, dockerHost.Hostname, operation, command, approvalID)), nil + } + } + + // Check control level + if !preApproved && e.controlLevel == ControlLevelControlled { + approvalID := createApprovalRecord(command, "docker", container.Name, agentHostname, fmt.Sprintf("%s Docker container %s", operation, container.Name)) + return NewTextResult(formatDockerApprovalNeeded(container.Name, dockerHost.Hostname, operation, command, approvalID)), nil + } + + if e.agentServer == nil { + return NewErrorResult(fmt.Errorf("no agent server available")), nil + } + + // Resolve the Docker host to the correct agent and routing info (with full provenance) + routing := e.resolveDockerHostRoutingFull(dockerHost) + if routing.AgentID == "" { + if routing.TargetType == "container" || routing.TargetType == "vm" { + return NewTextResult(fmt.Sprintf("Docker host '%s' is a %s but no agent is available on its Proxmox host. Install Pulse Unified Agent on the Proxmox node.", dockerHost.Hostname, routing.TargetType)), nil + } + return NewTextResult(fmt.Sprintf("No agent available on Docker host '%s'. Install Pulse Unified Agent on the host to enable control.", dockerHost.Hostname)), nil + } + + log.Debug(). + Str("docker_host", dockerHost.Hostname). + Str("agent_id", routing.AgentID). + Str("agent_host", routing.AgentHostname). + Str("resolved_kind", routing.ResolvedKind). 
+ Str("resolved_node", routing.ResolvedNode). + Str("transport", routing.Transport). + Str("target_type", routing.TargetType). + Str("target_id", routing.TargetID). + Msg("[pulse_docker] Routing docker command execution") + + result, err := e.agentServer.ExecuteCommand(ctx, routing.AgentID, agentexec.ExecuteCommandPayload{ + Command: command, + TargetType: routing.TargetType, + TargetID: routing.TargetID, + }) + if err != nil { + return NewErrorResult(err), nil + } + + output := result.Stdout + if result.Stderr != "" { + output += "\n" + result.Stderr + } + + if result.ExitCode == 0 { + return NewTextResult(fmt.Sprintf("Successfully executed 'docker %s' on container '%s' (host: %s). State updates in ~10s.\n%s", operation, container.Name, dockerHost.Hostname, output)), nil + } + + return NewTextResult(fmt.Sprintf("Command failed (exit code %d):\n%s", result.ExitCode, output)), nil +} diff --git a/internal/ai/tools/tools_file.go b/internal/ai/tools/tools_file.go new file mode 100644 index 000000000..75539ed55 --- /dev/null +++ b/internal/ai/tools/tools_file.go @@ -0,0 +1,551 @@ +package tools + +import ( + "context" + "encoding/base64" + "fmt" + "strings" + + "github.com/rcourtman/pulse-go-rewrite/internal/agentexec" + "github.com/rs/zerolog/log" +) + +// ExecutionProvenance tracks where a command actually executed. +// This makes it observable whether a command ran on the intended target +// or fell back to a different execution context. +type ExecutionProvenance struct { + // What the model requested + RequestedTargetHost string `json:"requested_target_host"` + + // What we resolved it to + ResolvedKind string `json:"resolved_kind"` // "host", "lxc", "vm", "docker" + ResolvedNode string `json:"resolved_node"` // Proxmox node name (if applicable) + ResolvedUID string `json:"resolved_uid"` // VMID or container ID + + // How we executed it + AgentHost string `json:"agent_host"` // Hostname of the agent that executed + Transport string `json:"transport"` // "direct", "pct_exec", "qm_guest_exec" +} + +// registerFileTools registers the file editing tool +func (e *PulseToolExecutor) registerFileTools() { + e.registry.Register(RegisteredTool{ + Definition: Tool{ + Name: "pulse_file_edit", + Description: `Read and edit files on remote hosts, LXC containers, VMs, and Docker containers safely. + +Actions: +- read: Read the contents of a file +- append: Append content to the end of a file +- write: Write/overwrite a file with new content (creates if doesn't exist) + +This tool handles escaping automatically - just provide the content as-is. +Use this instead of shell commands for editing config files (YAML, JSON, etc.) + +Routing: target_host can be a Proxmox host (delly), an LXC name (homepage-docker), or a VM name. Commands are automatically routed through the appropriate agent. + +Docker container support: Use docker_container to access files INSIDE a Docker container. The target_host specifies where Docker is running. 
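+
+Content is transferred base64-encoded, so quoting and special characters in file content survive intact.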
+
+Examples:
+- Read from LXC: action="read", path="/opt/app/config.yaml", target_host="homepage-docker"
+- Write to host: action="write", path="/tmp/test.txt", content="hello", target_host="delly"
+- Read from Docker: action="read", path="/config/settings.json", target_host="tower", docker_container="jellyfin"
+- Write to Docker: action="write", path="/tmp/test.txt", content="hello", target_host="tower", docker_container="nginx"`,
+			InputSchema: InputSchema{
+				Type: "object",
+				Properties: map[string]PropertySchema{
+					"action": {
+						Type:        "string",
+						Description: "File action: read, append, or write",
+						Enum:        []string{"read", "append", "write"},
+					},
+					"path": {
+						Type:        "string",
+						Description: "Absolute path to the file",
+					},
+					"content": {
+						Type:        "string",
+						Description: "Content to write or append (for append/write actions)",
+					},
+					"target_host": {
+						Type:        "string",
+						Description: "Hostname where the file exists (or where Docker is running)",
+					},
+					"docker_container": {
+						Type:        "string",
+						Description: "Docker container name (for files inside containers)",
+					},
+				},
+				Required: []string{"action", "path", "target_host"},
+			},
+		},
+		Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) {
+			return exec.executeFileEdit(ctx, args)
+		},
+		RequireControl: true,
+	})
+}
+
+// executeFileEdit handles file read/write operations
+func (e *PulseToolExecutor) executeFileEdit(ctx context.Context, args map[string]interface{}) (CallToolResult, error) {
+	action, _ := args["action"].(string)
+	path, _ := args["path"].(string)
+	content, _ := args["content"].(string)
+	targetHost, _ := args["target_host"].(string)
+	dockerContainer, _ := args["docker_container"].(string)
+
+	if path == "" {
+		return NewErrorResult(fmt.Errorf("path is required")), nil
+	}
+	if targetHost == "" {
+		return NewErrorResult(fmt.Errorf("target_host is required")), nil
+	}
+
+	// Validate path - must be absolute
+	if !strings.HasPrefix(path, "/") {
+		return NewErrorResult(fmt.Errorf("path must be absolute (start with /)")), nil
+	}
+
+	// Validate docker_container if provided (alphanumeric plus '_', '-', and '.')
+	if dockerContainer != "" {
+		for _, c := range dockerContainer {
+			if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c == '.') {
+				return NewErrorResult(fmt.Errorf("invalid character '%c' in docker_container name", c)), nil
+			}
+		}
+	}
+
+	// Check control level
+	if e.controlLevel == ControlLevelReadOnly && action != "read" {
+		return NewTextResult("File editing is not available in read-only mode."), nil
+	}
+
+	switch action {
+	case "read":
+		return e.executeFileRead(ctx, path, targetHost, dockerContainer)
+	case "append":
+		if content == "" {
+			return NewErrorResult(fmt.Errorf("content is required for append action")), nil
+		}
+		return e.executeFileAppend(ctx, path, content, targetHost, dockerContainer, args)
+	case "write":
+		if content == "" {
+			return NewErrorResult(fmt.Errorf("content is required for write action")), nil
+		}
+		return e.executeFileWrite(ctx, path, content, targetHost, dockerContainer, args)
+	default:
+		return NewErrorResult(fmt.Errorf("unknown action: %s. 
Use: read, append, write", action)), nil + } +} + +// executeFileRead reads a file's contents +func (e *PulseToolExecutor) executeFileRead(ctx context.Context, path, targetHost, dockerContainer string) (CallToolResult, error) { + if e.agentServer == nil { + return NewErrorResult(fmt.Errorf("no agent server available")), nil + } + + // Validate routing context - block if targeting a Proxmox host when child resources exist + // This prevents accidentally reading files from the host when user meant to read from an LXC/VM + routingResult := e.validateRoutingContext(targetHost) + if routingResult.IsBlocked() { + return NewToolResponseResult(routingResult.RoutingError.ToToolResponse()), nil + } + + // Use full routing resolution - includes provenance for debugging + routing := e.resolveTargetForCommandFull(targetHost) + if routing.AgentID == "" { + if routing.TargetType == "container" || routing.TargetType == "vm" { + return NewTextResult(fmt.Sprintf("'%s' is a %s but no agent is available on its Proxmox host. Install Pulse Unified Agent on the Proxmox node.", targetHost, routing.TargetType)), nil + } + return NewTextResult(fmt.Sprintf("No agent found for host '%s'. Check that the hostname is correct and an agent is connected.", targetHost)), nil + } + + var command string + if dockerContainer != "" { + // File is inside Docker container + command = fmt.Sprintf("docker exec %s cat %s", shellEscape(dockerContainer), shellEscape(path)) + } else { + // File is on host filesystem (existing behavior) + command = fmt.Sprintf("cat %s", shellEscape(path)) + } + + result, err := e.agentServer.ExecuteCommand(ctx, routing.AgentID, agentexec.ExecuteCommandPayload{ + Command: command, + TargetType: routing.TargetType, + TargetID: routing.TargetID, + }) + if err != nil { + return NewErrorResult(fmt.Errorf("failed to read file: %w", err)), nil + } + + if result.ExitCode != 0 { + errMsg := result.Stderr + if errMsg == "" { + errMsg = result.Stdout + } + if dockerContainer != "" { + return NewTextResult(fmt.Sprintf("Failed to read file from container '%s' (exit code %d): %s", dockerContainer, result.ExitCode, errMsg)), nil + } + return NewTextResult(fmt.Sprintf("Failed to read file (exit code %d): %s", result.ExitCode, errMsg)), nil + } + + response := map[string]interface{}{ + "success": true, + "path": path, + "content": result.Stdout, + "host": targetHost, + "size": len(result.Stdout), + } + if dockerContainer != "" { + response["docker_container"] = dockerContainer + } + // Include execution provenance for observability + response["execution"] = buildExecutionProvenance(targetHost, routing) + return NewJSONResult(response), nil +} + +// executeFileAppend appends content to a file +func (e *PulseToolExecutor) executeFileAppend(ctx context.Context, path, content, targetHost, dockerContainer string, args map[string]interface{}) (CallToolResult, error) { + if e.agentServer == nil { + return NewErrorResult(fmt.Errorf("no agent server available")), nil + } + + // Validate routing context - block if targeting a Proxmox host when child resources exist + // This prevents accidentally writing files to the host when user meant to write to an LXC/VM + routingResult := e.validateRoutingContext(targetHost) + if routingResult.IsBlocked() { + return NewToolResponseResult(routingResult.RoutingError.ToToolResponse()), nil + } + + // Validate resource is in resolved context (write operation) + // With PULSE_STRICT_RESOLUTION=true, this blocks execution on undiscovered resources + validation := 
e.validateResolvedResource(targetHost, "append", true)
+	if validation.IsBlocked() {
+		// Hard validation failure - return consistent error envelope
+		return NewToolResponseResult(validation.StrictError.ToToolResponse()), nil
+	}
+	// Soft validation warnings are logged inside validateResolvedResource
+
+	// Use full routing resolution - includes provenance for debugging
+	routing := e.resolveTargetForCommandFull(targetHost)
+	if routing.AgentID == "" {
+		if routing.TargetType == "container" || routing.TargetType == "vm" {
+			return NewTextResult(fmt.Sprintf("'%s' is a %s but no agent is available on its Proxmox host. Install Pulse Unified Agent on the Proxmox node.", targetHost, routing.TargetType)), nil
+		}
+		return NewTextResult(fmt.Sprintf("No agent found for host '%s'. Check that the hostname is correct and an agent is connected.", targetHost)), nil
+	}
+
+	// INVARIANT: If the target resolves to a child resource (LXC/VM), writes MUST execute
+	// inside that context via pct_exec/qm_guest_exec. No silent node fallback.
+	if err := e.validateWriteExecutionContext(targetHost, routing); err != nil {
+		return NewToolResponseResult(err.ToToolResponse()), nil
+	}
+
+	// Check if pre-approved
+	preApproved := isPreApproved(args)
+
+	// Skip approval checks if pre-approved or in autonomous mode
+	if !preApproved && !e.isAutonomous && e.controlLevel == ControlLevelControlled {
+		target := targetHost
+		if dockerContainer != "" {
+			target = fmt.Sprintf("%s (container: %s)", targetHost, dockerContainer)
+		}
+		approvalID := createApprovalRecord(
+			fmt.Sprintf("Append to file: %s", path),
+			"file",
+			path,
+			target,
+			fmt.Sprintf("Append %d bytes to %s", len(content), path),
+		)
+		return NewTextResult(formatFileApprovalNeeded(path, target, "append", len(content), approvalID)), nil
+	}
+
+	// Use base64 encoding to safely transfer content
+	encoded := base64.StdEncoding.EncodeToString([]byte(content))
+	var command string
+	if dockerContainer != "" {
+		// Append inside the Docker container - docker exec needs its own sh -c.
+		// Escape the entire inner script so the shell-escaped path cannot terminate
+		// the sh -c quoting (paths with spaces or quotes would otherwise break it).
+		inner := fmt.Sprintf("echo %s | base64 -d >> %s", encoded, shellEscape(path))
+		command = fmt.Sprintf("docker exec %s sh -c %s", shellEscape(dockerContainer), shellEscape(inner))
+	} else {
+		// For host/LXC/VM targets - agent handles sh -c wrapping for LXC/VM
+		command = fmt.Sprintf("echo '%s' | base64 -d >> %s", encoded, shellEscape(path))
+	}
+
+	result, err := e.agentServer.ExecuteCommand(ctx, routing.AgentID, agentexec.ExecuteCommandPayload{
+		Command:    command,
+		TargetType: routing.TargetType,
+		TargetID:   routing.TargetID,
+	})
+	if err != nil {
+		return NewErrorResult(fmt.Errorf("failed to append to file: %w", err)), nil
+	}
+
+	if result.ExitCode != 0 {
+		errMsg := result.Stderr
+		if errMsg == "" {
+			errMsg = result.Stdout
+		}
+		if dockerContainer != "" {
+			return NewTextResult(fmt.Sprintf("Failed to append to file in container '%s' (exit code %d): %s", dockerContainer, result.ExitCode, errMsg)), nil
+		}
+		return NewTextResult(fmt.Sprintf("Failed to append to file (exit code %d): %s", result.ExitCode, errMsg)), nil
+	}
+
+	response := map[string]interface{}{
+		"success":       true,
+		"action":        "append",
+		"path":          path,
+		"host":          targetHost,
+		"bytes_written": len(content),
+	}
+	if dockerContainer != "" {
+		response["docker_container"] = dockerContainer
+	}
+	// Include execution provenance for observability
+	response["execution"] = buildExecutionProvenance(targetHost, routing)
+	return NewJSONResult(response), nil
+}
+
+// executeFileWrite writes content to a file (overwrites)
+func (e *PulseToolExecutor) 
executeFileWrite(ctx context.Context, path, content, targetHost, dockerContainer string, args map[string]interface{}) (CallToolResult, error) { + if e.agentServer == nil { + return NewErrorResult(fmt.Errorf("no agent server available")), nil + } + + // Validate routing context - block if targeting a Proxmox host when child resources exist + // This prevents accidentally writing files to the host when user meant to write to an LXC/VM + routingResult := e.validateRoutingContext(targetHost) + if routingResult.IsBlocked() { + return NewToolResponseResult(routingResult.RoutingError.ToToolResponse()), nil + } + + // Validate resource is in resolved context (write operation) + // With PULSE_STRICT_RESOLUTION=true, this blocks execution on undiscovered resources + validation := e.validateResolvedResource(targetHost, "write", true) + if validation.IsBlocked() { + // Hard validation failure - return consistent error envelope + return NewToolResponseResult(validation.StrictError.ToToolResponse()), nil + } + // Soft validation warnings are logged inside validateResolvedResource + + // Use full routing resolution - includes provenance for debugging + routing := e.resolveTargetForCommandFull(targetHost) + if routing.AgentID == "" { + if routing.TargetType == "container" || routing.TargetType == "vm" { + return NewTextResult(fmt.Sprintf("'%s' is a %s but no agent is available on its Proxmox host. Install Pulse Unified Agent on the Proxmox node.", targetHost, routing.TargetType)), nil + } + return NewTextResult(fmt.Sprintf("No agent found for host '%s'. Check that the hostname is correct and an agent is connected.", targetHost)), nil + } + + // INVARIANT: If the target resolves to a child resource (LXC/VM), writes MUST execute + // inside that context via pct_exec/qm_guest_exec. No silent node fallback. 
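+	// Concretely (hypothetical names): target_host="homepage-docker" resolving to an LXC must
+	// arrive with transport="pct_exec"; a transport="direct" match on the node's agent would
+	// write to the node's filesystem instead, and is rejected by the check below.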
+	if err := e.validateWriteExecutionContext(targetHost, routing); err != nil {
+		return NewToolResponseResult(err.ToToolResponse()), nil
+	}
+
+	// Check if pre-approved
+	preApproved := isPreApproved(args)
+
+	// Skip approval checks if pre-approved or in autonomous mode
+	if !preApproved && !e.isAutonomous && e.controlLevel == ControlLevelControlled {
+		target := targetHost
+		if dockerContainer != "" {
+			target = fmt.Sprintf("%s (container: %s)", targetHost, dockerContainer)
+		}
+		approvalID := createApprovalRecord(
+			fmt.Sprintf("Write file: %s", path),
+			"file",
+			path,
+			target,
+			fmt.Sprintf("Write %d bytes to %s", len(content), path),
+		)
+		return NewTextResult(formatFileApprovalNeeded(path, target, "write", len(content), approvalID)), nil
+	}
+
+	// Use base64 encoding to safely transfer content
+	encoded := base64.StdEncoding.EncodeToString([]byte(content))
+	var command string
+	if dockerContainer != "" {
+		// Write inside the Docker container - docker exec needs its own sh -c.
+		// As with append, escape the entire inner script so the shell-escaped path
+		// cannot terminate the sh -c quoting.
+		inner := fmt.Sprintf("echo %s | base64 -d > %s", encoded, shellEscape(path))
+		command = fmt.Sprintf("docker exec %s sh -c %s", shellEscape(dockerContainer), shellEscape(inner))
+	} else {
+		// For host/LXC/VM targets - agent handles sh -c wrapping for LXC/VM
+		command = fmt.Sprintf("echo '%s' | base64 -d > %s", encoded, shellEscape(path))
+	}
+
+	result, err := e.agentServer.ExecuteCommand(ctx, routing.AgentID, agentexec.ExecuteCommandPayload{
+		Command:    command,
+		TargetType: routing.TargetType,
+		TargetID:   routing.TargetID,
+	})
+	if err != nil {
+		return NewErrorResult(fmt.Errorf("failed to write file: %w", err)), nil
+	}
+
+	if result.ExitCode != 0 {
+		errMsg := result.Stderr
+		if errMsg == "" {
+			errMsg = result.Stdout
+		}
+		if dockerContainer != "" {
+			return NewTextResult(fmt.Sprintf("Failed to write file in container '%s' (exit code %d): %s", dockerContainer, result.ExitCode, errMsg)), nil
+		}
+		return NewTextResult(fmt.Sprintf("Failed to write file (exit code %d): %s", result.ExitCode, errMsg)), nil
+	}
+
+	response := map[string]interface{}{
+		"success":       true,
+		"action":        "write",
+		"path":          path,
+		"host":          targetHost,
+		"bytes_written": len(content),
+	}
+	if dockerContainer != "" {
+		response["docker_container"] = dockerContainer
+	}
+	// Include execution provenance for observability
+	response["execution"] = buildExecutionProvenance(targetHost, routing)
+	return NewJSONResult(response), nil
+}
+
+// ErrExecutionContextUnavailable is returned when a write operation targets a child resource
+// (LXC/VM) but the execution cannot be properly routed into that resource context.
+// This prevents silent fallback to node-level execution, which would write files on the
+// Proxmox host instead of inside the LXC/VM.
+type ErrExecutionContextUnavailable struct {
+	TargetHost   string // What the model requested
+	ResolvedKind string // What the state says it is (lxc, vm)
+	ResolvedNode string // Which Proxmox node it's on
+	Transport    string // What transport we got (should be pct_exec but might be "direct")
+	Message      string
+}
+
+func (e *ErrExecutionContextUnavailable) Error() string {
+	return e.Message
+}
+
+func (e *ErrExecutionContextUnavailable) ToToolResponse() ToolResponse {
+	return NewToolBlockedError("EXECUTION_CONTEXT_UNAVAILABLE", e.Message, map[string]interface{}{
+		"target_host":      e.TargetHost,
+		"resolved_kind":    e.ResolvedKind,
+		"resolved_node":    e.ResolvedNode,
+		"transport":        e.Transport,
+		"auto_recoverable": false,
+		"recovery_hint":    "Cannot write files to this target. 
The execution context (LXC/VM) is not reachable via pct exec/qm guest exec. Verify the agent is installed on the Proxmox node and the target is running.", + }) +} + +// validateWriteExecutionContext ensures write operations execute inside the correct context. +// +// INVARIANT: If state.ResolveResource says the target is an LXC/VM, writes MUST use +// pct_exec/qm_guest_exec to run inside that container. A "direct" transport on a child +// resource means we'd write to the Proxmox host's filesystem instead — which is always wrong. +// +// This catches the scenario where: +// 1. target_host="homepage-docker" (an LXC) +// 2. An agent on the node matches "homepage-docker" as a direct hostname +// 3. Command runs on the node without pct exec → writes to node filesystem +func (e *PulseToolExecutor) validateWriteExecutionContext(targetHost string, routing CommandRoutingResult) *ErrExecutionContextUnavailable { + if e.stateProvider == nil { + return nil // Can't validate without state + } + + state := e.stateProvider.GetState() + loc := state.ResolveResource(targetHost) + if !loc.Found { + return nil // Unknown resource, nothing to validate + } + + // Only validate for child resources (LXC/VM) + isChildResource := loc.ResourceType == "lxc" || loc.ResourceType == "vm" + if !isChildResource { + return nil + } + + // For child resources, the routing MUST use pct_exec or qm_guest_exec + // If it resolved as "direct" (host type), that means we'd execute on the node, not inside the LXC/VM + if routing.Transport == "direct" && routing.TargetType == "host" { + log.Warn(). + Str("target_host", targetHost). + Str("resolved_kind", loc.ResourceType). + Str("resolved_node", loc.Node). + Str("agent_hostname", routing.AgentHostname). + Str("transport", routing.Transport). + Msg("[FileWrite] BLOCKED: Write would execute on node, not inside child resource. " + + "Agent matched target hostname directly, but state says target is LXC/VM.") + + return &ErrExecutionContextUnavailable{ + TargetHost: targetHost, + ResolvedKind: loc.ResourceType, + ResolvedNode: loc.Node, + Transport: routing.Transport, + Message: fmt.Sprintf( + "'%s' is a %s on node '%s', but the write would execute on the Proxmox node instead of inside the %s. "+ + "This happens when an agent matches the hostname directly instead of routing via pct exec. "+ + "The file would be written to the node's filesystem, not the %s's filesystem.", + targetHost, loc.ResourceType, loc.Node, loc.ResourceType, loc.ResourceType), + } + } + + // Also validate: if resolved as LXC but no agent found for the node + if routing.AgentID == "" { + return &ErrExecutionContextUnavailable{ + TargetHost: targetHost, + ResolvedKind: loc.ResourceType, + ResolvedNode: loc.Node, + Transport: "none", + Message: fmt.Sprintf( + "'%s' is a %s on node '%s', but no agent is available on that Proxmox node. "+ + "Install the Pulse Unified Agent on '%s' to enable file operations inside the %s.", + targetHost, loc.ResourceType, loc.Node, loc.Node, loc.ResourceType), + } + } + + return nil +} + +// buildExecutionProvenance creates provenance metadata for tool responses. +// This makes it observable WHERE a command actually executed. 
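+//
+// For example, an append routed into an LXC might report (values illustrative):
+//
+//	"execution": {
+//	    "requested_target_host": "homepage-docker",
+//	    "resolved_kind":         "lxc",
+//	    "resolved_node":         "delly",
+//	    "agent_host":            "delly",
+//	    "transport":             "pct_exec",
+//	    "target_type":           "container",
+//	    "target_id":             "104"
+//	}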
+func buildExecutionProvenance(targetHost string, routing CommandRoutingResult) map[string]interface{} { + return map[string]interface{}{ + "requested_target_host": targetHost, + "resolved_kind": routing.ResolvedKind, + "resolved_node": routing.ResolvedNode, + "agent_host": routing.AgentHostname, + "transport": routing.Transport, + "target_type": routing.TargetType, + "target_id": routing.TargetID, + } +} + +// findAgentByHostname finds an agent ID by hostname +func (e *PulseToolExecutor) findAgentByHostname(hostname string) string { + if e.agentServer == nil { + return "" + } + + agents := e.agentServer.GetConnectedAgents() + hostnameLower := strings.ToLower(hostname) + + for _, agent := range agents { + // Match by hostname (case-insensitive) or by agentID (case-sensitive) + if strings.ToLower(agent.Hostname) == hostnameLower || agent.AgentID == hostname { + return agent.AgentID + } + } + return "" +} + +// shellEscape escapes a string for safe use in shell commands +func shellEscape(s string) string { + // Use single quotes and escape any existing single quotes + return "'" + strings.ReplaceAll(s, "'", "'\"'\"'") + "'" +} + +// formatFileApprovalNeeded formats an approval-required response for file operations +func formatFileApprovalNeeded(path, host, action string, size int, approvalID string) string { + return fmt.Sprintf(`APPROVAL_REQUIRED: {"type":"approval_required","approval_id":"%s","action":"file_%s","path":"%s","host":"%s","size":%d,"message":"File %s operation requires approval"}`, + approvalID, action, path, host, size, action) +} diff --git a/internal/ai/tools/tools_infrastructure.go b/internal/ai/tools/tools_infrastructure.go index 5ea089b75..805af2cc9 100644 --- a/internal/ai/tools/tools_infrastructure.go +++ b/internal/ai/tools/tools_infrastructure.go @@ -63,6 +63,35 @@ func (e *PulseToolExecutor) registerInfrastructureTools() { }, }) + e.registry.Register(RegisteredTool{ + Definition: Tool{ + Name: "pulse_get_storage_config", + Description: `Get Proxmox storage configuration (cluster storage.cfg), including nodes, path, and enabled/active flags. + +Use when: You need to confirm if a storage pool is configured for specific nodes or if it is disabled.`, + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]PropertySchema{ + "storage_id": { + Type: "string", + Description: "Optional: filter by storage ID (e.g. 
'local-lvm')", + }, + "instance": { + Type: "string", + Description: "Optional: filter by Proxmox instance or cluster name", + }, + "node": { + Type: "string", + Description: "Optional: filter to storages that include this node", + }, + }, + }, + }, + Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { + return exec.executeGetStorageConfig(ctx, args) + }, + }) + e.registry.Register(RegisteredTool{ Definition: Tool{ Name: "pulse_get_disk_health", @@ -814,8 +843,14 @@ func (e *PulseToolExecutor) executeListStorage(_ context.Context, args map[strin pool := StoragePoolSummary{ ID: s.ID, Name: s.Name, + Node: s.Node, + Instance: s.Instance, + Nodes: s.Nodes, Type: s.Type, Status: s.Status, + Enabled: s.Enabled, + Active: s.Active, + Path: s.Path, UsagePercent: s.Usage * 100, UsedGB: float64(s.Used) / (1024 * 1024 * 1024), TotalGB: float64(s.Total) / (1024 * 1024 * 1024), @@ -867,6 +902,54 @@ func (e *PulseToolExecutor) executeListStorage(_ context.Context, args map[strin return NewJSONResult(response), nil } +func (e *PulseToolExecutor) executeGetStorageConfig(_ context.Context, args map[string]interface{}) (CallToolResult, error) { + storageID, _ := args["storage_id"].(string) + instance, _ := args["instance"].(string) + node, _ := args["node"].(string) + + storageID = strings.TrimSpace(storageID) + instance = strings.TrimSpace(instance) + node = strings.TrimSpace(node) + + if e.storageConfigProvider == nil { + return NewTextResult("Storage configuration not available."), nil + } + + configs, err := e.storageConfigProvider.GetStorageConfig(instance) + if err != nil { + return NewErrorResult(err), nil + } + + response := StorageConfigResponse{} + for _, cfg := range configs { + if storageID != "" && !strings.EqualFold(cfg.ID, storageID) && !strings.EqualFold(cfg.Name, storageID) { + continue + } + if instance != "" && !strings.EqualFold(cfg.Instance, instance) { + continue + } + if node != "" && !storageConfigHasNode(cfg.Nodes, node) { + continue + } + response.Storages = append(response.Storages, cfg) + } + + if response.Storages == nil { + response.Storages = []StorageConfigSummary{} + } + + return NewJSONResult(response), nil +} + +func storageConfigHasNode(nodes []string, node string) bool { + for _, n := range nodes { + if strings.EqualFold(strings.TrimSpace(n), node) { + return true + } + } + return false +} + func (e *PulseToolExecutor) executeGetDiskHealth(_ context.Context, _ map[string]interface{}) (CallToolResult, error) { if e.diskHealthProvider == nil && e.storageProvider == nil { return NewTextResult("Disk health information not available."), nil diff --git a/internal/ai/tools/tools_knowledge.go b/internal/ai/tools/tools_knowledge.go new file mode 100644 index 000000000..1aac6ec7f --- /dev/null +++ b/internal/ai/tools/tools_knowledge.go @@ -0,0 +1,96 @@ +package tools + +import ( + "context" + "fmt" +) + +// registerKnowledgeTools registers the consolidated pulse_knowledge tool +func (e *PulseToolExecutor) registerKnowledgeTools() { + e.registry.Register(RegisteredTool{ + Definition: Tool{ + Name: "pulse_knowledge", + Description: `Manage AI knowledge, notes, and incident analysis. 
+ +Actions: +- remember: Save a note about a resource for future reference +- recall: Retrieve saved notes about a resource +- incidents: Get high-resolution incident recording data +- correlate: Get correlated events around a timestamp +- relationships: Get resource dependency graph + +Examples: +- Save note: action="remember", resource_id="101", note="Production database server", category="purpose" +- Recall: action="recall", resource_id="101" +- Get incidents: action="incidents", resource_id="101" +- Correlate events: action="correlate", resource_id="101", window_minutes=30 +- Get relationships: action="relationships", resource_id="101"`, + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]PropertySchema{ + "action": { + Type: "string", + Description: "Knowledge action to perform", + Enum: []string{"remember", "recall", "incidents", "correlate", "relationships"}, + }, + "resource_id": { + Type: "string", + Description: "Resource ID to operate on", + }, + "note": { + Type: "string", + Description: "For remember: the note to save", + }, + "category": { + Type: "string", + Description: "For remember/recall: note category (purpose, owner, maintenance, issue)", + }, + "window_id": { + Type: "string", + Description: "For incidents: specific incident window ID", + }, + "timestamp": { + Type: "string", + Description: "For correlate: ISO timestamp to center search around (default: now)", + }, + "window_minutes": { + Type: "integer", + Description: "For correlate: time window in minutes (default: 15)", + }, + "depth": { + Type: "integer", + Description: "For relationships: levels to traverse (default: 1, max: 3)", + }, + "limit": { + Type: "integer", + Description: "For incidents: max windows to return (default: 5)", + }, + }, + Required: []string{"action", "resource_id"}, + }, + }, + Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { + return exec.executeKnowledge(ctx, args) + }, + }) +} + +// executeKnowledge routes to the appropriate knowledge handler based on action +// Handler functions are implemented in tools_intelligence.go +func (e *PulseToolExecutor) executeKnowledge(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + action, _ := args["action"].(string) + switch action { + case "remember": + return e.executeRemember(ctx, args) + case "recall": + return e.executeRecall(ctx, args) + case "incidents": + return e.executeGetIncidentWindow(ctx, args) + case "correlate": + return e.executeCorrelateEvents(ctx, args) + case "relationships": + return e.executeGetRelationshipGraph(ctx, args) + default: + return NewErrorResult(fmt.Errorf("unknown action: %s. Use: remember, recall, incidents, correlate, relationships", action)), nil + } +} diff --git a/internal/ai/tools/tools_kubernetes.go b/internal/ai/tools/tools_kubernetes.go new file mode 100644 index 000000000..dbd037707 --- /dev/null +++ b/internal/ai/tools/tools_kubernetes.go @@ -0,0 +1,861 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + "strings" + + "github.com/rcourtman/pulse-go-rewrite/internal/agentexec" + "github.com/rcourtman/pulse-go-rewrite/internal/models" +) + +// registerKubernetesTools registers the consolidated pulse_kubernetes tool +func (e *PulseToolExecutor) registerKubernetesTools() { + e.registry.Register(RegisteredTool{ + Definition: Tool{ + Name: "pulse_kubernetes", + Description: `Query and control Kubernetes clusters, nodes, pods, and deployments. 
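+
+Control types run kubectl through the agent configured for the cluster, so a cluster without an agent can be queried but not controlled.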
+ +Query types: +- clusters: List all Kubernetes clusters with health summary +- nodes: List nodes in a cluster with capacity and status +- pods: List pods with optional namespace/status filters +- deployments: List deployments with replica status + +Control types (require control permission): +- scale: Scale a deployment (set replicas) +- restart: Restart a deployment (rollout restart) +- delete_pod: Delete a pod +- exec: Execute a command inside a pod +- logs: Get pod logs + +Examples: +- List clusters: type="clusters" +- Get pods: type="pods", cluster="production", namespace="default" +- Scale deployment: type="scale", cluster="production", deployment="nginx", namespace="default", replicas=3 +- Restart deployment: type="restart", cluster="production", deployment="nginx", namespace="default" +- Delete pod: type="delete_pod", cluster="production", pod="nginx-abc123", namespace="default" +- Exec in pod: type="exec", cluster="production", pod="nginx-abc123", namespace="default", command="cat /etc/nginx/nginx.conf" +- Get logs: type="logs", cluster="production", pod="nginx-abc123", namespace="default", lines=100`, + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]PropertySchema{ + "type": { + Type: "string", + Description: "Operation type", + Enum: []string{"clusters", "nodes", "pods", "deployments", "scale", "restart", "delete_pod", "exec", "logs"}, + }, + "cluster": { + Type: "string", + Description: "Cluster name or ID", + }, + "namespace": { + Type: "string", + Description: "Kubernetes namespace (default: 'default')", + }, + "deployment": { + Type: "string", + Description: "Deployment name (for scale, restart)", + }, + "pod": { + Type: "string", + Description: "Pod name (for delete_pod, exec, logs)", + }, + "container": { + Type: "string", + Description: "Container name (for exec, logs - uses first container if omitted)", + }, + "command": { + Type: "string", + Description: "Command to execute (for exec)", + }, + "replicas": { + Type: "integer", + Description: "Desired replica count (for scale)", + }, + "lines": { + Type: "integer", + Description: "Number of log lines to return (for logs, default: 100)", + }, + "status": { + Type: "string", + Description: "Filter by pod phase: Running, Pending, Failed, Succeeded (for pods)", + }, + "limit": { + Type: "integer", + Description: "Maximum number of results (default: 100)", + }, + "offset": { + Type: "integer", + Description: "Number of results to skip", + }, + }, + Required: []string{"type"}, + }, + }, + Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { + return exec.executeKubernetes(ctx, args) + }, + }) +} + +// executeKubernetes routes to the appropriate kubernetes handler based on type +func (e *PulseToolExecutor) executeKubernetes(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + resourceType, _ := args["type"].(string) + switch resourceType { + case "clusters": + return e.executeGetKubernetesClusters(ctx) + case "nodes": + return e.executeGetKubernetesNodes(ctx, args) + case "pods": + return e.executeGetKubernetesPods(ctx, args) + case "deployments": + return e.executeGetKubernetesDeployments(ctx, args) + // Control operations + case "scale": + return e.executeKubernetesScale(ctx, args) + case "restart": + return e.executeKubernetesRestart(ctx, args) + case "delete_pod": + return e.executeKubernetesDeletePod(ctx, args) + case "exec": + return e.executeKubernetesExec(ctx, args) + case "logs": + return 
e.executeKubernetesLogs(ctx, args) + default: + return NewErrorResult(fmt.Errorf("unknown type: %s. Use: clusters, nodes, pods, deployments, scale, restart, delete_pod, exec, logs", resourceType)), nil + } +} + +func (e *PulseToolExecutor) executeGetKubernetesClusters(_ context.Context) (CallToolResult, error) { + if e.stateProvider == nil { + return NewTextResult("State provider not available."), nil + } + + state := e.stateProvider.GetState() + + if len(state.KubernetesClusters) == 0 { + return NewTextResult("No Kubernetes clusters found. Kubernetes monitoring may not be configured."), nil + } + + var clusters []KubernetesClusterSummary + for _, c := range state.KubernetesClusters { + readyNodes := 0 + for _, node := range c.Nodes { + if node.Ready { + readyNodes++ + } + } + + displayName := c.DisplayName + if c.CustomDisplayName != "" { + displayName = c.CustomDisplayName + } + + clusters = append(clusters, KubernetesClusterSummary{ + ID: c.ID, + Name: c.Name, + DisplayName: displayName, + Server: c.Server, + Version: c.Version, + Status: c.Status, + NodeCount: len(c.Nodes), + PodCount: len(c.Pods), + DeploymentCount: len(c.Deployments), + ReadyNodes: readyNodes, + }) + } + + response := KubernetesClustersResponse{ + Clusters: clusters, + Total: len(clusters), + } + + return NewJSONResult(response), nil +} + +func (e *PulseToolExecutor) executeGetKubernetesNodes(_ context.Context, args map[string]interface{}) (CallToolResult, error) { + if e.stateProvider == nil { + return NewTextResult("State provider not available."), nil + } + + clusterArg, _ := args["cluster"].(string) + if clusterArg == "" { + return NewErrorResult(fmt.Errorf("cluster is required")), nil + } + + state := e.stateProvider.GetState() + + // Find the cluster (also match CustomDisplayName) + var cluster *KubernetesClusterSummary + for _, c := range state.KubernetesClusters { + if c.ID == clusterArg || c.Name == clusterArg || c.DisplayName == clusterArg || c.CustomDisplayName == clusterArg { + displayName := c.DisplayName + if c.CustomDisplayName != "" { + displayName = c.CustomDisplayName + } + cluster = &KubernetesClusterSummary{ + ID: c.ID, + Name: c.Name, + DisplayName: displayName, + } + + var nodes []KubernetesNodeSummary + for _, node := range c.Nodes { + nodes = append(nodes, KubernetesNodeSummary{ + UID: node.UID, + Name: node.Name, + Ready: node.Ready, + Unschedulable: node.Unschedulable, + Roles: node.Roles, + KubeletVersion: node.KubeletVersion, + ContainerRuntimeVersion: node.ContainerRuntimeVersion, + OSImage: node.OSImage, + Architecture: node.Architecture, + CapacityCPU: node.CapacityCPU, + CapacityMemoryBytes: node.CapacityMemoryBytes, + CapacityPods: node.CapacityPods, + AllocatableCPU: node.AllocCPU, + AllocatableMemoryBytes: node.AllocMemoryBytes, + AllocatablePods: node.AllocPods, + }) + } + + response := KubernetesNodesResponse{ + Cluster: cluster.DisplayName, + Nodes: nodes, + Total: len(nodes), + } + if response.Nodes == nil { + response.Nodes = []KubernetesNodeSummary{} + } + return NewJSONResult(response), nil + } + } + + return NewTextResult(fmt.Sprintf("Kubernetes cluster '%s' not found.", clusterArg)), nil +} + +func (e *PulseToolExecutor) executeGetKubernetesPods(_ context.Context, args map[string]interface{}) (CallToolResult, error) { + if e.stateProvider == nil { + return NewTextResult("State provider not available."), nil + } + + clusterArg, _ := args["cluster"].(string) + if clusterArg == "" { + return NewErrorResult(fmt.Errorf("cluster is required")), nil + } + + namespaceFilter, _ := 
args["namespace"].(string) + statusFilter, _ := args["status"].(string) + limit := intArg(args, "limit", 100) + offset := intArg(args, "offset", 0) + + state := e.stateProvider.GetState() + + // Find the cluster (also match CustomDisplayName) + for _, c := range state.KubernetesClusters { + if c.ID == clusterArg || c.Name == clusterArg || c.DisplayName == clusterArg || c.CustomDisplayName == clusterArg { + displayName := c.DisplayName + if c.CustomDisplayName != "" { + displayName = c.CustomDisplayName + } + + var pods []KubernetesPodSummary + totalPods := 0 + filteredCount := 0 + + for _, pod := range c.Pods { + // Apply filters + if namespaceFilter != "" && pod.Namespace != namespaceFilter { + continue + } + if statusFilter != "" && !strings.EqualFold(pod.Phase, statusFilter) { + continue + } + + filteredCount++ + + // Apply pagination + if totalPods < offset { + totalPods++ + continue + } + if len(pods) >= limit { + totalPods++ + continue + } + + var containers []KubernetesPodContainerSummary + for _, container := range pod.Containers { + containers = append(containers, KubernetesPodContainerSummary{ + Name: container.Name, + Ready: container.Ready, + State: container.State, + RestartCount: container.RestartCount, + Reason: container.Reason, + }) + } + + pods = append(pods, KubernetesPodSummary{ + UID: pod.UID, + Name: pod.Name, + Namespace: pod.Namespace, + NodeName: pod.NodeName, + Phase: pod.Phase, + Reason: pod.Reason, + Restarts: pod.Restarts, + QoSClass: pod.QoSClass, + OwnerKind: pod.OwnerKind, + OwnerName: pod.OwnerName, + Containers: containers, + }) + totalPods++ + } + + response := KubernetesPodsResponse{ + Cluster: displayName, + Pods: pods, + Total: len(c.Pods), + Filtered: filteredCount, + } + if response.Pods == nil { + response.Pods = []KubernetesPodSummary{} + } + return NewJSONResult(response), nil + } + } + + return NewTextResult(fmt.Sprintf("Kubernetes cluster '%s' not found.", clusterArg)), nil +} + +func (e *PulseToolExecutor) executeGetKubernetesDeployments(_ context.Context, args map[string]interface{}) (CallToolResult, error) { + if e.stateProvider == nil { + return NewTextResult("State provider not available."), nil + } + + clusterArg, _ := args["cluster"].(string) + if clusterArg == "" { + return NewErrorResult(fmt.Errorf("cluster is required")), nil + } + + namespaceFilter, _ := args["namespace"].(string) + limit := intArg(args, "limit", 100) + offset := intArg(args, "offset", 0) + + state := e.stateProvider.GetState() + + // Find the cluster (also match CustomDisplayName) + for _, c := range state.KubernetesClusters { + if c.ID == clusterArg || c.Name == clusterArg || c.DisplayName == clusterArg || c.CustomDisplayName == clusterArg { + displayName := c.DisplayName + if c.CustomDisplayName != "" { + displayName = c.CustomDisplayName + } + + var deployments []KubernetesDeploymentSummary + filteredCount := 0 + count := 0 + + for _, dep := range c.Deployments { + // Apply namespace filter + if namespaceFilter != "" && dep.Namespace != namespaceFilter { + continue + } + + filteredCount++ + + // Apply pagination + if count < offset { + count++ + continue + } + if len(deployments) >= limit { + count++ + continue + } + + deployments = append(deployments, KubernetesDeploymentSummary{ + UID: dep.UID, + Name: dep.Name, + Namespace: dep.Namespace, + DesiredReplicas: dep.DesiredReplicas, + ReadyReplicas: dep.ReadyReplicas, + AvailableReplicas: dep.AvailableReplicas, + UpdatedReplicas: dep.UpdatedReplicas, + }) + count++ + } + + response := KubernetesDeploymentsResponse{ 
+ Cluster: displayName, + Deployments: deployments, + Total: len(c.Deployments), + Filtered: filteredCount, + } + if response.Deployments == nil { + response.Deployments = []KubernetesDeploymentSummary{} + } + return NewJSONResult(response), nil + } + } + + return NewTextResult(fmt.Sprintf("Kubernetes cluster '%s' not found.", clusterArg)), nil +} + +// ========== Kubernetes Control Operations ========== + +// findAgentForKubernetesCluster finds the agent for a Kubernetes cluster +func (e *PulseToolExecutor) findAgentForKubernetesCluster(clusterArg string) (string, *models.KubernetesCluster, error) { + if e.stateProvider == nil { + return "", nil, fmt.Errorf("state provider not available") + } + + state := e.stateProvider.GetState() + + for i := range state.KubernetesClusters { + c := &state.KubernetesClusters[i] + if c.ID == clusterArg || c.Name == clusterArg || c.DisplayName == clusterArg || c.CustomDisplayName == clusterArg { + if c.AgentID == "" { + return "", nil, fmt.Errorf("cluster '%s' has no agent configured - kubectl commands cannot be executed", clusterArg) + } + return c.AgentID, c, nil + } + } + return "", nil, fmt.Errorf("kubernetes cluster '%s' not found", clusterArg) +} + +// validateKubernetesResourceID validates a Kubernetes resource identifier (namespace, pod, deployment, container names) +func validateKubernetesResourceID(value string) error { + if value == "" { + return fmt.Errorf("value cannot be empty") + } + // Kubernetes resource names must be valid DNS subdomains: lowercase, alphanumeric, '-' and '.' + // Max 253 characters + if len(value) > 253 { + return fmt.Errorf("value too long (max 253 characters)") + } + for _, c := range value { + if !((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '.') { + return fmt.Errorf("invalid character '%c' in resource name", c) + } + } + return nil +} + +// executeKubernetesScale scales a deployment +func (e *PulseToolExecutor) executeKubernetesScale(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + clusterArg, _ := args["cluster"].(string) + namespace, _ := args["namespace"].(string) + deployment, _ := args["deployment"].(string) + replicas := intArg(args, "replicas", -1) + + if clusterArg == "" { + return NewErrorResult(fmt.Errorf("cluster is required")), nil + } + if deployment == "" { + return NewErrorResult(fmt.Errorf("deployment is required")), nil + } + if replicas < 0 { + return NewErrorResult(fmt.Errorf("replicas is required and must be >= 0")), nil + } + if namespace == "" { + namespace = "default" + } + + // Validate identifiers + if err := validateKubernetesResourceID(namespace); err != nil { + return NewErrorResult(fmt.Errorf("invalid namespace: %w", err)), nil + } + if err := validateKubernetesResourceID(deployment); err != nil { + return NewErrorResult(fmt.Errorf("invalid deployment: %w", err)), nil + } + + // Check control level + if e.controlLevel == ControlLevelReadOnly { + return NewTextResult("Kubernetes control operations are not available in read-only mode."), nil + } + + agentID, cluster, err := e.findAgentForKubernetesCluster(clusterArg) + if err != nil { + return NewTextResult(err.Error()), nil + } + + // Check if pre-approved + preApproved := isPreApproved(args) + + // Build command + command := fmt.Sprintf("kubectl -n %s scale deployment %s --replicas=%d", namespace, deployment, replicas) + + // Request approval if needed + if !preApproved && !e.isAutonomous && e.controlLevel == ControlLevelControlled { + displayName := cluster.DisplayName + if 
cluster.CustomDisplayName != "" { + displayName = cluster.CustomDisplayName + } + approvalID := createApprovalRecord(command, "kubernetes", deployment, displayName, fmt.Sprintf("Scale deployment %s to %d replicas", deployment, replicas)) + return NewTextResult(formatKubernetesApprovalNeeded("scale", deployment, namespace, displayName, command, approvalID)), nil + } + + if e.agentServer == nil { + return NewErrorResult(fmt.Errorf("no agent server available")), nil + } + + result, err := e.agentServer.ExecuteCommand(ctx, agentID, agentexec.ExecuteCommandPayload{ + Command: command, + TargetType: "host", + TargetID: "", + }) + if err != nil { + return NewErrorResult(fmt.Errorf("failed to execute kubectl: %w", err)), nil + } + + output := result.Stdout + if result.Stderr != "" { + output += "\n" + result.Stderr + } + + if result.ExitCode == 0 { + return NewTextResult(fmt.Sprintf("✓ Successfully scaled deployment '%s' to %d replicas in namespace '%s'. Action complete - no verification needed.\n%s", deployment, replicas, namespace, output)), nil + } + + return NewTextResult(fmt.Sprintf("kubectl command failed (exit code %d):\n%s", result.ExitCode, output)), nil +} + +// executeKubernetesRestart restarts a deployment via rollout restart +func (e *PulseToolExecutor) executeKubernetesRestart(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + clusterArg, _ := args["cluster"].(string) + namespace, _ := args["namespace"].(string) + deployment, _ := args["deployment"].(string) + + if clusterArg == "" { + return NewErrorResult(fmt.Errorf("cluster is required")), nil + } + if deployment == "" { + return NewErrorResult(fmt.Errorf("deployment is required")), nil + } + if namespace == "" { + namespace = "default" + } + + // Validate identifiers + if err := validateKubernetesResourceID(namespace); err != nil { + return NewErrorResult(fmt.Errorf("invalid namespace: %w", err)), nil + } + if err := validateKubernetesResourceID(deployment); err != nil { + return NewErrorResult(fmt.Errorf("invalid deployment: %w", err)), nil + } + + // Check control level + if e.controlLevel == ControlLevelReadOnly { + return NewTextResult("Kubernetes control operations are not available in read-only mode."), nil + } + + agentID, cluster, err := e.findAgentForKubernetesCluster(clusterArg) + if err != nil { + return NewTextResult(err.Error()), nil + } + + // Check if pre-approved + preApproved := isPreApproved(args) + + // Build command + command := fmt.Sprintf("kubectl -n %s rollout restart deployment/%s", namespace, deployment) + + // Request approval if needed + if !preApproved && !e.isAutonomous && e.controlLevel == ControlLevelControlled { + displayName := cluster.DisplayName + if cluster.CustomDisplayName != "" { + displayName = cluster.CustomDisplayName + } + approvalID := createApprovalRecord(command, "kubernetes", deployment, displayName, fmt.Sprintf("Restart deployment %s", deployment)) + return NewTextResult(formatKubernetesApprovalNeeded("restart", deployment, namespace, displayName, command, approvalID)), nil + } + + if e.agentServer == nil { + return NewErrorResult(fmt.Errorf("no agent server available")), nil + } + + result, err := e.agentServer.ExecuteCommand(ctx, agentID, agentexec.ExecuteCommandPayload{ + Command: command, + TargetType: "host", + TargetID: "", + }) + if err != nil { + return NewErrorResult(fmt.Errorf("failed to execute kubectl: %w", err)), nil + } + + output := result.Stdout + if result.Stderr != "" { + output += "\n" + result.Stderr + } + + if result.ExitCode == 0 { + 
return NewTextResult(fmt.Sprintf("✓ Successfully initiated rollout restart for deployment '%s' in namespace '%s'. Action complete - pods will restart gradually.\n%s", deployment, namespace, output)), nil + } + + return NewTextResult(fmt.Sprintf("kubectl command failed (exit code %d):\n%s", result.ExitCode, output)), nil +} + +// executeKubernetesDeletePod deletes a pod +func (e *PulseToolExecutor) executeKubernetesDeletePod(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + clusterArg, _ := args["cluster"].(string) + namespace, _ := args["namespace"].(string) + pod, _ := args["pod"].(string) + + if clusterArg == "" { + return NewErrorResult(fmt.Errorf("cluster is required")), nil + } + if pod == "" { + return NewErrorResult(fmt.Errorf("pod is required")), nil + } + if namespace == "" { + namespace = "default" + } + + // Validate identifiers + if err := validateKubernetesResourceID(namespace); err != nil { + return NewErrorResult(fmt.Errorf("invalid namespace: %w", err)), nil + } + if err := validateKubernetesResourceID(pod); err != nil { + return NewErrorResult(fmt.Errorf("invalid pod: %w", err)), nil + } + + // Check control level + if e.controlLevel == ControlLevelReadOnly { + return NewTextResult("Kubernetes control operations are not available in read-only mode."), nil + } + + agentID, cluster, err := e.findAgentForKubernetesCluster(clusterArg) + if err != nil { + return NewTextResult(err.Error()), nil + } + + // Check if pre-approved + preApproved := isPreApproved(args) + + // Build command + command := fmt.Sprintf("kubectl -n %s delete pod %s", namespace, pod) + + // Request approval if needed + if !preApproved && !e.isAutonomous && e.controlLevel == ControlLevelControlled { + displayName := cluster.DisplayName + if cluster.CustomDisplayName != "" { + displayName = cluster.CustomDisplayName + } + approvalID := createApprovalRecord(command, "kubernetes", pod, displayName, fmt.Sprintf("Delete pod %s", pod)) + return NewTextResult(formatKubernetesApprovalNeeded("delete_pod", pod, namespace, displayName, command, approvalID)), nil + } + + if e.agentServer == nil { + return NewErrorResult(fmt.Errorf("no agent server available")), nil + } + + result, err := e.agentServer.ExecuteCommand(ctx, agentID, agentexec.ExecuteCommandPayload{ + Command: command, + TargetType: "host", + TargetID: "", + }) + if err != nil { + return NewErrorResult(fmt.Errorf("failed to execute kubectl: %w", err)), nil + } + + output := result.Stdout + if result.Stderr != "" { + output += "\n" + result.Stderr + } + + if result.ExitCode == 0 { + return NewTextResult(fmt.Sprintf("✓ Successfully deleted pod '%s' in namespace '%s'. 
If managed by a controller, a new pod will be created.\n%s", pod, namespace, output)), nil + } + + return NewTextResult(fmt.Sprintf("kubectl command failed (exit code %d):\n%s", result.ExitCode, output)), nil +} + +// executeKubernetesExec executes a command inside a pod +func (e *PulseToolExecutor) executeKubernetesExec(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + clusterArg, _ := args["cluster"].(string) + namespace, _ := args["namespace"].(string) + pod, _ := args["pod"].(string) + container, _ := args["container"].(string) + command, _ := args["command"].(string) + + if clusterArg == "" { + return NewErrorResult(fmt.Errorf("cluster is required")), nil + } + if pod == "" { + return NewErrorResult(fmt.Errorf("pod is required")), nil + } + if command == "" { + return NewErrorResult(fmt.Errorf("command is required")), nil + } + if namespace == "" { + namespace = "default" + } + + // Validate identifiers + if err := validateKubernetesResourceID(namespace); err != nil { + return NewErrorResult(fmt.Errorf("invalid namespace: %w", err)), nil + } + if err := validateKubernetesResourceID(pod); err != nil { + return NewErrorResult(fmt.Errorf("invalid pod: %w", err)), nil + } + if container != "" { + if err := validateKubernetesResourceID(container); err != nil { + return NewErrorResult(fmt.Errorf("invalid container: %w", err)), nil + } + } + + // Check control level + if e.controlLevel == ControlLevelReadOnly { + return NewTextResult("Kubernetes control operations are not available in read-only mode."), nil + } + + agentID, cluster, err := e.findAgentForKubernetesCluster(clusterArg) + if err != nil { + return NewTextResult(err.Error()), nil + } + + // Check if pre-approved + preApproved := isPreApproved(args) + + // Build kubectl command + var kubectlCmd string + if container != "" { + kubectlCmd = fmt.Sprintf("kubectl -n %s exec %s -c %s -- %s", namespace, pod, container, command) + } else { + kubectlCmd = fmt.Sprintf("kubectl -n %s exec %s -- %s", namespace, pod, command) + } + + // Request approval if needed + if !preApproved && !e.isAutonomous && e.controlLevel == ControlLevelControlled { + displayName := cluster.DisplayName + if cluster.CustomDisplayName != "" { + displayName = cluster.CustomDisplayName + } + approvalID := createApprovalRecord(kubectlCmd, "kubernetes", pod, displayName, fmt.Sprintf("Execute command in pod %s", pod)) + return NewTextResult(formatKubernetesApprovalNeeded("exec", pod, namespace, displayName, kubectlCmd, approvalID)), nil + } + + if e.agentServer == nil { + return NewErrorResult(fmt.Errorf("no agent server available")), nil + } + + result, err := e.agentServer.ExecuteCommand(ctx, agentID, agentexec.ExecuteCommandPayload{ + Command: kubectlCmd, + TargetType: "host", + TargetID: "", + }) + if err != nil { + return NewErrorResult(fmt.Errorf("failed to execute kubectl: %w", err)), nil + } + + output := result.Stdout + if result.Stderr != "" { + output += "\n" + result.Stderr + } + + // Always show output explicitly to prevent LLM hallucination + if result.ExitCode == 0 { + if output == "" { + return NewTextResult(fmt.Sprintf("Command executed in pod '%s' (exit code 0).\n\nOutput:\n(no output)", pod)), nil + } + return NewTextResult(fmt.Sprintf("Command executed in pod '%s' (exit code 0).\n\nOutput:\n%s", pod, output)), nil + } + + if output == "" { + return NewTextResult(fmt.Sprintf("Command in pod '%s' exited with code %d.\n\nOutput:\n(no output)", pod, result.ExitCode)), nil + } + return NewTextResult(fmt.Sprintf("Command in pod 
'%s' exited with code %d.\n\nOutput:\n%s", pod, result.ExitCode, output)), nil +} + +// executeKubernetesLogs retrieves pod logs +func (e *PulseToolExecutor) executeKubernetesLogs(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + clusterArg, _ := args["cluster"].(string) + namespace, _ := args["namespace"].(string) + pod, _ := args["pod"].(string) + container, _ := args["container"].(string) + lines := intArg(args, "lines", 100) + + if clusterArg == "" { + return NewErrorResult(fmt.Errorf("cluster is required")), nil + } + if pod == "" { + return NewErrorResult(fmt.Errorf("pod is required")), nil + } + if namespace == "" { + namespace = "default" + } + + // Validate identifiers + if err := validateKubernetesResourceID(namespace); err != nil { + return NewErrorResult(fmt.Errorf("invalid namespace: %w", err)), nil + } + if err := validateKubernetesResourceID(pod); err != nil { + return NewErrorResult(fmt.Errorf("invalid pod: %w", err)), nil + } + if container != "" { + if err := validateKubernetesResourceID(container); err != nil { + return NewErrorResult(fmt.Errorf("invalid container: %w", err)), nil + } + } + + // Logs is a read operation, but still requires a connected agent + agentID, _, err := e.findAgentForKubernetesCluster(clusterArg) + if err != nil { + return NewTextResult(err.Error()), nil + } + + // Build kubectl command - logs is read-only so no approval needed + var kubectlCmd string + if container != "" { + kubectlCmd = fmt.Sprintf("kubectl -n %s logs %s -c %s --tail=%d", namespace, pod, container, lines) + } else { + kubectlCmd = fmt.Sprintf("kubectl -n %s logs %s --tail=%d", namespace, pod, lines) + } + + if e.agentServer == nil { + return NewErrorResult(fmt.Errorf("no agent server available")), nil + } + + result, err := e.agentServer.ExecuteCommand(ctx, agentID, agentexec.ExecuteCommandPayload{ + Command: kubectlCmd, + TargetType: "host", + TargetID: "", + }) + if err != nil { + return NewErrorResult(fmt.Errorf("failed to execute kubectl: %w", err)), nil + } + + output := result.Stdout + if result.Stderr != "" && result.ExitCode != 0 { + output += "\n" + result.Stderr + } + + if result.ExitCode == 0 { + if output == "" { + return NewTextResult(fmt.Sprintf("No logs found for pod '%s' in namespace '%s'", pod, namespace)), nil + } + return NewTextResult(fmt.Sprintf("Logs from pod '%s' (last %d lines):\n%s", pod, lines, output)), nil + } + + return NewTextResult(fmt.Sprintf("kubectl logs failed (exit code %d):\n%s", result.ExitCode, output)), nil +} + +// formatKubernetesApprovalNeeded formats an approval-required response for Kubernetes operations +func formatKubernetesApprovalNeeded(action, resource, namespace, cluster, command, approvalID string) string { + payload := map[string]interface{}{ + "type": "approval_required", + "approval_id": approvalID, + "action": action, + "resource": resource, + "namespace": namespace, + "cluster": cluster, + "command": command, + "how_to_approve": "Click the approval button in the chat to execute this action.", + "do_not_retry": true, + } + b, _ := json.Marshal(payload) + return "APPROVAL_REQUIRED: " + string(b) +} diff --git a/internal/ai/tools/tools_metrics.go b/internal/ai/tools/tools_metrics.go new file mode 100644 index 000000000..202d35685 --- /dev/null +++ b/internal/ai/tools/tools_metrics.go @@ -0,0 +1,110 @@ +package tools + +import ( + "context" + "fmt" +) + +// registerMetricsTools registers the consolidated pulse_metrics tool +func (e *PulseToolExecutor) registerMetricsTools() { + 
e.registry.Register(RegisteredTool{ + Definition: Tool{ + Name: "pulse_metrics", + Description: `Get performance metrics, baselines, and sensor data. + +Types: +- performance: Historical CPU/memory/disk metrics over 24h or 7d +- temperatures: CPU, disk, and sensor temperatures from hosts +- network: Network interface statistics (rx/tx bytes, speed) +- diskio: Disk I/O statistics (read/write bytes, ops) +- disks: Physical disk health (SMART, wearout, temperatures) +- baselines: Learned normal behavior baselines for resources +- patterns: Detected operational patterns and predictions + +Examples: +- Get 24h metrics: type="performance", period="24h" +- Get VM metrics: type="performance", resource_id="101" +- Get host temps: type="temperatures", host="pve01" +- Get disk health: type="disks", node="pve01"`, + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]PropertySchema{ + "type": { + Type: "string", + Description: "Metric type to query", + Enum: []string{"performance", "temperatures", "network", "diskio", "disks", "baselines", "patterns"}, + }, + "resource_id": { + Type: "string", + Description: "Filter by specific resource ID (for performance, baselines)", + }, + "resource_type": { + Type: "string", + Description: "Filter by resource type: vm, container, node (for performance, baselines)", + }, + "host": { + Type: "string", + Description: "Filter by hostname (for temperatures, network, diskio)", + }, + "node": { + Type: "string", + Description: "Filter by Proxmox node (for disks)", + }, + "instance": { + Type: "string", + Description: "Filter by Proxmox instance (for disks)", + }, + "period": { + Type: "string", + Description: "Time period for performance: 24h or 7d (default: 24h)", + Enum: []string{"24h", "7d"}, + }, + "health": { + Type: "string", + Description: "Filter disks by health status: PASSED, FAILED, UNKNOWN", + }, + "disk_type": { + Type: "string", + Description: "Filter disks by type: nvme, sata, sas", + }, + "limit": { + Type: "integer", + Description: "Maximum number of results (default: 100)", + }, + "offset": { + Type: "integer", + Description: "Number of results to skip", + }, + }, + Required: []string{"type"}, + }, + }, + Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { + return exec.executeMetrics(ctx, args) + }, + }) +} + +// executeMetrics routes to the appropriate metrics handler based on type +// All handler functions are implemented in tools_patrol.go and tools_infrastructure.go +func (e *PulseToolExecutor) executeMetrics(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + metricType, _ := args["type"].(string) + switch metricType { + case "performance": + return e.executeGetMetrics(ctx, args) + case "temperatures": + return e.executeGetTemperatures(ctx, args) + case "network": + return e.executeGetNetworkStats(ctx, args) + case "diskio": + return e.executeGetDiskIOStats(ctx, args) + case "disks": + return e.executeListPhysicalDisks(ctx, args) + case "baselines": + return e.executeGetBaselines(ctx, args) + case "patterns": + return e.executeGetPatterns(ctx, args) + default: + return NewErrorResult(fmt.Errorf("unknown type: %s. 
Use: performance, temperatures, network, diskio, disks, baselines, patterns", metricType)), nil + } +} diff --git a/internal/ai/tools/tools_pmg_consolidated.go b/internal/ai/tools/tools_pmg_consolidated.go new file mode 100644 index 000000000..6b9fd0d39 --- /dev/null +++ b/internal/ai/tools/tools_pmg_consolidated.go @@ -0,0 +1,64 @@ +package tools + +import ( + "context" + "fmt" +) + +// registerPMGToolsConsolidated registers the consolidated pulse_pmg tool +func (e *PulseToolExecutor) registerPMGToolsConsolidated() { + e.registry.Register(RegisteredTool{ + Definition: Tool{ + Name: "pulse_pmg", + Description: `Query Proxmox Mail Gateway status and statistics. + +Types: +- status: Instance status and health (nodes, uptime, load) +- mail_stats: Mail flow statistics (counts, spam, virus, bounces) +- queues: Mail queue status (active, deferred, hold) +- spam: Spam quarantine statistics and score distribution + +Examples: +- Get status: type="status" +- Get specific instance: type="status", instance="pmg01" +- Get mail stats: type="mail_stats" +- Get queue status: type="queues" +- Get spam stats: type="spam"`, + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]PropertySchema{ + "type": { + Type: "string", + Description: "PMG query type", + Enum: []string{"status", "mail_stats", "queues", "spam"}, + }, + "instance": { + Type: "string", + Description: "Optional: specific PMG instance name or ID", + }, + }, + Required: []string{"type"}, + }, + }, + Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { + return exec.executePMG(ctx, args) + }, + }) +} + +// executePMG routes to the appropriate PMG handler based on type +func (e *PulseToolExecutor) executePMG(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + pmgType, _ := args["type"].(string) + switch pmgType { + case "status": + return e.executeGetPMGStatus(ctx, args) + case "mail_stats": + return e.executeGetMailStats(ctx, args) + case "queues": + return e.executeGetMailQueues(ctx, args) + case "spam": + return e.executeGetSpamStats(ctx, args) + default: + return NewErrorResult(fmt.Errorf("unknown type: %s. Use: status, mail_stats, queues, spam", pmgType)), nil + } +} diff --git a/internal/ai/tools/tools_query.go b/internal/ai/tools/tools_query.go index 89a4b3391..314cbd056 100644 --- a/internal/ai/tools/tools_query.go +++ b/internal/ai/tools/tools_query.go @@ -3,15 +3,1331 @@ package tools import ( "context" "fmt" - "io" - "net" - "net/http" - "net/url" "os" + "sort" "strings" + "sync" "time" + + "github.com/rcourtman/pulse-go-rewrite/internal/models" + "github.com/rs/zerolog/log" ) +// routingMismatchLogLimiter provides rate limiting for routing mismatch debug logs. +// This prevents log spam while still providing visibility into routing issues. +var routingMismatchLogLimiter = struct { + mu sync.Mutex + lastLog time.Time + interval time.Duration +}{ + interval: 10 * time.Second, // Log at most once per 10 seconds +} + +// ErrStrictResolution is returned when a write operation is attempted on an +// undiscovered resource while PULSE_STRICT_RESOLUTION is enabled. +// Use errors.As() to check for this error type. 
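+//
+// Illustrative caller sketch (hypothetical, for documentation only; not part
+// of this change):
+//
+//	var strict *ErrStrictResolution
+//	if errors.As(err, &strict) {
+//		// Return the structured envelope so the agentic loop can
+//		// discover the resource and retry automatically.
+//		return strict.ToToolResponse()
+//	}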
+type ErrStrictResolution struct {
+	ResourceID string // The resource identifier that wasn't found
+	Action     string // The action that was attempted
+	Message    string // Human-readable message
+}
+
+func (e *ErrStrictResolution) Error() string {
+	return e.Message
+}
+
+// Code returns the error code for structured responses
+func (e *ErrStrictResolution) Code() string {
+	return ErrCodeStrictResolution
+}
+
+// ToToolResponse returns a consistent ToolResponse for blocked operations.
+// This enables the agentic loop to detect and auto-recover (discover then retry).
+func (e *ErrStrictResolution) ToToolResponse() ToolResponse {
+	return NewToolBlockedError(
+		ErrCodeStrictResolution,
+		e.Message,
+		map[string]interface{}{
+			"resource_id":      e.ResourceID,
+			"action":           e.Action,
+			"recovery_hint":    "Use pulse_query action=search to discover the resource first",
+			"auto_recoverable": true, // Signal to agentic loop that auto-discovery can help
+		},
+	)
+}
+
+// ToStructuredError returns a structured error payload for tool responses
+// Deprecated: Use ToToolResponse() for consistent envelope
+func (e *ErrStrictResolution) ToStructuredError() map[string]interface{} {
+	return map[string]interface{}{
+		"error_code":  e.Code(),
+		"message":     e.Message,
+		"resource_id": e.ResourceID,
+		"action":      e.Action,
+	}
+}
+
+// ErrRoutingMismatch is returned when a tool targets a parent host (e.g., Proxmox node)
+// but the session has discovered more specific child resources (LXC/VM) on that host.
+// This prevents accidentally operating on the host filesystem when the user intended
+// to target a container.
+type ErrRoutingMismatch struct {
+	TargetHost            string   // The host that was targeted
+	MoreSpecificResources []string // Child resource names that exist on this host
+	MoreSpecificIDs       []string // Canonical resource IDs (kind:host:id) for future ID-based targeting
+	ChildKinds            []string // Resource kinds of children (for telemetry: "lxc", "vm", etc.)
+	Message               string   // Human-readable message
+}
+
+func (e *ErrRoutingMismatch) Error() string {
+	return e.Message
+}
+
+// Code returns the error code for structured responses
+func (e *ErrRoutingMismatch) Code() string {
+	return "ROUTING_MISMATCH"
+}
+
+// ToToolResponse returns a consistent ToolResponse for routing mismatches.
+func (e *ErrRoutingMismatch) ToToolResponse() ToolResponse {
+	details := map[string]interface{}{
+		"target_host":             e.TargetHost,
+		"more_specific_resources": e.MoreSpecificResources,
+		"auto_recoverable":        true,
+	}
+
+	// Include canonical IDs and prefer ID-based targeting in recovery hint.
+	// Guard every [0] access: either slice may be empty depending on the caller.
+	if len(e.MoreSpecificIDs) > 0 {
+		details["more_specific_resource_ids"] = e.MoreSpecificIDs
+		details["target_resource_id"] = e.MoreSpecificIDs[0] // Primary suggestion
+		// Prefer ID-based targeting, with legacy target_host as fallback
+		hint := fmt.Sprintf("Retry with target_resource_id='%s' (preferred)", e.MoreSpecificIDs[0])
+		if len(e.MoreSpecificResources) > 0 {
+			hint += fmt.Sprintf(" or target_host='%s' (legacy)", e.MoreSpecificResources[0])
+		}
+		details["recovery_hint"] = hint
+	} else if len(e.MoreSpecificResources) > 0 {
+		// Fallback if no IDs available
+		details["recovery_hint"] = fmt.Sprintf(
+			"Use target_host='%s' to target the specific resource, not the parent Proxmox host",
+			e.MoreSpecificResources[0])
+	}
+
+	return NewToolBlockedError(
+		"ROUTING_MISMATCH",
+		e.Message,
+		details,
+	)
+}
+
+// isStrictResolutionEnabled returns true if hard validation is enabled for write operations.
+// Set PULSE_STRICT_RESOLUTION=true to block write operations on undiscovered resources.
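+//
+// Accepted truthy values (anything else, including unset, leaves it disabled):
+//
+//	PULSE_STRICT_RESOLUTION=true
+//	PULSE_STRICT_RESOLUTION=1
+//	PULSE_STRICT_RESOLUTION=yes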
+func isStrictResolutionEnabled() bool { + val := os.Getenv("PULSE_STRICT_RESOLUTION") + return val == "true" || val == "1" || val == "yes" +} + +// isWriteAction returns true if the action is a write/mutating operation. +// Note: "exec" is treated as write because it can execute arbitrary commands. +// For finer control, use classifyCommandRisk() to distinguish read-only exec commands. +func isWriteAction(action string) bool { + writeActions := map[string]bool{ + "start": true, + "stop": true, + "restart": true, + "delete": true, + "shutdown": true, + "exec": true, + "write": true, + "append": true, + } + return writeActions[action] +} + +// CommandRisk represents the risk level of a shell command +type CommandRisk int + +const ( + CommandRiskReadOnly CommandRisk = 0 // Safe read-only commands + CommandRiskLowWrite CommandRisk = 1 // Low-risk writes (touch, mkdir temp) + CommandRiskMediumWrite CommandRisk = 2 // Medium-risk writes (config changes) + CommandRiskHighWrite CommandRisk = 3 // High-risk writes (rm, systemctl, package managers) +) + +// ExecutionIntent represents whether a command can be proven non-mutating. +// This is the primary abstraction for pulse_read gating decisions. +// +// Invariant: pulse_read may execute commands that are provably non-mutating +// either by construction (known read-only commands) or by bounded inspection +// (self-contained input + no shell composition + no write patterns). Any command +// that depends on external input, shell composition, or ambiguous semantics is +// treated as write-capable and blocked from pulse_read. +type ExecutionIntent int + +const ( + // IntentReadOnlyCertain - command is non-mutating by construction. + // Examples: ls, cat, grep, docker logs, ffprobe, kubectl get + // These cannot mutate regardless of arguments. + IntentReadOnlyCertain ExecutionIntent = iota + + // IntentReadOnlyConditional - command appears read-only by bounded inspection. + // The command is self-contained (no shell composition) and content inspection + // found no write patterns. Examples: sqlite3 "SELECT ...", psql -c "SELECT ..." + // Guardrails: no redirects, no pipes, no subshells, no chaining, inline input only. + IntentReadOnlyConditional + + // IntentWriteOrUnknown - command may mutate or cannot be proven safe. + // Either it matches known write patterns, has shell composition that prevents + // analysis, or is unknown and we fail closed. + IntentWriteOrUnknown +) + +// IntentResult contains the execution intent classification and the reason for it. +type IntentResult struct { + Intent ExecutionIntent + Reason string // Human-readable reason for classification + NonInteractiveBlock *NonInteractiveBlockResult // Non-nil if blocked by NonInteractiveOnly guardrail +} + +// ContentInspector examines command content to determine if it's read-only. +// Different inspectors handle different tool families (SQL, Redis, kubectl, etc.) +type ContentInspector interface { + // Applies returns true if this inspector handles the given command + Applies(cmdLower string) bool + // IsReadOnly returns (true, "") if content is read-only, or (false, reason) if not + IsReadOnly(cmdLower string) (bool, string) +} + +// sqlContentInspector handles SQL CLI tools (sqlite3, mysql, psql, etc.) 
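+// Illustrative classifications (based on the keyword and inline-SQL checks below):
+//
+//	sqlite3 app.db "SELECT count(*) FROM users"  → read-only (inline SELECT)
+//	sqlite3 app.db "PRAGMA journal_mode=WAL"     → blocked (pragma keyword)
+//	mysql -e "UPDATE t SET x=1"                  → blocked (write keyword)
+//	psql mydb                                    → blocked (no inline SQL; input may be interactive)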
+type sqlContentInspector struct{} + +func (s *sqlContentInspector) Applies(cmdLower string) bool { + sqlCLIs := []string{"sqlite3 ", "mysql ", "mariadb ", "psql ", "mycli ", "pgcli ", "litecli "} + for _, cli := range sqlCLIs { + if strings.Contains(cmdLower, cli) || strings.HasPrefix(cmdLower, strings.TrimSuffix(cli, " ")) { + return true + } + } + return false +} + +func (s *sqlContentInspector) IsReadOnly(cmdLower string) (bool, string) { + // SQL statements that mutate data or schema. + // Conservative: includes DDL, DML writes, transaction control, and admin commands. + sqlWriteKeywords := []string{ + // DML writes + "insert ", "update ", "delete ", "replace ", + // DDL + "create ", "drop ", "alter ", "truncate ", + "merge ", "upsert ", + // Transaction control (expands attack surface) + "begin", "commit", "rollback", "savepoint", "release ", + // Database management + "attach ", "detach ", + "vacuum", "reindex", + "grant ", "revoke ", + "pragma ", + } + for _, kw := range sqlWriteKeywords { + if strings.Contains(cmdLower, kw) { + return false, fmt.Sprintf("SQL contains write/control keyword: %s", strings.TrimSpace(kw)) + } + } + + // Conservative: if we can't find inline SQL content, assume external input + hasInlineSQL := strings.Contains(cmdLower, `"`) || + strings.Contains(cmdLower, `'`) || + strings.Contains(cmdLower, " .") || // dot commands like .tables, .schema + strings.Contains(cmdLower, " -e ") || // mysql -e + strings.Contains(cmdLower, " -c ") // psql -c + if !hasInlineSQL { + return false, "no inline SQL found; input may be external (piped/interactive)" + } + + return true, "" +} + +// registeredInspectors is the list of content inspectors to try. +// Add new inspectors here for redis-cli, kubectl, etc. +var registeredInspectors = []ContentInspector{ + &sqlContentInspector{}, + // Future: &redisContentInspector{}, + // Future: &kubectlContentInspector{}, +} + +// ClassifyExecutionIntent determines whether a command can be proven non-mutating. +// This is the main entry point for pulse_read gating decisions. +func ClassifyExecutionIntent(command string) IntentResult { + cmdLower := strings.ToLower(command) + + // === PHASE 1: Mutation-capability guards === + // These make ANY command potentially dangerous regardless of the binary. + // Includes: sudo, redirects, tee, subshells, pipes, shell chaining + if reason := checkMutationCapabilityGuards(command, cmdLower); reason != "" { + return IntentResult{Intent: IntentWriteOrUnknown, Reason: reason} + } + + // === PHASE 1.5: NonInteractiveOnly guardrails === + // MUST be checked before Phase 3 (read-only by construction) because even + // read-only commands like `tail -f` and `journalctl -f` can hang indefinitely. + // pulse_read requires commands that terminate deterministically. + if niBlock := checkNonInteractiveGuardrails(command, cmdLower); niBlock != nil { + return IntentResult{ + Intent: IntentWriteOrUnknown, + Reason: niBlock.FormatMessage(), + NonInteractiveBlock: niBlock, + } + } + + // === PHASE 2: Known write patterns === + // Check BEFORE read-only patterns to catch write variants like "sed -i" + // before generic patterns like "sed " match. + if reason := matchesWritePatterns(cmdLower); reason != "" { + return IntentResult{Intent: IntentWriteOrUnknown, Reason: reason} + } + + // === PHASE 3: Known read-only by construction === + // Commands that cannot mutate regardless of arguments. + // Only reached if Phase 2 didn't match any write patterns. 
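+	// For example (illustrative): "sed 's/x/y/' f.txt" is accepted here as
+	// read-only by construction, while "sed -i 's/x/y/' f.txt" was already
+	// rejected in Phase 2 by the "sed -i" write pattern.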
+ if isReadOnlyByConstruction(cmdLower) { + return IntentResult{Intent: IntentReadOnlyCertain, Reason: "known read-only command"} + } + + // === PHASE 4: Self-contained read candidate check === + // Additional guardrails before content inspection. + if reason := checkSelfContainedGuardrails(command, cmdLower); reason != "" { + return IntentResult{Intent: IntentWriteOrUnknown, Reason: reason} + } + + // === PHASE 5: Content inspection via registered inspectors === + for _, inspector := range registeredInspectors { + if inspector.Applies(cmdLower) { + if isReadOnly, reason := inspector.IsReadOnly(cmdLower); isReadOnly { + return IntentResult{Intent: IntentReadOnlyConditional, Reason: "content inspection: read-only"} + } else { + return IntentResult{Intent: IntentWriteOrUnknown, Reason: "content inspection: " + reason} + } + } + } + + // === PHASE 6: Conservative fallback === + // Unknown command with no inspector match → treat as write + return IntentResult{Intent: IntentWriteOrUnknown, Reason: "unknown command; no inspector matched"} +} + +// checkMutationCapabilityGuards checks for shell patterns that enable mutation +// regardless of the underlying command. Returns reason if any guard fails. +// +// Also includes NonInteractiveOnly guardrails - pulse_read runs non-interactively, +// so commands requiring TTY or indefinite streaming are blocked. +func checkMutationCapabilityGuards(command, cmdLower string) string { + // sudo escalates any command + if strings.Contains(cmdLower, "sudo ") || strings.HasPrefix(cmdLower, "sudo") { + return "sudo escalates command privileges" + } + + // Output redirection can overwrite files + if hasStdoutRedirect(command) { + return "output redirection can overwrite files" + } + if strings.Contains(cmdLower, " tee ") || strings.Contains(cmdLower, "|tee ") { + return "tee can write to files" + } + + // Subshell/command substitution can execute arbitrary commands + if strings.Contains(command, "$(") || strings.Contains(command, "`") { + return "command substitution can execute arbitrary commands" + } + + // Input redirection means we can't inspect the content. + // This catches: < (redirect), << (heredoc), <<< (here-string) + // Examples blocked: + // sqlite3 db < script.sql + // psql < 1 { + // Skip watch and any flags like -n 1 + cmdStart := 1 + for i := 1; i < len(parts); i++ { + if strings.HasPrefix(parts[i], "-") { + cmdStart = i + 1 + if parts[i] == "-n" && i+1 < len(parts) { + cmdStart = i + 2 + } + } else { + break + } + } + if cmdStart < len(parts) { + watchedCmd := strings.Join(strings.Fields(command)[cmdStart:], " ") + return strings.Trim(watchedCmd, "'\"") + } + } + } + return "" +} + +// suggestBoundedStreaming adds --tail/--since bounds to streaming commands. 
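+// Illustrative rewrites (mirroring the per-tool cases below):
+//
+//	journalctl -f       → journalctl -n 200 --since "10 min ago"
+//	docker logs -f app  → docker logs --tail=200 app
+//	dmesg -w            → dmesg | tail -200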
+func suggestBoundedStreaming(command, cmdLower string) string {
+	parts := strings.Fields(command)
+	if len(parts) == 0 {
+		return ""
+	}
+	tool := strings.ToLower(parts[0])
+
+	switch {
+	case tool == "tail":
+		// tail -f /var/log/app.log → tail -n 200 /var/log/app.log
+		// Replace follow flags with a line bound so the command terminates
+		// deterministically instead of streaming forever.
+		result := strings.ReplaceAll(command, " -f", " -n 200")
+		result = strings.ReplaceAll(result, " --follow", " -n 200")
+		return result
+
+	case tool == "journalctl":
+		// journalctl -f → journalctl -n 200 --since "10 min ago"
+		result := command
+		if strings.Contains(cmdLower, " -f") {
+			result = strings.ReplaceAll(result, " -f", ` -n 200 --since "10 min ago"`)
+		}
+		if strings.Contains(cmdLower, " --follow") {
+			result = strings.ReplaceAll(result, " --follow", ` -n 200 --since "10 min ago"`)
+		}
+		return result
+
+	case strings.HasPrefix(tool, "docker") && strings.Contains(cmdLower, "logs"):
+		// docker logs -f container → docker logs --tail=200 container
+		result := strings.ReplaceAll(command, " -f ", " --tail=200 ")
+		result = strings.ReplaceAll(result, " -f", " --tail=200")
+		result = strings.ReplaceAll(result, " --follow ", " --tail=200 ")
+		result = strings.ReplaceAll(result, " --follow", " --tail=200")
+		return result
+
+	case strings.HasPrefix(tool, "kubectl") && strings.Contains(cmdLower, "logs"):
+		// kubectl logs -f pod → kubectl logs --tail=200 --since=10m pod
+		result := strings.ReplaceAll(command, " -f ", " --tail=200 --since=10m ")
+		result = strings.ReplaceAll(result, " -f", " --tail=200 --since=10m")
+		result = strings.ReplaceAll(result, " --follow ", " --tail=200 --since=10m ")
+		result = strings.ReplaceAll(result, " --follow", " --tail=200 --since=10m")
+		return result
+
+	case tool == "dmesg":
+		// dmesg -w → dmesg | tail -200
+		result := strings.ReplaceAll(command, " -w", "")
+		result = strings.ReplaceAll(result, " --follow", "")
+		return result + " | tail -200"
+	}
+	return ""
+}
+
+// suggestNonInteractiveREPL suggests a non-interactive form for REPL commands.
+// Returns an empty string for cases needing human judgment (what query to run?).
+func suggestNonInteractiveREPL(command, cmdLower string) string {
+	parts := strings.Fields(command)
+	if len(parts) == 0 {
+		return ""
+	}
+	tool := strings.ToLower(parts[0])
+
+	// For SQL CLIs and REPLs, we can't suggest a specific query
+	// but we can show the pattern
+	switch tool {
+	case "mysql", "mariadb":
+		return fmt.Sprintf("%s -e \"SELECT ...\"", command)
+	case "psql":
+		return fmt.Sprintf("%s -c \"SELECT ...\"", command)
+	case "sqlite3":
+		return fmt.Sprintf("%s \"SELECT ...\"", command)
+	case "redis-cli":
+		return fmt.Sprintf("%s PING", command)
+	case "python", "python3", "python2":
+		return fmt.Sprintf("%s -c \"...\"", parts[0])
+	case "node", "nodejs":
+		return fmt.Sprintf("%s -e \"...\"", parts[0])
+	case "ssh":
+		// ssh host → ssh host "command"
+		return fmt.Sprintf("%s \"ls -la\"", command)
+	}
+	return ""
+}
+
+// hasInteractiveTTYFlags detects flags that request interactive/TTY mode.
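+// Illustrative results:
+//
+//	docker exec -it web sh     → true  (interactive TTY requested)
+//	kubectl exec -i pod -- ls  → true  (stdin attached on exec)
+//	docker logs web            → false
+//	hostname -i                → false (not a docker/kubectl command)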
+func hasInteractiveTTYFlags(cmdLower string) bool { + // Only check for docker/kubectl commands + isDockerKubectl := strings.HasPrefix(cmdLower, "docker ") || + strings.HasPrefix(cmdLower, "kubectl ") + if !isDockerKubectl { + return false + } + + // Docker/kubectl -it or -i -t combinations (common shorthand) + if strings.Contains(cmdLower, " -it ") || strings.Contains(cmdLower, " -it\t") || + strings.HasSuffix(cmdLower, " -it") || + strings.Contains(cmdLower, " -ti ") || strings.Contains(cmdLower, " -ti\t") || + strings.HasSuffix(cmdLower, " -ti") { + return true + } + + // Explicit long flags + if strings.Contains(cmdLower, " --tty") || strings.Contains(cmdLower, " --interactive") { + return true + } + + // Check for standalone -t and -i flags that aren't part of other patterns + // Avoid matching: 2>&1 (stderr redirect), -t tablename, etc. + // Look for " -t " or " -i " as standalone flags followed by non-alphanumeric + parts := strings.Fields(cmdLower) + for i, part := range parts { + if part == "-t" || part == "-i" { + // Found standalone -t or -i flag + // Check if this is in the context of exec/run subcommands + for j := 0; j < i; j++ { + if parts[j] == "exec" || parts[j] == "run" { + return true + } + } + } + } + + return false +} + +// isPagerOrEditorTool detects pager and editor tools that require terminal interaction. +func isPagerOrEditorTool(cmdLower string) bool { + // Extract first word + firstWord := cmdLower + if spaceIdx := strings.Index(cmdLower, " "); spaceIdx > 0 { + firstWord = cmdLower[:spaceIdx] + } + + pagerEditorTools := []string{"less", "more", "vim", "vi", "nano", "emacs", "pico", "ed"} + for _, tool := range pagerEditorTools { + if firstWord == tool { + return true + } + } + return false +} + +// isLiveMonitoringTool detects tools that run indefinitely showing live data. +func isLiveMonitoringTool(cmdLower string) bool { + firstWord := cmdLower + if spaceIdx := strings.Index(cmdLower, " "); spaceIdx > 0 { + firstWord = cmdLower[:spaceIdx] + } + + // These tools run until interrupted + liveTools := []string{"top", "htop", "atop", "iotop", "iftop", "nload", "watch"} + for _, tool := range liveTools { + if firstWord == tool { + return true + } + } + return false +} + +// isUnboundedStreaming detects follow-mode commands without an exit bound. +// Exit-bounded = terminates deterministically (line count, time window, or timeout wrapper). 
+// +// Allowed (exit-bounded): +// - journalctl -n 100, tail -n 50, tail -100 -f, kubectl logs --tail=100 +// - journalctl --since "10 min ago", kubectl logs --since=10m +// - timeout 5s tail -f +// +// Blocked (runs indefinitely): +// - journalctl -f, tail -f, kubectl logs -f, dmesg -w +func isUnboundedStreaming(cmdLower string) bool { + // Only certain commands support follow mode - don't flag -f on other commands + // (e.g., "hostname -f" uses -f for "full", not "follow") + streamingCommands := []string{"tail ", "journalctl ", "docker logs ", "kubectl logs ", "dmesg "} + isStreamingCmd := false + for _, prefix := range streamingCommands { + if strings.HasPrefix(cmdLower, prefix) { + isStreamingCmd = true + break + } + } + if !isStreamingCmd { + return false + } + + // Check for follow flags + hasFollowFlag := strings.Contains(cmdLower, " -f") || + strings.Contains(cmdLower, " --follow") || + strings.Contains(cmdLower, " -w") // dmesg uses -w/--follow + + if !hasFollowFlag { + return false + } + + // If wrapped in timeout, it's exit-bounded + if strings.HasPrefix(cmdLower, "timeout ") { + return false + } + + // Check for explicit bounds that make it exit-bounded: + // - Line count: -n, --lines, --tail + // - Time window: --since, --until (journalctl/kubectl logs) + hasBound := strings.Contains(cmdLower, " -n ") || + strings.Contains(cmdLower, " -n=") || + strings.Contains(cmdLower, " --lines") || + strings.Contains(cmdLower, " --tail=") || + strings.Contains(cmdLower, " --tail ") || + strings.Contains(cmdLower, " --since") || // journalctl --since "10 min ago", kubectl logs --since=10m + strings.Contains(cmdLower, " --until") || // journalctl --until "2024-01-01" + hasTailShorthandBound(cmdLower) // tail -100 shorthand + + // Follow flag without bounds = runs indefinitely + return !hasBound +} + +// hasTailShorthandBound checks for tail's -N shorthand (e.g., tail -100 -f) +func hasTailShorthandBound(cmdLower string) bool { + if !strings.HasPrefix(cmdLower, "tail ") { + return false + } + // Look for -NUMBER pattern (tail's shorthand for -n NUMBER) + // Match patterns like: tail -100, tail -50 -f + parts := strings.Fields(cmdLower) + for _, part := range parts { + if len(part) >= 2 && part[0] == '-' { + // Check if rest is digits + allDigits := true + for _, c := range part[1:] { + if c < '0' || c > '9' { + allDigits = false + break + } + } + if allDigits && len(part) > 1 { + return true + } + } + } + return false +} + +// isInteractiveREPL detects commands that open an interactive REPL/shell +// unless given explicit non-interactive flags (-c, --execute, inline command). 
+// +// Blocked (opens REPL): +// - ssh host (no command) +// - mysql, psql, sqlite3 db (no -c/-e/inline SQL) +// - redis-cli (no command args) +// - python, node, irb (no script/command) +// - openssl s_client +// +// Allowed (non-interactive): +// - ssh host "command" +// - mysql -e "SELECT 1" +// - sqlite3 db "SELECT 1" +// - python -c "print(1)" +// - python script.py +func isInteractiveREPL(cmdLower string) bool { + firstWord := cmdLower + if spaceIdx := strings.Index(cmdLower, " "); spaceIdx > 0 { + firstWord = cmdLower[:spaceIdx] + } + + // SSH: interactive unless a command is provided after host + // ssh host -> interactive + // ssh host "ls -la" -> non-interactive + // ssh -t host -> interactive (explicit TTY) + if firstWord == "ssh" { + // If has -t flag, it's explicitly requesting TTY + if strings.Contains(cmdLower, " -t ") || strings.Contains(cmdLower, " -t") { + return true + } + // Count non-flag arguments after ssh + // ssh [options] host [command] + parts := strings.Fields(cmdLower) + nonFlagArgs := 0 + skipNext := false + for i, part := range parts[1:] { // skip "ssh" + if skipNext { + skipNext = false + continue + } + // Skip flags that take arguments + if part == "-i" || part == "-l" || part == "-p" || part == "-o" || part == "-F" { + skipNext = true + continue + } + // Skip other flags + if strings.HasPrefix(part, "-") { + continue + } + nonFlagArgs++ + // If we have more than just the host, there's a command + if nonFlagArgs > 1 || (nonFlagArgs == 1 && i < len(parts)-2) { + return false // has command, not interactive + } + } + // Only host, no command = interactive + return nonFlagArgs <= 1 + } + + // SQL CLIs: handled by sqlContentInspector, but catch bare invocations + // mysql, psql without -c/-e, sqlite3 without inline SQL + if firstWord == "mysql" || firstWord == "mariadb" { + // Non-interactive if has -e or --execute + if strings.Contains(cmdLower, " -e ") || strings.Contains(cmdLower, " -e\"") || + strings.Contains(cmdLower, " --execute") { + return false + } + // Non-interactive if has piped input (handled elsewhere, but check) + if strings.Contains(cmdLower, " < ") || strings.Contains(cmdLower, " <<") { + return false + } + return true // bare mysql = interactive + } + + if firstWord == "psql" { + // Non-interactive if has -c or --command + if strings.Contains(cmdLower, " -c ") || strings.Contains(cmdLower, " -c\"") || + strings.Contains(cmdLower, " --command") { + return false + } + if strings.Contains(cmdLower, " < ") || strings.Contains(cmdLower, " <<") { + return false + } + return true + } + + // redis-cli: interactive without command arguments + if firstWord == "redis-cli" { + // Check for command after connection flags + parts := strings.Fields(cmdLower) + hasCommand := false + skipNext := false + for _, part := range parts[1:] { + if skipNext { + skipNext = false + continue + } + // Connection flags that take arguments + if part == "-h" || part == "-p" || part == "-a" || part == "-n" || part == "--user" { + skipNext = true + continue + } + if strings.HasPrefix(part, "-") { + continue + } + // Non-flag argument = Redis command + hasCommand = true + break + } + return !hasCommand + } + + // Scripting REPLs: python, node, irb without script/command + if firstWord == "python" || firstWord == "python3" || firstWord == "python2" { + // Non-interactive if has -c or script file + if strings.Contains(cmdLower, " -c ") || strings.Contains(cmdLower, " -c\"") { + return false + } + // Check for script file (non-flag argument) + parts := strings.Fields(cmdLower) + 
for _, part := range parts[1:] { + if !strings.HasPrefix(part, "-") && !strings.HasPrefix(part, "\"") { + return false // has script file + } + } + return true // bare python = REPL + } + + if firstWord == "node" || firstWord == "nodejs" { + // Non-interactive if has -e or script file + if strings.Contains(cmdLower, " -e ") || strings.Contains(cmdLower, " -e\"") || + strings.Contains(cmdLower, " --eval") { + return false + } + parts := strings.Fields(cmdLower) + for _, part := range parts[1:] { + if !strings.HasPrefix(part, "-") { + return false // has script file + } + } + return true + } + + if firstWord == "irb" || firstWord == "pry" { + // Ruby REPLs - almost always interactive + // Non-interactive only with -e + if strings.Contains(cmdLower, " -e ") { + return false + } + return true + } + + // openssl s_client is always interactive (waits for input) + if strings.HasPrefix(cmdLower, "openssl s_client") || strings.HasPrefix(cmdLower, "openssl s_server") { + return true + } + + return false +} + +// hasStdoutRedirect checks for dangerous output redirects while allowing safe stderr redirects. +func hasStdoutRedirect(command string) bool { + if !strings.Contains(command, ">") { + return false + } + // Remove safe stderr redirects before checking + cmd := strings.ReplaceAll(command, "2>/dev/null", "") + cmd = strings.ReplaceAll(cmd, "2>&1", "") + return strings.Contains(cmd, ">") +} + +// pipedToDualUseTool checks if a piped command sends input to a dual-use tool +// that could interpret piped input dangerously (like SQL CLIs). +// Piping to read-only filters (grep, head, tail, etc.) is safe. +func pipedToDualUseTool(cmdLower string) bool { + // Find the last pipe (not ||) + pipeIdx := -1 + for i := 0; i < len(cmdLower)-1; i++ { + if cmdLower[i] == '|' && cmdLower[i+1] != '|' { + pipeIdx = i + } + } + if pipeIdx == -1 { + return false + } + + // Get the command after the last pipe + afterPipe := strings.TrimSpace(cmdLower[pipeIdx+1:]) + + // Dual-use tools that interpret piped input dangerously + dualUseTools := []string{ + "sqlite3", "mysql", "mariadb", "psql", "mycli", "pgcli", "litecli", + "redis-cli", "mongo", "mongosh", + "sh ", "sh\t", "bash ", "bash\t", "zsh ", "zsh\t", + "python", "perl", "ruby", "node", + "xargs", + } + for _, tool := range dualUseTools { + if strings.HasPrefix(afterPipe, tool) { + return true + } + } + + return false +} + +// checkSelfContainedGuardrails verifies the command is a single execution unit. +// Returns reason if any guardrail fails. +// Note: Most guardrails have been moved to checkMutationCapabilityGuards (Phase 1) +// to ensure they run before read-only-by-construction checks. +func checkSelfContainedGuardrails(command, cmdLower string) string { + // Most checks are now in Phase 1 (checkMutationCapabilityGuards) + // This phase is kept for potential future guardrails that should run + // after write pattern matching. + return "" +} + +// isReadOnlyByConstruction returns true for commands that cannot mutate by design. +// Only matches patterns at the START of the command to avoid false positives +// (e.g., "date " inside "UPDATE" SQL statements). +func isReadOnlyByConstruction(cmdLower string) bool { + // Note: Pager tools (less, more) and live monitors (top, htop) are excluded here + // because they're blocked by NonInteractiveOnly guardrails in Phase 1. 
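+	// For example (illustrative): "grep -r TODO /srv" matches the list below,
+	// while "less /var/log/syslog" never reaches this function because the
+	// NonInteractiveOnly guardrails reject pagers first.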
+ readOnlyCommands := []string{ + "cat", "head", "tail", + "ls", "ll", "dir", + "ps", "free", "df", "du", + "grep", "awk", "sed", "find", "locate", "which", "whereis", + "journalctl", "dmesg", + "uname", "hostname", "whoami", "id", "groups", + "date", "uptime", "env", "printenv", "locale", + "netstat", "ss", "ifconfig", "route", + "ping", "traceroute", "tracepath", "nslookup", "dig", "host", + "file", "stat", "wc", "sort", "uniq", "cut", "tr", + "lsof", "fuser", + "getent", "nproc", "lscpu", "lsmem", "lsblk", "blkid", + "zcat", "zgrep", "bzcat", "xzcat", + "md5sum", "sha256sum", "sha1sum", + "test", + // Media inspection tools + "ffprobe", "mediainfo", "exiftool", + } + + // Multi-word patterns that must appear at the start + multiWordPatterns := []string{ + "curl -s", "curl --silent", "curl -I", "curl --head", + "wget -q", "wget --spider", + "docker ps", "docker logs", "docker inspect", "docker stats", "docker images", "docker info", + "systemctl status", "systemctl is-active", "systemctl is-enabled", "systemctl list", "systemctl show", + "ip addr", "ip route", "ip link", + // Kubectl read-only commands + "kubectl get", "kubectl describe", "kubectl logs", "kubectl top", "kubectl cluster-info", + "kubectl api-resources", "kubectl api-versions", "kubectl version", "kubectl config view", + // Timeout wrapper (makes any command bounded) + "timeout ", + } + + // Extract first word of command + firstWord := cmdLower + if spaceIdx := strings.Index(cmdLower, " "); spaceIdx > 0 { + firstWord = cmdLower[:spaceIdx] + } + + // Check single-word commands + for _, cmd := range readOnlyCommands { + if firstWord == cmd { + return true + } + } + + // Check multi-word patterns at start + for _, pattern := range multiWordPatterns { + if strings.HasPrefix(cmdLower, pattern) { + return true + } + } + + // Special case: [ (test shorthand) + if strings.HasPrefix(cmdLower, "[ ") { + return true + } + + return false +} + +// matchesWritePatterns checks for known write-capable command patterns. +// Returns reason if a write pattern matches. 
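+// Illustrative matches (cmdLower is the lowercased command):
+//
+//	rm -rf /tmp/cache         → "file deletion"
+//	sed -i 's/a/b/' app.conf  → "in-place edit"
+//	curl -X POST https://api  → "HTTP mutation request"
+//	grep foo bar.txt          → "" (no write pattern)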
+func matchesWritePatterns(cmdLower string) string {
+	// High-risk patterns
+	highRiskPatterns := map[string]string{
+		"rm ": "file deletion", "rm\t": "file deletion", "rmdir": "directory deletion",
+		"shutdown": "system shutdown", "reboot": "system reboot", "poweroff": "system poweroff", "halt": "system halt",
+		"systemctl restart": "service restart", "systemctl stop": "service stop", "systemctl start": "service start",
+		"systemctl enable": "service enable", "systemctl disable": "service disable",
+		"service ": "service control", "init ": "init control",
+		"apt ": "package management", "apt-get ": "package management", "yum ": "package management",
+		"dnf ": "package management", "pacman ": "package management", "apk ": "package management", "brew ": "package management",
+		"pip install": "package install", "pip uninstall": "package uninstall",
+		"npm install": "package install", "npm uninstall": "package uninstall", "cargo install": "package install",
+		"docker rm": "container removal", "docker stop": "container stop", "docker kill": "container kill",
+		"docker restart": "container restart", "docker exec": "container exec",
+		"kill ": "process termination", "killall ": "process termination", "pkill ": "process termination",
+		"dd ": "disk write", "mkfs": "filesystem creation", "fdisk": "disk partition", "parted": "disk partition", "mkswap": "swap creation",
+		"iptables": "firewall modification", "firewall-cmd": "firewall modification", "ufw ": "firewall modification",
+		"truncate": "file truncation",
+		"chmod ": "permission change", "chown ": "ownership change", "chgrp ": "group change",
+		"useradd": "user creation", "userdel": "user deletion", "usermod": "user modification",
+		"passwd": "password change", "chpasswd": "password change",
+		"crontab -e": "cron edit", "crontab -r": "cron removal", "crontab -": "cron modification",
+		"visudo": "sudoers edit", "vipw": "passwd edit",
+		"mount ": "filesystem mount", "umount ": "filesystem unmount",
+		"modprobe": "kernel module", "rmmod": "kernel module removal", "insmod": "kernel module insertion",
+		"sysctl -w": "kernel parameter change",
+	}
+	for pattern, reason := range highRiskPatterns {
+		if strings.Contains(cmdLower, pattern) {
+			return reason
+		}
+	}
+
+	// Medium-risk patterns
+	mediumRiskPatterns := map[string]string{
+		"mv ": "file move", "cp ": "file copy",
+		"sed -i": "in-place edit", "awk -i": "in-place edit",
+		"touch ": "file creation", "mkdir ": "directory creation",
+		"echo ": "output (may redirect)", "printf ": "output (may redirect)",
+		"wget -o": "file download", "wget --output": "file download", // lowercased input: matches both -o and -O
+		"tar -x": "archive extraction", "tar x": "archive extraction", "unzip ": "archive extraction", "gunzip ": "archive extraction",
+		"ln ": "link creation", "link ": "link creation",
+	}
+	for pattern, reason := range mediumRiskPatterns {
+		if strings.Contains(cmdLower, pattern) {
+			return reason
+		}
+	}
+
+	// Curl with mutation verbs. cmdLower is already lowercased, so the
+	// -X POST/PUT/DELETE/PATCH variants must be matched in lowercase.
+	if strings.Contains(cmdLower, "curl") {
+		if strings.Contains(cmdLower, "-d ") || strings.Contains(cmdLower, "--data") ||
+			strings.Contains(cmdLower, "--upload") ||
+			strings.Contains(cmdLower, "-x post") || strings.Contains(cmdLower, "-x put") ||
+			strings.Contains(cmdLower, "-x delete") || strings.Contains(cmdLower, "-x patch") {
+			return "HTTP mutation request"
+		}
+	}
+
+	return ""
+}
+
+// hasShellChainingOutsideQuotes checks if a command contains shell chaining operators
+// (;, &&, ||) outside of quoted strings.
+// This allows SQL statements like "SELECT 1;" while still catching shell
+// command chaining like "ls; rm -rf /".
+//
+// Handles escaped quotes (\' and \") by skipping the escaped character.
+// Fails closed: if quote state becomes ambiguous (unclosed quotes), returns true.
+func hasShellChainingOutsideQuotes(cmd string) bool {
+	inSingleQuote := false
+	inDoubleQuote := false
+
+	for i := 0; i < len(cmd); i++ {
+		ch := cmd[i]
+
+		// Handle escape sequences: skip the next character
+		// This prevents \" or \' from toggling quote state
+		if ch == '\\' && i+1 < len(cmd) {
+			i++ // Skip the escaped character
+			continue
+		}
+
+		// Track quote state
+		switch ch {
+		case '\'':
+			if !inDoubleQuote {
+				inSingleQuote = !inSingleQuote
+			}
+		case '"':
+			if !inSingleQuote {
+				inDoubleQuote = !inDoubleQuote
+			}
+		case ';':
+			if !inSingleQuote && !inDoubleQuote {
+				return true
+			}
+		case '&':
+			// Check for && (need to look at next char)
+			if !inSingleQuote && !inDoubleQuote && i+1 < len(cmd) && cmd[i+1] == '&' {
+				return true
+			}
+		case '|':
+			// Check for || (need to look at next char)
+			// Note: single | is a pipe, which is allowed for read operations
+			if !inSingleQuote && !inDoubleQuote && i+1 < len(cmd) && cmd[i+1] == '|' {
+				return true
+			}
+		}
+	}
+
+	// Fail closed: if quotes are unclosed, treat as potentially dangerous
+	// (ambiguous state means we can't be sure chaining operators are inside quotes)
+	if inSingleQuote || inDoubleQuote {
+		return true
+	}
+
+	return false
+}
+
+// classifyCommandRisk provides backward-compatible risk classification.
+// It delegates to ClassifyExecutionIntent and maps the result to CommandRisk,
+// preserving the High/Medium write distinction for existing callers.
+//
+// Deprecated: Use ClassifyExecutionIntent for new code.
+func classifyCommandRisk(command string) CommandRisk {
+	result := ClassifyExecutionIntent(command)
+	switch result.Intent {
+	case IntentReadOnlyCertain, IntentReadOnlyConditional:
+		return CommandRiskReadOnly
+	default:
+		// For backward compatibility, distinguish HighWrite from MediumWrite
+		// using the same pattern checks as matchesWritePatterns
+		return classifyWriteRiskLevel(command, result.Reason)
+	}
+}
+
+// classifyWriteRiskLevel determines whether a write command is high or medium risk.
+// Used by classifyCommandRisk for backward compatibility.
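+//
+// Illustrative examples (the risk buckets come from the pattern list in the
+// function body; the reason argument is currently unused):
+//
+//	classifyWriteRiskLevel("rm -rf /data", "file deletion")      // CommandRiskHighWrite
+//	classifyWriteRiskLevel("touch /tmp/marker", "file creation") // CommandRiskMediumWrite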
+func classifyWriteRiskLevel(command, reason string) CommandRisk { + cmdLower := strings.ToLower(command) + + // High-risk: destructive system operations + highRiskPatterns := []string{ + // Shell mutation capabilities (these dominate everything) + "> ", ">>", "| tee ", + // Destructive file operations + "rm ", "rm\t", "rmdir", + // System control + "shutdown", "reboot", "poweroff", "halt", + // Service control (except status) + "systemctl restart", "systemctl stop", "systemctl start", + "systemctl enable", "systemctl disable", + "service ", "init ", + // Package managers + "apt ", "apt-get ", "yum ", "dnf ", "pacman ", "apk ", "brew ", + "pip install", "pip uninstall", "npm install", "npm uninstall", "cargo install", + // Container destruction + "docker rm", "docker stop", "docker kill", "docker restart", + // Process termination + "kill ", "killall ", "pkill ", + // Disk operations + "dd ", "mkfs", "fdisk", "parted", "mkswap", + // Firewall + "iptables", "firewall-cmd", "ufw ", + // File truncation + "truncate", + // Permissions/ownership + "chmod ", "chown ", "chgrp ", + // User management + "useradd", "userdel", "usermod", "passwd", "chpasswd", + // Cron/sudoers + "crontab -", "visudo", "vipw", + // Mounts and kernel + "mount ", "umount ", "modprobe", "rmmod", "insmod", "sysctl -w", + // sudo escalation + "sudo", + } + + for _, pattern := range highRiskPatterns { + if strings.Contains(cmdLower, pattern) { + return CommandRiskHighWrite + } + } + + // Everything else is medium-risk + return CommandRiskMediumWrite +} + +// GetReadOnlyViolationHint returns a hint for why a command was blocked from pulse_read. +// Uses the IntentResult reason plus context-aware suggestions. +func GetReadOnlyViolationHint(command string, result IntentResult) string { + baseHint := result.Reason + cmdLower := strings.ToLower(command) + + // Phase 1 guardrail hints (structural issues that must be removed) + if isPhase1GuardrailFailure(result.Reason) { + return getPhase1Hint(result.Reason, baseHint) + } + + // Content inspection hints (SQL CLIs, etc.) + isSQLCLI := strings.Contains(cmdLower, "sqlite3") || + strings.Contains(cmdLower, "mysql") || + strings.Contains(cmdLower, "mariadb") || + strings.Contains(cmdLower, "psql") + + if isSQLCLI { + return getSQLHint(result.Reason, baseHint) + } + + // Unknown command fallback hint + if strings.Contains(result.Reason, "unknown") || strings.Contains(result.Reason, "no inspector") { + return baseHint + ". Try a self-contained form: no pipes, no redirects, single statement. If this is a read-only operation, consider using a known read-only command instead." + } + + return baseHint +} + +// isPhase1GuardrailFailure returns true if the reason indicates a Phase 1 structural issue. +func isPhase1GuardrailFailure(reason string) bool { + guardrailKeywords := []string{ + "sudo", "redirect", "tee", "substitution", "chaining", "piped input", + // NonInteractiveOnly guardrails + "TTY", "terminal", "pager", "editor", "indefinitely", "unbounded", "streaming", + } + for _, kw := range guardrailKeywords { + if strings.Contains(reason, kw) { + return true + } + } + return false +} + +// getPhase1Hint returns actionable hints for Phase 1 guardrail failures. +func getPhase1Hint(reason, baseHint string) string { + switch { + case strings.Contains(reason, "sudo"): + return baseHint + ". Remove sudo to use pulse_read; use pulse_control for privileged operations." + case strings.Contains(reason, "redirect"): + return baseHint + ". Remove redirects (>, >>, <, <<, <<<) to use pulse_read." 
+ case strings.Contains(reason, "tee"): + return baseHint + ". Remove tee to use pulse_read; tee writes to files." + case strings.Contains(reason, "substitution"): + return baseHint + ". Remove $() or backticks to use pulse_read." + case strings.Contains(reason, "chaining"): + return baseHint + ". Run commands separately instead of chaining with ; && ||." + case strings.Contains(reason, "piped input"): + return baseHint + ". For dual-use tools, include content directly instead of piping. Example: sqlite3 db.db \"SELECT ...\" instead of cat file | sqlite3 db.db" + // NonInteractiveOnly hints + case strings.Contains(reason, "TTY") || strings.Contains(reason, "terminal"): + return baseHint + ". Remove -it/--tty/--interactive flags. Use non-interactive form: docker exec container cmd (not docker exec -it)." + case strings.Contains(reason, "pager") || strings.Contains(reason, "editor"): + return baseHint + ". Use cat, head -n, or tail -n instead of interactive tools." + case strings.Contains(reason, "indefinitely"): + return baseHint + ". Use bounded alternatives: ps aux (not top), journalctl -n 100 (not watch)." + case strings.Contains(reason, "unbounded") || strings.Contains(reason, "streaming"): + return baseHint + ". Add line limit: journalctl -n 100 -f or tail -n 50 -f, or wrap with timeout." + default: + return baseHint + ". Remove redirects, chaining, sudo, or subshells to use pulse_read." + } +} + +// getSQLHint returns actionable hints for SQL CLI content inspection failures. +func getSQLHint(reason, baseHint string) string { + switch { + case strings.Contains(reason, "external") || strings.Contains(reason, "no inline"): + return baseHint + ". Include SQL directly in quotes: sqlite3 db.db \"SELECT ...\"" + case strings.Contains(reason, "write") || strings.Contains(reason, "control"): + return baseHint + ". Use only SELECT statements. Avoid: INSERT, UPDATE, DELETE, DROP, CREATE, PRAGMA, BEGIN, COMMIT, ROLLBACK." + default: + return baseHint + ". For read-only queries, use self-contained SELECT statements without transaction control." + } +} + const ( defaultMaxTopologyNodes = 5 defaultMaxTopologyVMsPerNode = 5 @@ -21,72 +1337,552 @@ const ( defaultMaxListDockerContainersPerHost = 10 ) -// registerQueryTools registers infrastructure query tools +// buildResourceID creates a canonical resource ID. +// Prefers kind:provider_uid when UID is available, falls back to kind:name. +func buildResourceID(kind, name, providerUID string) string { + if providerUID != "" { + return kind + ":" + providerUID + } + return kind + ":" + name +} + +// buildDisplayPath creates a human-readable location path. +// e.g., "docker:jellyfin @ lxc:media-server @ node:delly" +func buildDisplayPath(locationChain []string) string { + if len(locationChain) == 0 { + return "" + } + // Reverse for display (innermost first) + reversed := make([]string, len(locationChain)) + for i, loc := range locationChain { + reversed[len(locationChain)-1-i] = loc + } + return strings.Join(reversed, " @ ") +} + +// registerResolvedResource adds a discovered resource to the resolved context if available. +// This is called by query tools when they find resources, enabling action tools to validate +// that commands are targeting legitimate, discovered resources. +// +// NOTE: This does NOT mark the resource as "recently accessed" for routing validation. +// Use registerResolvedResourceWithExplicitAccess() for single-resource operations where +// user intent is clear. 
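+//
+// Illustrative registration (a minimal sketch with hypothetical values; the
+// full field set appears in the search and get handlers below):
+//
+//	e.registerResolvedResource(ResourceRegistration{
+//		Kind:          "lxc",
+//		ProviderUID:   "105",
+//		Name:          "media-server",
+//		Node:          "delly",
+//		LocationChain: []string{"node:delly", "lxc:media-server"},
+//	})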
+func (e *PulseToolExecutor) registerResolvedResource(reg ResourceRegistration) { + if e.resolvedContext == nil { + return + } + e.resolvedContext.AddResolvedResource(reg) +} + +// registerResolvedResourceWithExplicitAccess adds a resource AND marks it as recently accessed. +// Use this for single-resource operations (pulse_query get, explicit select) where user +// intent to target this specific resource is clear. +// +// DO NOT use this for bulk operations (list, search) that return many resources, +// as it would poison routing validation and cause false ROUTING_MISMATCH blocks. +func (e *PulseToolExecutor) registerResolvedResourceWithExplicitAccess(reg ResourceRegistration) { + if e.resolvedContext == nil { + return + } + e.resolvedContext.AddResolvedResource(reg) + + // Build the resource ID to mark explicit access (must match AddResolvedResource format) + // Format: {kind}:{host}:{provider_uid} for scoped resources + // {kind}:{provider_uid} for global resources + var resourceID string + if reg.ProviderUID != "" { + if reg.HostUID != "" || reg.HostName != "" { + hostScope := reg.HostUID + if hostScope == "" { + hostScope = reg.HostName + } + resourceID = reg.Kind + ":" + hostScope + ":" + reg.ProviderUID + } else { + resourceID = reg.Kind + ":" + reg.ProviderUID + } + } else { + if reg.HostUID != "" || reg.HostName != "" { + hostScope := reg.HostUID + if hostScope == "" { + hostScope = reg.HostName + } + resourceID = reg.Kind + ":" + hostScope + ":" + reg.Name + } else { + resourceID = reg.Kind + ":" + reg.Name + } + } + e.resolvedContext.MarkExplicitAccess(resourceID) +} + +// ValidationResult holds the result of resource validation. +// Check StrictError first using errors.As() for typed error handling. +type ValidationResult struct { + Resource ResolvedResourceInfo + ErrorMsg string // Human-readable error (backwards compat) + StrictError *ErrStrictResolution // Typed error for strict mode violations +} + +// IsBlocked returns true if the validation blocked the operation +func (v *ValidationResult) IsBlocked() bool { + return v.StrictError != nil +} + +// validateResolvedResource checks if a resource has been previously discovered via query/discovery tools. +// Returns a ValidationResult containing: +// - Resource: the resolved resource info if found +// - ErrorMsg: human-readable error message (empty if valid) +// - StrictError: typed error for strict mode violations (nil if not blocked) +// +// Setting skipIfNoContext=true makes validation optional (for backwards compatibility). +// +// When PULSE_STRICT_RESOLUTION=true is set, write operations (start, stop, restart, delete, exec) +// will be blocked if the resource wasn't discovered first. This prevents the AI from operating +// on fabricated or hallucinated resource IDs. +func (e *PulseToolExecutor) validateResolvedResource(resourceName, action string, skipIfNoContext bool) ValidationResult { + // Determine if this requires hard validation (strict mode + write action) + strictMode := isStrictResolutionEnabled() + isWrite := isWriteAction(action) + requireHardValidation := strictMode && isWrite + + if e.resolvedContext == nil { + if requireHardValidation { + // Record telemetry for strict resolution block + if e.telemetryCallback != nil { + e.telemetryCallback.RecordStrictResolutionBlock("validateResolvedResource", action) + } + err := &ErrStrictResolution{ + ResourceID: resourceName, + Action: action, + Message: fmt.Sprintf("Resource '%s' has not been discovered. 
Use pulse_query to find resources before performing '%s' action.", resourceName, action), + } + return ValidationResult{ + ErrorMsg: err.Message, + StrictError: err, + } + } + if skipIfNoContext { + return ValidationResult{} + } + return ValidationResult{ + ErrorMsg: fmt.Sprintf("Resource '%s' has not been discovered. Use pulse_query to find resources first.", resourceName), + } + } + + // First, try to find by alias (most common case - user refers to resources by name) + res, found := e.resolvedContext.GetResolvedResourceByAlias(resourceName) + if found { + // Check if action is allowed + allowedActions := res.GetAllowedActions() + if len(allowedActions) > 0 { + actionAllowed := false + for _, allowed := range allowedActions { + if allowed == action || allowed == "*" { + actionAllowed = true + break + } + } + if !actionAllowed { + return ValidationResult{ + Resource: res, + ErrorMsg: fmt.Sprintf("Action '%s' is not permitted for resource '%s'. Allowed actions: %v", action, resourceName, allowedActions), + } + } + } + return ValidationResult{Resource: res} + } + + // Try direct ID lookup (for when caller passes canonical ID) + res, found = e.resolvedContext.GetResolvedResourceByID(resourceName) + if found { + // Same action validation + allowedActions := res.GetAllowedActions() + if len(allowedActions) > 0 { + actionAllowed := false + for _, allowed := range allowedActions { + if allowed == action || allowed == "*" { + actionAllowed = true + break + } + } + if !actionAllowed { + return ValidationResult{ + Resource: res, + ErrorMsg: fmt.Sprintf("Action '%s' is not permitted for resource '%s'. Allowed actions: %v", action, resourceName, allowedActions), + } + } + } + return ValidationResult{Resource: res} + } + + // Resource not found + if requireHardValidation { + // Record telemetry for strict resolution block + if e.telemetryCallback != nil { + e.telemetryCallback.RecordStrictResolutionBlock("validateResolvedResource", action) + } + err := &ErrStrictResolution{ + ResourceID: resourceName, + Action: action, + Message: fmt.Sprintf("Resource '%s' has not been discovered in this session. Use pulse_query action=search to find it before performing '%s' action.", resourceName, action), + } + return ValidationResult{ + ErrorMsg: err.Message, + StrictError: err, + } + } + + // Allow operation if skipIfNoContext (backwards compat for soft validation) + if skipIfNoContext { + return ValidationResult{} + } + + return ValidationResult{ + ErrorMsg: fmt.Sprintf("Resource '%s' has not been discovered in this session. Use pulse_query action=search to find it first.", resourceName), + } +} + +// validateResolvedResourceForExec validates a resource for command execution. +// It uses command risk classification to determine if strict validation applies. 
+// +// Behavior in strict mode (PULSE_STRICT_RESOLUTION=true): +// - Read-only commands are allowed IF the session has ANY resolved context +// (prevents arbitrary host guessing while allowing diagnostic commands) +// - Write commands require the specific resource to be discovered first +// +// Behavior in normal mode: +// - All commands are allowed with soft validation (warning logs) +func (e *PulseToolExecutor) validateResolvedResourceForExec(resourceName, command string, skipIfNoContext bool) ValidationResult { + // Classify the command risk + risk := classifyCommandRisk(command) + + // For read-only commands in strict mode, allow if session has ANY resolved context + // This prevents arbitrary host guessing while still allowing diagnostic commands + // on hosts that have been discovered (even if not the specific resource) + if risk == CommandRiskReadOnly && isStrictResolutionEnabled() { + // Check if there's any resolved context at all + if e.resolvedContext != nil { + // Try to find the resource - if found, great + result := e.validateResolvedResource(resourceName, "query", true) + if result.Resource != nil { + return result + } + + // Resource not found, but we have some context - check if ANY host is discovered + // This is a scoped bypass: read-only commands allowed only if session is "active" + // (i.e., user has already done some discovery) + if e.hasAnyResolvedHost() { + // Allow read-only command with warning + return ValidationResult{ + ErrorMsg: fmt.Sprintf("Resource '%s' not explicitly discovered, but allowing read-only command due to existing session context", resourceName), + } + } + } + // No context at all - require discovery even for read-only in strict mode + // Record telemetry for strict resolution block + if e.telemetryCallback != nil { + e.telemetryCallback.RecordStrictResolutionBlock("validateResolvedResourceForExec", "exec (read-only)") + } + return ValidationResult{ + ErrorMsg: "No resources discovered in this session. Use pulse_query to discover resources first.", + StrictError: &ErrStrictResolution{ + ResourceID: resourceName, + Action: "exec (read-only)", + Message: fmt.Sprintf("Resource '%s' cannot be accessed. No resources have been discovered in this session. Use pulse_query action=search to discover available resources.", resourceName), + }, + } + } + + // For read-only commands in non-strict mode, use soft validation + if risk == CommandRiskReadOnly { + return e.validateResolvedResource(resourceName, "query", skipIfNoContext) + } + + // For write commands, use "exec" action which triggers strict validation + return e.validateResolvedResource(resourceName, "exec", skipIfNoContext) +} + +// hasAnyResolvedHost checks if there's at least one discovered resource in the session. +// This is used to scope read-only exec bypass - if the user has discovered ANY resource, +// we allow read-only commands to other resources (with warnings). +func (e *PulseToolExecutor) hasAnyResolvedHost() bool { + if e.resolvedContext == nil { + return false + } + return e.resolvedContext.HasAnyResources() +} + +// RoutingValidationResult holds the result of routing context validation. +type RoutingValidationResult struct { + RoutingError *ErrRoutingMismatch // Non-nil if routing mismatch detected +} + +// IsBlocked returns true if routing validation blocked the operation +func (r *RoutingValidationResult) IsBlocked() bool { + return r.RoutingError != nil +} + +// validateRoutingContext checks if a target_host should be a more specific resource. 
+// +// This validation prevents the model from accidentally operating on a parent Proxmox host +// when the user clearly intends to target a child resource (LXC/VM) on that host. +// +// IMPORTANT: This check is intentionally scoped to RECENTLY ACCESSED resources to avoid +// false positives. The logic is: +// +// - If target_host resolves directly to a resource in ResolvedContext → OK +// - If target_host is a Proxmox node AND the user RECENTLY referenced child resources +// on that node (within RecentAccessWindow) → block with ROUTING_MISMATCH +// +// This prevents blocking legitimate host-level operations like "apt update on @delly" +// while still catching the "user said @homepage-docker but model targets delly" scenario. +// +// The key insight: if the user explicitly mentioned a child resource in this turn/exchange, +// they probably intend to target that child, not the parent host. +func (e *PulseToolExecutor) validateRoutingContext(targetHost string) RoutingValidationResult { + // Skip if no state provider or resolved context + if e.stateProvider == nil || e.resolvedContext == nil { + return RoutingValidationResult{} + } + + // First, check if targetHost resolves directly to a resource in ResolvedContext + // If so, no routing mismatch - user is targeting the right thing + if res, found := e.resolvedContext.GetResolvedResourceByAlias(targetHost); found { + // Target matches a resolved resource directly - no mismatch + _ = res + return RoutingValidationResult{} + } + + // Check if targetHost is a Proxmox node (host) + state := e.stateProvider.GetState() + loc := state.ResolveResource(targetHost) + + // Only check for mismatch if targetHost is a Proxmox node (host type) + if !loc.Found || loc.ResourceType != "node" { + return RoutingValidationResult{} + } + + // targetHost is a Proxmox node. Check if ResolvedContext has RECENTLY ACCESSED + // child resources on this node (within the recent access window). + // This is the key refinement: we only block if the user recently referenced + // a child resource, implying they intended to target that child. + recentChildren := e.findRecentlyReferencedChildrenOnNode(loc.Node) + if len(recentChildren) == 0 { + return RoutingValidationResult{} + } + + // Extract names, IDs, and kinds for the error response + var childNames []string + var childIDs []string + var childKinds []string + for _, child := range recentChildren { + childNames = append(childNames, child.Name) + childIDs = append(childIDs, child.ResourceID) + childKinds = append(childKinds, child.Kind) + } + + // Record telemetry for routing mismatch block + // Use the first child kind for the label (we use small enums to avoid cardinality issues) + if e.telemetryCallback != nil && len(childKinds) > 0 { + e.telemetryCallback.RecordRoutingMismatchBlock("routing_validation", "node", childKinds[0]) + } + + // Rate-limited debug logging for support/debugging + // Logs: target_kind, child_kind, suggested_resource_id (no user paths to avoid cardinality) + logRoutingMismatchDebug(targetHost, childKinds, childIDs) + + // Found recently referenced child resources! Block with ROUTING_MISMATCH + return RoutingValidationResult{ + RoutingError: &ErrRoutingMismatch{ + TargetHost: targetHost, + MoreSpecificResources: childNames, + MoreSpecificIDs: childIDs, + ChildKinds: childKinds, + Message: fmt.Sprintf( + "target_host '%s' is a Proxmox node, but you recently referenced more specific resources on it: %v. "+ + "Did you mean to target one of these instead? 
File operations on a Proxmox host do NOT affect files inside LXC/VM guests.", + targetHost, childNames), + }, + } +} + +// recentChildInfo holds both the name and canonical ID of a recently referenced child resource. +type recentChildInfo struct { + Name string // Human-readable name + ResourceID string // Canonical ID (kind:host:id) + Kind string // Resource kind (lxc, vm, docker_container) for telemetry +} + +// findRecentlyReferencedChildrenOnNode returns the names and IDs of LXC/VM resources on a +// specific Proxmox node that were RECENTLY ACCESSED (within RecentAccessWindow). +// +// This is used by validateRoutingContext to detect when the user referenced a child resource +// in the current turn/exchange, indicating they probably intended to target that child. +func (e *PulseToolExecutor) findRecentlyReferencedChildrenOnNode(nodeName string) []recentChildInfo { + if e.resolvedContext == nil || e.stateProvider == nil { + return nil + } + + var children []recentChildInfo + state := e.stateProvider.GetState() + + // Check LXC containers on this node + for _, ct := range state.Containers { + if ct.Node != nodeName { + continue + } + // Check if this LXC is in the resolved context AND was recently accessed + if res, found := e.resolvedContext.GetResolvedResourceByAlias(ct.Name); found { + if res.GetKind() == "lxc" { + // Check if this resource was recently accessed + resourceID := res.GetResourceID() + if e.resolvedContext.WasRecentlyAccessed(resourceID, RecentAccessWindow) { + children = append(children, recentChildInfo{ + Name: ct.Name, + ResourceID: resourceID, + Kind: "lxc", + }) + } + } + } + } + + // Check VMs on this node + for _, vm := range state.VMs { + if vm.Node != nodeName { + continue + } + // Check if this VM is in the resolved context AND was recently accessed + if res, found := e.resolvedContext.GetResolvedResourceByAlias(vm.Name); found { + if res.GetKind() == "vm" { + // Check if this resource was recently accessed + resourceID := res.GetResourceID() + if e.resolvedContext.WasRecentlyAccessed(resourceID, RecentAccessWindow) { + children = append(children, recentChildInfo{ + Name: vm.Name, + ResourceID: resourceID, + Kind: "vm", + }) + } + } + } + } + + return children +} + +// logRoutingMismatchDebug logs routing mismatch details for debugging and support. +// Rate-limited to avoid log spam (at most once per 10 seconds). +// Only logs safe, low-cardinality fields: target_kind, child_kind, suggested_resource_id. +func logRoutingMismatchDebug(targetHost string, childKinds, childIDs []string) { + routingMismatchLogLimiter.mu.Lock() + defer routingMismatchLogLimiter.mu.Unlock() + + if time.Since(routingMismatchLogLimiter.lastLog) < routingMismatchLogLimiter.interval { + return // Rate limited + } + routingMismatchLogLimiter.lastLog = time.Now() + + // Get first child kind and ID for logging (safe, low cardinality) + childKind := "unknown" + suggestedID := "none" + if len(childKinds) > 0 { + childKind = childKinds[0] + } + if len(childIDs) > 0 { + suggestedID = childIDs[0] + } + + log.Debug(). + Str("event", "routing_mismatch_block"). + Str("target_kind", "node"). + Str("child_kind", childKind). + Str("suggested_resource_id", suggestedID). + Int("affected_children", len(childIDs)). 
+ Msg("[RoutingValidation] Blocked operation targeting parent node when child recently referenced") +} + +// registerQueryTools registers the consolidated pulse_query tool func (e *PulseToolExecutor) registerQueryTools() { e.registry.Register(RegisteredTool{ Definition: Tool{ - Name: "pulse_get_capabilities", - Description: `Get server capabilities and connected agents. + Name: "pulse_query", + Description: `Query and search infrastructure resources. Start here to find resources by name. -Returns: JSON with control_level, enabled features, connected agent count and details. +Actions: +- search: Find resources by name, type, or status. Use this first when looking for a specific service/container/VM by name. +- get: Get detailed info about a specific resource (CPU, memory, status, host) +- config: Get VM/LXC configuration (disk, network, resources) +- topology: Get hierarchical infrastructure view +- list: List all infrastructure (lightweight overview) +- health: Check connection health for instances -Use when: You need to check if control features are enabled or verify agent connectivity before running commands. -Use this to confirm hostnames when choosing target_host.`, - InputSchema: InputSchema{ - Type: "object", - Properties: map[string]PropertySchema{}, - }, - }, - Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { - return exec.executeGetCapabilities(ctx) - }, - }) +When investigating applications (e.g., "check Jellyfin logs"): +1. Use action="search" with query="jellyfin" to find where it runs +2. Use pulse_discovery to get deep context (log paths, config locations) +3. Use pulse_control type="command" to run investigative commands - e.registry.Register(RegisteredTool{ - Definition: Tool{ - Name: "pulse_get_url_content", - Description: "Fetch content from a URL. Use to check if web services are responding or read API endpoints.", +Examples: +- Find a service: action="search", query="jellyfin" +- Get resource details: action="get", resource_type="docker", resource_id="nginx" +- List running VMs: action="list", type="vms", status="running"`, InputSchema: InputSchema{ Type: "object", Properties: map[string]PropertySchema{ - "url": { + "action": { Type: "string", - Description: "The URL to fetch", + Description: "Query action to perform", + Enum: []string{"search", "get", "config", "topology", "list", "health"}, + }, + "query": { + Type: "string", + Description: "Search query (for action: search)", + }, + "resource_type": { + Type: "string", + Description: "Resource type: 'vm', 'container', 'docker', 'node' (for action: get, config, search)", + Enum: []string{"vm", "container", "docker", "node"}, + }, + "resource_id": { + Type: "string", + Description: "Resource identifier (VMID or name) (for action: get, config)", }, - }, - Required: []string{"url"}, - }, - }, - Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { - return exec.executeGetURLContent(ctx, args) - }, - }) - - e.registry.Register(RegisteredTool{ - Definition: Tool{ - Name: "pulse_list_infrastructure", - Description: `Lightweight list of nodes, VMs, containers, and Docker hosts (summaries only). - -Returns: JSON with nodes/vms/containers/docker_hosts arrays (summaries) and total counts. - -Use when: You need a quick list or to find a resource without full topology. Prefer this over pulse_get_topology for large environments. Use filters to keep output small. 
-This is monitoring data from Pulse agents; prefer it over running commands for inventory or status checks. -Default: Docker container lists are capped (use max_docker_containers_per_host to expand).`, - InputSchema: InputSchema{ - Type: "object", - Properties: map[string]PropertySchema{ "type": { Type: "string", - Description: "Optional filter: nodes, vms, containers, or docker", + Description: "Filter by type (for action: list): 'nodes', 'vms', 'containers', 'docker'", Enum: []string{"nodes", "vms", "containers", "docker"}, }, "status": { Type: "string", - Description: "Optional status filter (e.g. running, stopped, online)", + Description: "Filter by status (for action: search, list)", + }, + "include": { + Type: "string", + Description: "Include filter for topology: 'all', 'proxmox', 'docker' (for action: topology)", + Enum: []string{"all", "proxmox", "docker"}, + }, + "summary_only": { + Type: "boolean", + Description: "Return only summary counts for topology (for action: topology)", + }, + "max_nodes": { + Type: "integer", + Description: "Max Proxmox nodes to include (for action: topology)", + }, + "max_vms_per_node": { + Type: "integer", + Description: "Max VMs per node (for action: topology)", + }, + "max_containers_per_node": { + Type: "integer", + Description: "Max containers per node (for action: topology)", + }, + "max_docker_hosts": { + Type: "integer", + Description: "Max Docker hosts (for action: topology)", }, "max_docker_containers_per_host": { Type: "integer", - Description: "Max Docker containers per host to include (default: 10)", + Description: "Max Docker containers per host (for action: topology, list)", }, "limit": { Type: "integer", @@ -97,358 +1893,34 @@ Default: Docker container lists are capped (use max_docker_containers_per_host t Description: "Number of results to skip", }, }, + Required: []string{"action"}, }, }, Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { - return exec.executeListInfrastructure(ctx, args) - }, - }) - - e.registry.Register(RegisteredTool{ - Definition: Tool{ - Name: "pulse_search_resources", - Description: `Search for resources by name or ID across nodes, VMs, containers, and Docker hosts/containers. - -Returns: JSON with compact matches (type, id, name, status, node/host). - -Use when: You need to locate a specific resource before calling pulse_get_resource or control tools. Use filters to keep output small. 
-This is monitoring data from Pulse agents; use it to identify targets before running commands.`, - InputSchema: InputSchema{ - Type: "object", - Properties: map[string]PropertySchema{ - "query": { - Type: "string", - Description: "Substring to match against names, IDs, or image (Docker)", - }, - "type": { - Type: "string", - Description: "Optional filter: node, vm, container, docker, or docker_host", - Enum: []string{"node", "vm", "container", "docker", "docker_host"}, - }, - "status": { - Type: "string", - Description: "Optional status/state filter", - }, - "limit": { - Type: "integer", - Description: "Maximum number of results (default: 20)", - }, - "offset": { - Type: "integer", - Description: "Number of results to skip", - }, - }, - Required: []string{"query"}, - }, - }, - Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { - return exec.executeSearchResources(ctx, args) - }, - }) - - e.registry.Register(RegisteredTool{ - Definition: Tool{ - Name: "pulse_get_topology", - Description: `Get live infrastructure state - all Proxmox nodes with VMs/LXC containers, and Docker hosts with containers. - -Returns: JSON with proxmox.nodes[] (each with vms[], containers[], status, agent_connected), docker.hosts[] (each with containers[], status), and summary counts. - -Use when: You need a full inventory or relationship view of infrastructure. -This is live monitoring data from Pulse agents; prefer it over running commands for status or inventory questions. - -For targeted lookups or simple status checks, prefer pulse_search_resources or pulse_list_infrastructure to keep output small. - -Options: Use include (proxmox/docker), summary_only, and max_* fields to reduce payload size. -Defaults (when max_* omitted): max_nodes=5, max_vms_per_node=5, max_containers_per_node=5, max_docker_hosts=3, max_docker_containers_per_host=5. - -This data is authoritative and updates every ~10 seconds. 
Trust it for status questions - no verification needed.`, - InputSchema: InputSchema{ - Type: "object", - Properties: map[string]PropertySchema{ - "include": { - Type: "string", - Description: "Optional: limit to proxmox or docker data (default: all)", - Enum: []string{"all", "proxmox", "docker"}, - }, - "summary_only": { - Type: "boolean", - Description: "If true, return only summary counts (no node/host lists)", - }, - "max_nodes": { - Type: "integer", - Description: "Max Proxmox nodes to include", - }, - "max_vms_per_node": { - Type: "integer", - Description: "Max VMs per node to include", - }, - "max_containers_per_node": { - Type: "integer", - Description: "Max containers per node to include", - }, - "max_docker_hosts": { - Type: "integer", - Description: "Max Docker hosts to include", - }, - "max_docker_containers_per_host": { - Type: "integer", - Description: "Max Docker containers per host to include", - }, - }, - }, - }, - Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { - return exec.executeGetTopology(ctx, args) - }, - }) - - e.registry.Register(RegisteredTool{ - Definition: Tool{ - Name: "pulse_set_resource_url", - Description: "Set the web URL for a resource in Pulse after discovering a web service.", - InputSchema: InputSchema{ - Type: "object", - Properties: map[string]PropertySchema{ - "resource_type": { - Type: "string", - Description: "Type of resource: 'guest', 'docker', or 'host'", - Enum: []string{"guest", "docker", "host"}, - }, - "resource_id": { - Type: "string", - Description: "The resource ID from context", - }, - "url": { - Type: "string", - Description: "The URL to set (empty to remove)", - }, - }, - Required: []string{"resource_type", "resource_id"}, - }, - }, - Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { - return exec.executeSetResourceURL(ctx, args) - }, - }) - - e.registry.Register(RegisteredTool{ - Definition: Tool{ - Name: "pulse_get_resource", - Description: `Get detailed information about a specific VM, LXC container, or Docker container. - -Returns: JSON with name, status, IPs, ports, labels, mounts, network config, CPU/memory stats. - -Use when: You need detailed info about ONE specific resource (IPs, ports, config) that topology does not provide. - -If you do not know the ID or name, use pulse_search_resources or pulse_list_infrastructure first. - -Note: For simple status checks, use pulse_search_resources or pulse_list_infrastructure instead of full topology.`, - InputSchema: InputSchema{ - Type: "object", - Properties: map[string]PropertySchema{ - "resource_type": { - Type: "string", - Description: "Type: 'vm' (Proxmox VM), 'container' (Proxmox LXC), or 'docker' (Docker container)", - Enum: []string{"vm", "container", "docker"}, - }, - "resource_id": { - Type: "string", - Description: "VMID number (e.g. 
'101') or container name", - }, - }, - Required: []string{"resource_type", "resource_id"}, - }, - }, - Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { - return exec.executeGetResource(ctx, args) + return exec.executeQuery(ctx, args) }, }) } -func (e *PulseToolExecutor) executeGetCapabilities(_ context.Context) (CallToolResult, error) { - var agents []AgentInfo - if e.agentServer != nil { - connectedAgents := e.agentServer.GetConnectedAgents() - for _, a := range connectedAgents { - agents = append(agents, AgentInfo{ - Hostname: a.Hostname, - Version: a.Version, - Platform: a.Platform, - ConnectedAt: a.ConnectedAt.Format("2006-01-02T15:04:05Z"), - }) - } +// executeQuery routes to the appropriate query handler based on action +func (e *PulseToolExecutor) executeQuery(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + action, _ := args["action"].(string) + switch action { + case "search": + return e.executeSearchResources(ctx, args) + case "get": + return e.executeGetResource(ctx, args) + case "config": + return e.executeGetGuestConfig(ctx, args) + case "topology": + return e.executeGetTopology(ctx, args) + case "list": + return e.executeListInfrastructure(ctx, args) + case "health": + return e.executeGetConnectionHealth(ctx, args) + default: + return NewErrorResult(fmt.Errorf("unknown action: %s. Use: search, get, config, topology, list, health", action)), nil } - - response := CapabilitiesResponse{ - ControlLevel: string(e.controlLevel), - Features: FeatureFlags{ - MetricsHistory: e.metricsHistory != nil, - Baselines: e.baselineProvider != nil, - Patterns: e.patternProvider != nil, - Alerts: e.alertProvider != nil, - Findings: e.findingsProvider != nil, - Backups: e.backupProvider != nil, - Storage: e.storageProvider != nil, - DiskHealth: e.diskHealthProvider != nil, - AgentProfiles: e.agentProfileManager != nil, - Control: e.controlLevel != ControlLevelReadOnly && e.controlLevel != "", - }, - ProtectedGuests: e.protectedGuests, - ConnectedAgents: len(agents), - Agents: agents, - Version: ServerVersion, - } - - return NewJSONResult(response), nil -} - -func (e *PulseToolExecutor) executeGetURLContent(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { - if ctx == nil { - ctx = context.Background() - } - urlStr, _ := args["url"].(string) - if urlStr == "" { - return NewErrorResult(fmt.Errorf("url is required")), nil - } - - parsedURL, err := parseAndValidateFetchURL(ctx, urlStr) - if err != nil { - return NewJSONResult(URLFetchResponse{ - URL: urlStr, - Error: err.Error(), - }), nil - } - - client := &http.Client{ - Timeout: 30 * time.Second, - CheckRedirect: func(req *http.Request, via []*http.Request) error { - if len(via) >= 3 { - return fmt.Errorf("too many redirects") - } - if _, err := parseAndValidateFetchURL(ctx, req.URL.String()); err != nil { - return err - } - return nil - }, - } - - req, err := http.NewRequestWithContext(ctx, http.MethodGet, parsedURL.String(), nil) - if err != nil { - return NewJSONResult(URLFetchResponse{ - URL: urlStr, - Error: err.Error(), - }), nil - } - - resp, err := client.Do(req) - if err != nil { - return NewJSONResult(URLFetchResponse{ - URL: urlStr, - Error: err.Error(), - }), nil - } - defer resp.Body.Close() - - body, err := io.ReadAll(io.LimitReader(resp.Body, 50*1024)) - if err != nil { - return NewJSONResult(URLFetchResponse{ - URL: urlStr, - Error: fmt.Sprintf("error reading response: %v", err), - }), nil - } - - headers := 
make(map[string]string) - for k, v := range resp.Header { - if len(v) > 0 { - headers[k] = v[0] - } - } - - return NewJSONResult(URLFetchResponse{ - URL: urlStr, - StatusCode: resp.StatusCode, - Headers: headers, - Body: string(body), - }), nil -} - -func parseAndValidateFetchURL(ctx context.Context, urlStr string) (*url.URL, error) { - clean := strings.TrimSpace(urlStr) - if clean == "" { - return nil, fmt.Errorf("url is required") - } - - parsed, err := url.Parse(clean) - if err != nil { - return nil, fmt.Errorf("invalid URL: %w", err) - } - if !parsed.IsAbs() { - return nil, fmt.Errorf("URL must be absolute") - } - if parsed.Scheme != "http" && parsed.Scheme != "https" { - return nil, fmt.Errorf("only http/https URLs are allowed") - } - if parsed.User != nil { - return nil, fmt.Errorf("URLs with embedded credentials are not allowed") - } - if parsed.Fragment != "" { - return nil, fmt.Errorf("URL fragments are not allowed") - } - - host := parsed.Hostname() - if host == "" { - return nil, fmt.Errorf("URL must include a host") - } - - if isBlockedFetchHost(host) { - return nil, fmt.Errorf("URL host is blocked") - } - - if ip := net.ParseIP(host); ip != nil { - if isBlockedFetchIP(ip) { - return nil, fmt.Errorf("URL IP is blocked") - } - return parsed, nil - } - - if ctx == nil { - ctx = context.Background() - } - addrs, err := net.DefaultResolver.LookupIPAddr(ctx, host) - if err != nil { - return nil, fmt.Errorf("failed to resolve host: %w", err) - } - for _, addr := range addrs { - if isBlockedFetchIP(addr.IP) { - return nil, fmt.Errorf("URL host resolves to a blocked address") - } - } - - return parsed, nil -} - -func isBlockedFetchHost(host string) bool { - h := strings.TrimSpace(strings.ToLower(host)) - if h == "localhost" || h == "localhost." { - return true - } - return false -} - -func isBlockedFetchIP(ip net.IP) bool { - if ip == nil { - return true - } - if ip.IsLoopback() || ip.IsUnspecified() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { - if ip.IsLoopback() && os.Getenv("PULSE_AI_ALLOW_LOOPBACK") == "true" { - return false - } - return true - } - if !ip.IsGlobalUnicast() && !ip.IsPrivate() { - return true - } - return false } func (e *PulseToolExecutor) executeListInfrastructure(_ context.Context, args map[string]interface{}) (CallToolResult, error) { @@ -874,41 +2346,6 @@ func (e *PulseToolExecutor) executeGetTopology(_ context.Context, args map[strin return NewJSONResult(response), nil } -func (e *PulseToolExecutor) executeSetResourceURL(_ context.Context, args map[string]interface{}) (CallToolResult, error) { - resourceType, _ := args["resource_type"].(string) - resourceID, _ := args["resource_id"].(string) - url, _ := args["url"].(string) - - if resourceType == "" { - return NewErrorResult(fmt.Errorf("resource_type is required")), nil - } - if resourceID == "" { - return NewErrorResult(fmt.Errorf("resource_id is required")), nil - } - - if e.metadataUpdater == nil { - return NewTextResult("Metadata updater not available."), nil - } - - if err := e.metadataUpdater.SetResourceURL(resourceType, resourceID, url); err != nil { - return NewErrorResult(err), nil - } - - result := map[string]interface{}{ - "success": true, - "resource_type": resourceType, - "resource_id": resourceID, - "url": url, - } - if url == "" { - result["action"] = "cleared" - } else { - result["action"] = "set" - } - - return NewJSONResult(result), nil -} - func (e *PulseToolExecutor) executeGetResource(_ context.Context, args map[string]interface{}) (CallToolResult, error) { resourceType, _ := 
args["resource_type"].(string) resourceID, _ := args["resource_id"].(string) @@ -957,6 +2394,24 @@ func (e *PulseToolExecutor) executeGetResource(_ context.Context, args map[strin Addresses: nic.Addresses, }) } + // Register in resolved context WITH explicit access (single-resource get operation) + e.registerResolvedResourceWithExplicitAccess(ResourceRegistration{ + Kind: "vm", + ProviderUID: fmt.Sprintf("%d", vm.VMID), // VMID is the stable provider ID + Name: vm.Name, + Aliases: []string{vm.Name, fmt.Sprintf("%d", vm.VMID), vm.ID}, + HostUID: vm.Node, + HostName: vm.Node, + VMID: vm.VMID, + Node: vm.Node, + LocationChain: []string{"node:" + vm.Node, "vm:" + vm.Name}, + Executors: []ExecutorRegistration{{ + ExecutorID: vm.Node, + Adapter: "qm", + Actions: []string{"query", "get", "logs", "console"}, + Priority: 10, + }}, + }) return NewJSONResult(response), nil } } @@ -996,6 +2451,24 @@ func (e *PulseToolExecutor) executeGetResource(_ context.Context, args map[strin Addresses: nic.Addresses, }) } + // Register in resolved context WITH explicit access (single-resource get operation) + e.registerResolvedResourceWithExplicitAccess(ResourceRegistration{ + Kind: "lxc", + ProviderUID: fmt.Sprintf("%d", ct.VMID), // VMID is the stable provider ID + Name: ct.Name, + Aliases: []string{ct.Name, fmt.Sprintf("%d", ct.VMID), ct.ID}, + HostUID: ct.Node, + HostName: ct.Node, + VMID: ct.VMID, + Node: ct.Node, + LocationChain: []string{"node:" + ct.Node, "lxc:" + ct.Name}, + Executors: []ExecutorRegistration{{ + ExecutorID: ct.Node, + Adapter: "pct", + Actions: []string{"query", "get", "logs", "console", "exec"}, + Priority: 10, + }}, + }) return NewJSONResult(response), nil } } @@ -1057,6 +2530,26 @@ func (e *PulseToolExecutor) executeGetResource(_ context.Context, args map[strin }) } + // Register in resolved context WITH explicit access (single-resource get operation) + aliases := []string{c.Name, c.ID} + if len(c.ID) > 12 { + aliases = append(aliases, c.ID[:12]) // Add short ID for longer IDs + } + e.registerResolvedResourceWithExplicitAccess(ResourceRegistration{ + Kind: "docker_container", + ProviderUID: c.ID, // Docker container ID is the stable provider ID + Name: c.Name, + Aliases: aliases, + HostUID: host.ID, + HostName: host.Hostname, + LocationChain: []string{"host:" + host.Hostname, "docker:" + c.Name}, + Executors: []ExecutorRegistration{{ + ExecutorID: host.Hostname, + Adapter: "docker", + Actions: []string{"query", "get", "logs", "exec", "restart", "stop", "start"}, + Priority: 10, + }}, + }) return NewJSONResult(response), nil } } @@ -1072,6 +2565,220 @@ func (e *PulseToolExecutor) executeGetResource(_ context.Context, args map[strin } } +func (e *PulseToolExecutor) executeGetGuestConfig(_ context.Context, args map[string]interface{}) (CallToolResult, error) { + resourceType, _ := args["resource_type"].(string) + resourceID, _ := args["resource_id"].(string) + + if resourceType == "" { + return NewErrorResult(fmt.Errorf("resource_type is required")), nil + } + if resourceID == "" { + return NewErrorResult(fmt.Errorf("resource_id is required")), nil + } + if e.stateProvider == nil { + return NewTextResult("State information not available."), nil + } + if e.guestConfigProvider == nil { + return NewTextResult("Guest configuration not available."), nil + } + + state := e.stateProvider.GetState() + guestType, vmid, name, node, instance, err := resolveGuestFromState(state, resourceType, resourceID) + if err != nil { + return NewErrorResult(err), nil + } + + rawConfig, err := 
e.guestConfigProvider.GetGuestConfig(guestType, instance, node, vmid) + if err != nil { + return NewErrorResult(err), nil + } + + response := GuestConfigResponse{ + GuestType: guestType, + VMID: vmid, + Name: name, + Node: node, + Instance: instance, + } + + switch guestType { + case "container", "lxc": + hostname, osType, onboot, rootfs, mounts := parseContainerConfig(rawConfig) + response.Hostname = hostname + response.OSType = osType + response.Onboot = onboot + response.RootFS = rootfs + response.Mounts = mounts + case "vm": + osType, onboot, disks := parseVMConfig(rawConfig) + response.OSType = osType + response.Onboot = onboot + response.Disks = disks + default: + return NewErrorResult(fmt.Errorf("unsupported guest type: %s", guestType)), nil + } + + return NewJSONResult(response), nil +} + +func resolveGuestFromState(state models.StateSnapshot, resourceType, resourceID string) (guestType string, vmid int, name, node, instance string, err error) { + resourceType = strings.ToLower(strings.TrimSpace(resourceType)) + resourceID = strings.TrimSpace(resourceID) + if resourceType == "" || resourceID == "" { + return "", 0, "", "", "", fmt.Errorf("resource_type and resource_id are required") + } + + switch resourceType { + case "container", "lxc": + for _, ct := range state.Containers { + if fmt.Sprintf("%d", ct.VMID) == resourceID || ct.Name == resourceID || ct.ID == resourceID { + return "container", ct.VMID, ct.Name, ct.Node, ct.Instance, nil + } + } + return "", 0, "", "", "", fmt.Errorf("container not found: %s", resourceID) + case "vm": + for _, vm := range state.VMs { + if fmt.Sprintf("%d", vm.VMID) == resourceID || vm.Name == resourceID || vm.ID == resourceID { + return "vm", vm.VMID, vm.Name, vm.Node, vm.Instance, nil + } + } + return "", 0, "", "", "", fmt.Errorf("vm not found: %s", resourceID) + default: + return "", 0, "", "", "", fmt.Errorf("invalid resource_type: %s", resourceType) + } +} + +func parseContainerConfig(config map[string]interface{}) (hostname, osType string, onboot *bool, rootfs string, mounts []GuestMountConfig) { + if len(config) == 0 { + return "", "", nil, "", nil + } + + for key, value := range config { + lowerKey := strings.ToLower(strings.TrimSpace(key)) + switch lowerKey { + case "hostname": + hostname = strings.TrimSpace(fmt.Sprint(value)) + case "ostype": + osType = strings.TrimSpace(fmt.Sprint(value)) + case "onboot": + onboot = parseOnbootValue(value) + } + if lowerKey != "rootfs" && !strings.HasPrefix(lowerKey, "mp") { + continue + } + + raw := strings.TrimSpace(fmt.Sprint(value)) + if raw == "" { + continue + } + + source, mountpoint := parseMountValue(raw) + if lowerKey == "rootfs" { + rootfs = source + if mountpoint == "" { + mountpoint = "/" + } + } + + mounts = append(mounts, GuestMountConfig{ + Key: lowerKey, + Source: source, + Mountpoint: mountpoint, + }) + } + + if len(mounts) > 1 { + sort.Slice(mounts, func(i, j int) bool { + return mounts[i].Key < mounts[j].Key + }) + } + + return hostname, osType, onboot, rootfs, mounts +} + +func parseVMConfig(config map[string]interface{}) (osType string, onboot *bool, disks []GuestDiskConfig) { + if len(config) == 0 { + return "", nil, nil + } + + for key, value := range config { + lowerKey := strings.ToLower(strings.TrimSpace(key)) + switch lowerKey { + case "ostype": + osType = strings.TrimSpace(fmt.Sprint(value)) + case "onboot": + onboot = parseOnbootValue(value) + } + if !isVMConfigDiskKey(lowerKey) { + continue + } + raw := strings.TrimSpace(fmt.Sprint(value)) + if raw == "" { + continue + } + 
disks = append(disks, GuestDiskConfig{ + Key: lowerKey, + Value: raw, + }) + } + + if len(disks) > 1 { + sort.Slice(disks, func(i, j int) bool { + return disks[i].Key < disks[j].Key + }) + } + + return osType, onboot, disks +} + +func isVMConfigDiskKey(key string) bool { + if strings.HasPrefix(key, "scsi") || + strings.HasPrefix(key, "virtio") || + strings.HasPrefix(key, "sata") || + strings.HasPrefix(key, "ide") || + strings.HasPrefix(key, "unused") || + strings.HasPrefix(key, "efidisk") || + strings.HasPrefix(key, "tpmstate") { + return true + } + return false +} + +func parseMountValue(raw string) (source, mountpoint string) { + parts := strings.Split(raw, ",") + if len(parts) > 0 { + source = strings.TrimSpace(parts[0]) + } + for _, part := range parts[1:] { + kv := strings.SplitN(strings.TrimSpace(part), "=", 2) + if len(kv) != 2 { + continue + } + k := strings.ToLower(strings.TrimSpace(kv[0])) + v := strings.TrimSpace(kv[1]) + if k == "mp" || k == "mountpoint" { + mountpoint = v + } + } + return source, mountpoint +} + +func parseOnbootValue(value interface{}) *bool { + raw := strings.TrimSpace(fmt.Sprint(value)) + if raw == "" { + return nil + } + if raw == "1" || strings.EqualFold(raw, "yes") || strings.EqualFold(raw, "true") { + val := true + return &val + } + if raw == "0" || strings.EqualFold(raw, "no") || strings.EqualFold(raw, "false") { + val := false + return &val + } + return nil +} + func (e *PulseToolExecutor) executeSearchResources(_ context.Context, args map[string]interface{}) (CallToolResult, error) { if e.stateProvider == nil { return NewTextResult("State provider not available."), nil @@ -1084,6 +2791,10 @@ func (e *PulseToolExecutor) executeSearchResources(_ context.Context, args map[s } typeFilter, _ := args["type"].(string) + // Map resource_type to type for search + if typeFilter == "" { + typeFilter, _ = args["resource_type"].(string) + } statusFilter, _ := args["status"].(string) limit := intArg(args, "limit", 20) offset := intArg(args, "offset", 0) @@ -1106,21 +2817,81 @@ func (e *PulseToolExecutor) executeSearchResources(_ context.Context, args map[s return NewErrorResult(fmt.Errorf("invalid type: %s. Use node, vm, container, docker, or docker_host", typeFilter)), nil } + // normalizeForSearch replaces common separators with spaces for fuzzy matching + normalizeForSearch := func(s string) string { + s = strings.ToLower(s) + s = strings.ReplaceAll(s, "-", " ") + s = strings.ReplaceAll(s, "_", " ") + s = strings.ReplaceAll(s, ".", " ") + return s + } + matchesQuery := func(query string, candidates ...string) bool { + queryNorm := normalizeForSearch(query) + queryWords := strings.Fields(queryNorm) + for _, candidate := range candidates { if candidate == "" { continue } - if strings.Contains(strings.ToLower(candidate), query) { + candidateNorm := normalizeForSearch(candidate) + + // Direct substring match (normalized) + if strings.Contains(candidateNorm, queryNorm) { return true } + + // All query words must be present in candidate + if len(queryWords) > 0 { + allMatch := true + for _, word := range queryWords { + if !strings.Contains(candidateNorm, word) { + allMatch = false + break + } + } + if allMatch { + return true + } + } } return false } + // Helper to collect IP addresses from guest network interfaces + collectGuestIPs := func(interfaces []models.GuestNetworkInterface) []string { + var ips []string + for _, iface := range interfaces { + ips = append(ips, iface.Addresses...) 
+ } + return ips + } + + // Helper to collect IP addresses from Docker container networks + collectDockerIPs := func(networks []models.DockerContainerNetworkLink) []string { + var ips []string + for _, net := range networks { + if net.IPv4 != "" { + ips = append(ips, net.IPv4) + } + if net.IPv6 != "" { + ips = append(ips, net.IPv6) + } + } + return ips + } + queryLower := strings.ToLower(query) state := e.stateProvider.GetState() + // Build a set of connected agent hostnames for quick lookup + connectedAgentHostnames := make(map[string]bool) + if e.agentServer != nil { + for _, agent := range e.agentServer.GetConnectedAgents() { + connectedAgentHostnames[agent.Hostname] = true + } + } + matches := make([]ResourceMatch, 0, limit) total := 0 @@ -1146,10 +2917,11 @@ func (e *PulseToolExecutor) executeSearchResources(_ context.Context, args map[s continue } addMatch(ResourceMatch{ - Type: "node", - ID: node.ID, - Name: node.Name, - Status: node.Status, + Type: "node", + ID: node.ID, + Name: node.Name, + Status: node.Status, + AgentConnected: connectedAgentHostnames[node.Name], }) } } @@ -1159,16 +2931,24 @@ func (e *PulseToolExecutor) executeSearchResources(_ context.Context, args map[s if statusFilter != "" && !strings.EqualFold(vm.Status, statusFilter) { continue } - if !matchesQuery(queryLower, vm.Name, vm.ID, fmt.Sprintf("%d", vm.VMID)) { + // Build searchable candidates: name, ID, VMID, IPs, tags + candidates := []string{vm.Name, vm.ID, fmt.Sprintf("%d", vm.VMID)} + candidates = append(candidates, vm.IPAddresses...) + candidates = append(candidates, vm.Tags...) + candidates = append(candidates, collectGuestIPs(vm.NetworkInterfaces)...) + + if !matchesQuery(queryLower, candidates...) { continue } addMatch(ResourceMatch{ - Type: "vm", - ID: vm.ID, - Name: vm.Name, - Status: vm.Status, - Node: vm.Node, - VMID: vm.VMID, + Type: "vm", + ID: vm.ID, + Name: vm.Name, + Status: vm.Status, + Node: vm.Node, + NodeHasAgent: connectedAgentHostnames[vm.Node], + VMID: vm.VMID, + AgentConnected: connectedAgentHostnames[vm.Name], }) } } @@ -1178,16 +2958,24 @@ func (e *PulseToolExecutor) executeSearchResources(_ context.Context, args map[s if statusFilter != "" && !strings.EqualFold(ct.Status, statusFilter) { continue } - if !matchesQuery(queryLower, ct.Name, ct.ID, fmt.Sprintf("%d", ct.VMID)) { + // Build searchable candidates: name, ID, VMID, IPs, tags + candidates := []string{ct.Name, ct.ID, fmt.Sprintf("%d", ct.VMID)} + candidates = append(candidates, ct.IPAddresses...) + candidates = append(candidates, ct.Tags...) + candidates = append(candidates, collectGuestIPs(ct.NetworkInterfaces)...) + + if !matchesQuery(queryLower, candidates...) { continue } addMatch(ResourceMatch{ - Type: "container", - ID: ct.ID, - Name: ct.Name, - Status: ct.Status, - Node: ct.Node, - VMID: ct.VMID, + Type: "container", + ID: ct.ID, + Name: ct.Name, + Status: ct.Status, + Node: ct.Node, + NodeHasAgent: connectedAgentHostnames[ct.Node], + VMID: ct.VMID, + AgentConnected: connectedAgentHostnames[ct.Name], }) } } @@ -1223,7 +3011,11 @@ func (e *PulseToolExecutor) executeSearchResources(_ context.Context, args map[s if statusFilter != "" && !strings.EqualFold(c.State, statusFilter) { continue } - if !matchesQuery(queryLower, c.Name, c.ID, c.Image) { + // Build searchable candidates: name, ID, image, IPs + candidates := []string{c.Name, c.ID, c.Image} + candidates = append(candidates, collectDockerIPs(c.Networks)...) + + if !matchesQuery(queryLower, candidates...) 
{ continue } addMatch(ResourceMatch{ @@ -1252,6 +3044,102 @@ func (e *PulseToolExecutor) executeSearchResources(_ context.Context, args map[s } } + // Register all found resources in the resolved context + // This enables action tools to validate that commands target legitimate resources + for _, match := range matches { + var reg ResourceRegistration + + switch match.Type { + case "node": + reg = ResourceRegistration{ + Kind: "node", + ProviderUID: match.ID, // Node ID is the provider UID + Name: match.Name, + Aliases: []string{match.Name, match.ID}, + HostName: match.Name, + LocationChain: []string{"node:" + match.Name}, + Executors: []ExecutorRegistration{{ + ExecutorID: match.Name, + Adapter: "direct", + Actions: []string{"query", "get", "exec"}, + Priority: 10, + }}, + } + case "vm": + reg = ResourceRegistration{ + Kind: "vm", + ProviderUID: fmt.Sprintf("%d", match.VMID), + Name: match.Name, + Aliases: []string{match.Name, fmt.Sprintf("%d", match.VMID), match.ID}, + HostUID: match.Node, + HostName: match.Node, + VMID: match.VMID, + Node: match.Node, + LocationChain: []string{"node:" + match.Node, "vm:" + match.Name}, + Executors: []ExecutorRegistration{{ + ExecutorID: match.Node, + Adapter: "qm", + Actions: []string{"query", "get", "logs", "console"}, + Priority: 10, + }}, + } + case "container": + reg = ResourceRegistration{ + Kind: "lxc", + ProviderUID: fmt.Sprintf("%d", match.VMID), + Name: match.Name, + Aliases: []string{match.Name, fmt.Sprintf("%d", match.VMID), match.ID}, + HostUID: match.Node, + HostName: match.Node, + VMID: match.VMID, + Node: match.Node, + LocationChain: []string{"node:" + match.Node, "lxc:" + match.Name}, + Executors: []ExecutorRegistration{{ + ExecutorID: match.Node, + Adapter: "pct", + Actions: []string{"query", "get", "logs", "console", "exec"}, + Priority: 10, + }}, + } + case "docker_host": + reg = ResourceRegistration{ + Kind: "docker_host", + ProviderUID: match.ID, + Name: match.Name, + Aliases: []string{match.Name, match.ID, match.Host}, + HostUID: match.Host, + HostName: match.Host, + LocationChain: []string{"host:" + match.Host}, + Executors: []ExecutorRegistration{{ + ExecutorID: match.Host, + Adapter: "direct", + Actions: []string{"query", "get"}, + Priority: 10, + }}, + } + case "docker": + reg = ResourceRegistration{ + Kind: "docker_container", + ProviderUID: match.ID, // Docker container ID + Name: match.Name, + Aliases: []string{match.Name, match.ID}, + HostUID: match.Host, + HostName: match.Host, + LocationChain: []string{"host:" + match.Host, "docker:" + match.Name}, + Executors: []ExecutorRegistration{{ + ExecutorID: match.Host, + Adapter: "docker", + Actions: []string{"query", "get", "logs", "exec", "restart", "stop", "start"}, + Priority: 10, + }}, + } + default: + continue // Skip unknown types + } + + e.registerResolvedResource(reg) + } + return NewJSONResult(response), nil } @@ -1269,402 +3157,3 @@ func intArg(args map[string]interface{}, key string, defaultVal int) int { } return defaultVal } - -// ========== Kubernetes Tools ========== - -// registerKubernetesTools registers Kubernetes query tools -func (e *PulseToolExecutor) registerKubernetesTools() { - e.registry.Register(RegisteredTool{ - Definition: Tool{ - Name: "pulse_get_kubernetes_clusters", - Description: `List Kubernetes clusters monitored by Pulse with health summary. - -Returns: JSON with clusters array containing id, name, status, version, node count, pod count, deployment count. 
- -Use when: User asks about Kubernetes clusters, wants an overview of K8s infrastructure, or needs to find a specific cluster.`, - InputSchema: InputSchema{ - Type: "object", - Properties: map[string]PropertySchema{}, - }, - }, - Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { - return exec.executeGetKubernetesClusters(ctx) - }, - }) - - e.registry.Register(RegisteredTool{ - Definition: Tool{ - Name: "pulse_get_kubernetes_nodes", - Description: `List nodes in a Kubernetes cluster with capacity and status. - -Returns: JSON with nodes array containing name, ready status, roles, kubelet version, capacity (CPU, memory, pods), allocatable resources. - -Use when: User asks about Kubernetes nodes, node health, or cluster capacity.`, - InputSchema: InputSchema{ - Type: "object", - Properties: map[string]PropertySchema{ - "cluster": { - Type: "string", - Description: "Cluster name or ID (required)", - }, - }, - Required: []string{"cluster"}, - }, - }, - Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { - return exec.executeGetKubernetesNodes(ctx, args) - }, - }) - - e.registry.Register(RegisteredTool{ - Definition: Tool{ - Name: "pulse_get_kubernetes_pods", - Description: `List pods in a Kubernetes cluster, optionally filtered by namespace or status. - -Returns: JSON with pods array containing name, namespace, node, phase, restarts, containers with their states. - -Use when: User asks about pods, wants to find a specific pod, or check pod health in a namespace.`, - InputSchema: InputSchema{ - Type: "object", - Properties: map[string]PropertySchema{ - "cluster": { - Type: "string", - Description: "Cluster name or ID (required)", - }, - "namespace": { - Type: "string", - Description: "Optional: filter by namespace", - }, - "status": { - Type: "string", - Description: "Optional: filter by pod phase (Running, Pending, Failed, Succeeded)", - }, - "limit": { - Type: "integer", - Description: "Maximum number of results (default: 100)", - }, - "offset": { - Type: "integer", - Description: "Number of results to skip", - }, - }, - Required: []string{"cluster"}, - }, - }, - Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { - return exec.executeGetKubernetesPods(ctx, args) - }, - }) - - e.registry.Register(RegisteredTool{ - Definition: Tool{ - Name: "pulse_get_kubernetes_deployments", - Description: `List deployments in a Kubernetes cluster with replica status. - -Returns: JSON with deployments array containing name, namespace, desired/ready/available/updated replicas. 
- -Use when: User asks about deployments, wants to check deployment health, or find unhealthy deployments.`, - InputSchema: InputSchema{ - Type: "object", - Properties: map[string]PropertySchema{ - "cluster": { - Type: "string", - Description: "Cluster name or ID (required)", - }, - "namespace": { - Type: "string", - Description: "Optional: filter by namespace", - }, - "limit": { - Type: "integer", - Description: "Maximum number of results (default: 100)", - }, - "offset": { - Type: "integer", - Description: "Number of results to skip", - }, - }, - Required: []string{"cluster"}, - }, - }, - Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { - return exec.executeGetKubernetesDeployments(ctx, args) - }, - }) -} - -func (e *PulseToolExecutor) executeGetKubernetesClusters(_ context.Context) (CallToolResult, error) { - if e.stateProvider == nil { - return NewTextResult("State provider not available."), nil - } - - state := e.stateProvider.GetState() - - if len(state.KubernetesClusters) == 0 { - return NewTextResult("No Kubernetes clusters found. Kubernetes monitoring may not be configured."), nil - } - - var clusters []KubernetesClusterSummary - for _, c := range state.KubernetesClusters { - readyNodes := 0 - for _, node := range c.Nodes { - if node.Ready { - readyNodes++ - } - } - - displayName := c.DisplayName - if c.CustomDisplayName != "" { - displayName = c.CustomDisplayName - } - - clusters = append(clusters, KubernetesClusterSummary{ - ID: c.ID, - Name: c.Name, - DisplayName: displayName, - Server: c.Server, - Version: c.Version, - Status: c.Status, - NodeCount: len(c.Nodes), - PodCount: len(c.Pods), - DeploymentCount: len(c.Deployments), - ReadyNodes: readyNodes, - }) - } - - response := KubernetesClustersResponse{ - Clusters: clusters, - Total: len(clusters), - } - - return NewJSONResult(response), nil -} - -func (e *PulseToolExecutor) executeGetKubernetesNodes(_ context.Context, args map[string]interface{}) (CallToolResult, error) { - if e.stateProvider == nil { - return NewTextResult("State provider not available."), nil - } - - clusterArg, _ := args["cluster"].(string) - if clusterArg == "" { - return NewErrorResult(fmt.Errorf("cluster is required")), nil - } - - state := e.stateProvider.GetState() - - // Find the cluster (also match CustomDisplayName) - var cluster *KubernetesClusterSummary - for _, c := range state.KubernetesClusters { - if c.ID == clusterArg || c.Name == clusterArg || c.DisplayName == clusterArg || c.CustomDisplayName == clusterArg { - displayName := c.DisplayName - if c.CustomDisplayName != "" { - displayName = c.CustomDisplayName - } - cluster = &KubernetesClusterSummary{ - ID: c.ID, - Name: c.Name, - DisplayName: displayName, - } - - var nodes []KubernetesNodeSummary - for _, node := range c.Nodes { - nodes = append(nodes, KubernetesNodeSummary{ - UID: node.UID, - Name: node.Name, - Ready: node.Ready, - Unschedulable: node.Unschedulable, - Roles: node.Roles, - KubeletVersion: node.KubeletVersion, - ContainerRuntimeVersion: node.ContainerRuntimeVersion, - OSImage: node.OSImage, - Architecture: node.Architecture, - CapacityCPU: node.CapacityCPU, - CapacityMemoryBytes: node.CapacityMemoryBytes, - CapacityPods: node.CapacityPods, - AllocatableCPU: node.AllocCPU, - AllocatableMemoryBytes: node.AllocMemoryBytes, - AllocatablePods: node.AllocPods, - }) - } - - response := KubernetesNodesResponse{ - Cluster: cluster.DisplayName, - Nodes: nodes, - Total: len(nodes), - } - if response.Nodes == nil { - 
response.Nodes = []KubernetesNodeSummary{} - } - return NewJSONResult(response), nil - } - } - - return NewTextResult(fmt.Sprintf("Kubernetes cluster '%s' not found.", clusterArg)), nil -} - -func (e *PulseToolExecutor) executeGetKubernetesPods(_ context.Context, args map[string]interface{}) (CallToolResult, error) { - if e.stateProvider == nil { - return NewTextResult("State provider not available."), nil - } - - clusterArg, _ := args["cluster"].(string) - if clusterArg == "" { - return NewErrorResult(fmt.Errorf("cluster is required")), nil - } - - namespaceFilter, _ := args["namespace"].(string) - statusFilter, _ := args["status"].(string) - limit := intArg(args, "limit", 100) - offset := intArg(args, "offset", 0) - - state := e.stateProvider.GetState() - - // Find the cluster (also match CustomDisplayName) - for _, c := range state.KubernetesClusters { - if c.ID == clusterArg || c.Name == clusterArg || c.DisplayName == clusterArg || c.CustomDisplayName == clusterArg { - displayName := c.DisplayName - if c.CustomDisplayName != "" { - displayName = c.CustomDisplayName - } - - var pods []KubernetesPodSummary - totalPods := 0 - filteredCount := 0 - - for _, pod := range c.Pods { - // Apply filters - if namespaceFilter != "" && pod.Namespace != namespaceFilter { - continue - } - if statusFilter != "" && !strings.EqualFold(pod.Phase, statusFilter) { - continue - } - - filteredCount++ - - // Apply pagination - if totalPods < offset { - totalPods++ - continue - } - if len(pods) >= limit { - totalPods++ - continue - } - - var containers []KubernetesPodContainerSummary - for _, container := range pod.Containers { - containers = append(containers, KubernetesPodContainerSummary{ - Name: container.Name, - Ready: container.Ready, - State: container.State, - RestartCount: container.RestartCount, - Reason: container.Reason, - }) - } - - pods = append(pods, KubernetesPodSummary{ - UID: pod.UID, - Name: pod.Name, - Namespace: pod.Namespace, - NodeName: pod.NodeName, - Phase: pod.Phase, - Reason: pod.Reason, - Restarts: pod.Restarts, - QoSClass: pod.QoSClass, - OwnerKind: pod.OwnerKind, - OwnerName: pod.OwnerName, - Containers: containers, - }) - totalPods++ - } - - response := KubernetesPodsResponse{ - Cluster: displayName, - Pods: pods, - Total: len(c.Pods), - Filtered: filteredCount, - } - if response.Pods == nil { - response.Pods = []KubernetesPodSummary{} - } - return NewJSONResult(response), nil - } - } - - return NewTextResult(fmt.Sprintf("Kubernetes cluster '%s' not found.", clusterArg)), nil -} - -func (e *PulseToolExecutor) executeGetKubernetesDeployments(_ context.Context, args map[string]interface{}) (CallToolResult, error) { - if e.stateProvider == nil { - return NewTextResult("State provider not available."), nil - } - - clusterArg, _ := args["cluster"].(string) - if clusterArg == "" { - return NewErrorResult(fmt.Errorf("cluster is required")), nil - } - - namespaceFilter, _ := args["namespace"].(string) - limit := intArg(args, "limit", 100) - offset := intArg(args, "offset", 0) - - state := e.stateProvider.GetState() - - // Find the cluster (also match CustomDisplayName) - for _, c := range state.KubernetesClusters { - if c.ID == clusterArg || c.Name == clusterArg || c.DisplayName == clusterArg || c.CustomDisplayName == clusterArg { - displayName := c.DisplayName - if c.CustomDisplayName != "" { - displayName = c.CustomDisplayName - } - - var deployments []KubernetesDeploymentSummary - filteredCount := 0 - count := 0 - - for _, dep := range c.Deployments { - // Apply namespace filter - if 
namespaceFilter != "" && dep.Namespace != namespaceFilter { - continue - } - - filteredCount++ - - // Apply pagination - if count < offset { - count++ - continue - } - if len(deployments) >= limit { - count++ - continue - } - - deployments = append(deployments, KubernetesDeploymentSummary{ - UID: dep.UID, - Name: dep.Name, - Namespace: dep.Namespace, - DesiredReplicas: dep.DesiredReplicas, - ReadyReplicas: dep.ReadyReplicas, - AvailableReplicas: dep.AvailableReplicas, - UpdatedReplicas: dep.UpdatedReplicas, - }) - count++ - } - - response := KubernetesDeploymentsResponse{ - Cluster: displayName, - Deployments: deployments, - Total: len(c.Deployments), - Filtered: filteredCount, - } - if response.Deployments == nil { - response.Deployments = []KubernetesDeploymentSummary{} - } - return NewJSONResult(response), nil - } - } - - return NewTextResult(fmt.Sprintf("Kubernetes cluster '%s' not found.", clusterArg)), nil -} diff --git a/internal/ai/tools/tools_query_test.go b/internal/ai/tools/tools_query_test.go index 1c645a3d5..4592a864d 100644 --- a/internal/ai/tools/tools_query_test.go +++ b/internal/ai/tools/tools_query_test.go @@ -3,80 +3,12 @@ package tools import ( "context" "encoding/json" - "net" - "net/http" - "net/http/httptest" "testing" "github.com/rcourtman/pulse-go-rewrite/internal/agentexec" "github.com/rcourtman/pulse-go-rewrite/internal/models" ) -func TestExecuteGetCapabilities(t *testing.T) { - executor := NewPulseToolExecutor(ExecutorConfig{ - StateProvider: &mockStateProvider{}, - AgentServer: &mockAgentServer{ - agents: []agentexec.ConnectedAgent{ - {Hostname: "host1", Version: "1.0", Platform: "linux"}, - }, - }, - MetricsHistory: &mockMetricsHistoryProvider{}, - BaselineProvider: &BaselineMCPAdapter{}, - PatternProvider: &PatternMCPAdapter{}, - AlertProvider: &mockAlertProvider{}, - FindingsProvider: &mockFindingsProvider{}, - ControlLevel: ControlLevelControlled, - ProtectedGuests: []string{"100"}, - }) - - result, err := executor.executeGetCapabilities(context.Background()) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var response CapabilitiesResponse - if err := json.Unmarshal([]byte(result.Content[0].Text), &response); err != nil { - t.Fatalf("decode response: %v", err) - } - if response.ControlLevel != string(ControlLevelControlled) || response.ConnectedAgents != 1 { - t.Fatalf("unexpected response: %+v", response) - } - if !response.Features.Control || !response.Features.MetricsHistory { - t.Fatalf("unexpected features: %+v", response.Features) - } -} - -func TestExecuteGetURLContent(t *testing.T) { - t.Setenv("PULSE_AI_ALLOW_LOOPBACK", "true") - - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("X-Test", "ok") - w.WriteHeader(http.StatusOK) - w.Write([]byte("hello")) - })) - defer server.Close() - - executor := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{}}) - - if result, _ := executor.executeGetURLContent(context.Background(), map[string]interface{}{}); !result.IsError { - t.Fatal("expected error when url missing") - } - - result, err := executor.executeGetURLContent(context.Background(), map[string]interface{}{ - "url": server.URL, - }) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - var response URLFetchResponse - if err := json.Unmarshal([]byte(result.Content[0].Text), &response); err != nil { - t.Fatalf("decode response: %v", err) - } - if response.StatusCode != http.StatusOK || response.Headers["X-Test"] != "ok" { - t.Fatalf("unexpected 
response: %+v", response) - } -} - func TestExecuteListInfrastructureAndTopology(t *testing.T) { state := models.StateSnapshot{ Nodes: []models.Node{{ID: "node1", Name: "node1", Status: "online"}}, @@ -261,34 +193,7 @@ func TestExecuteSearchResources_Errors(t *testing.T) { } } -func TestExecuteSetResourceURLAndGetResource(t *testing.T) { - executor := NewPulseToolExecutor(ExecutorConfig{StateProvider: &mockStateProvider{}}) - - if result, _ := executor.executeSetResourceURL(context.Background(), map[string]interface{}{}); !result.IsError { - t.Fatal("expected error when resource_type missing") - } - - updater := &fakeMetadataUpdater{} - executor.metadataUpdater = updater - result, err := executor.executeSetResourceURL(context.Background(), map[string]interface{}{ - "resource_type": "guest", - "resource_id": "100", - "url": "http://example", - }) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(updater.resourceArgs) != 3 || updater.resourceArgs[2] != "http://example" { - t.Fatalf("unexpected updater args: %+v", updater.resourceArgs) - } - var setResp map[string]interface{} - if err := json.Unmarshal([]byte(result.Content[0].Text), &setResp); err != nil { - t.Fatalf("decode set response: %v", err) - } - if setResp["action"] != "set" { - t.Fatalf("unexpected set response: %+v", setResp) - } - +func TestExecuteGetResource(t *testing.T) { state := models.StateSnapshot{ VMs: []models.VM{{ID: "vm1", VMID: 100, Name: "vm1", Status: "running", Node: "node1"}}, Containers: []models.Container{{ID: "ct1", VMID: 200, Name: "ct1", Status: "running", Node: "node1"}}, @@ -302,7 +207,9 @@ func TestExecuteSetResourceURLAndGetResource(t *testing.T) { }}, }}, } - executor.stateProvider = &mockStateProvider{state: state} + executor := NewPulseToolExecutor(ExecutorConfig{ + StateProvider: &mockStateProvider{state: state}, + }) resource, _ := executor.executeGetResource(context.Background(), map[string]interface{}{ "resource_type": "vm", @@ -379,32 +286,6 @@ func TestExecuteGetResource_DockerDetails(t *testing.T) { } } -func TestExecuteSetResourceURL_ClearAndMissingUpdater(t *testing.T) { - executor := NewPulseToolExecutor(ExecutorConfig{}) - result, _ := executor.executeSetResourceURL(context.Background(), map[string]interface{}{ - "resource_type": "vm", - "resource_id": "100", - }) - if result.Content[0].Text != "Metadata updater not available." 
{ - t.Fatalf("unexpected response: %s", result.Content[0].Text) - } - - updater := &fakeMetadataUpdater{} - executor.metadataUpdater = updater - result, _ = executor.executeSetResourceURL(context.Background(), map[string]interface{}{ - "resource_type": "vm", - "resource_id": "100", - "url": "", - }) - var resp map[string]interface{} - if err := json.Unmarshal([]byte(result.Content[0].Text), &resp); err != nil { - t.Fatalf("decode response: %v", err) - } - if resp["action"] != "cleared" { - t.Fatalf("unexpected response: %+v", resp) - } -} - func TestIntArg(t *testing.T) { if got := intArg(map[string]interface{}{}, "limit", 10); got != 10 { t.Fatalf("unexpected default: %d", got) @@ -414,95 +295,6 @@ func TestIntArg(t *testing.T) { } } -func TestParseAndValidateFetchURL(t *testing.T) { - t.Run("Empty", func(t *testing.T) { - if _, err := parseAndValidateFetchURL(context.Background(), ""); err == nil { - t.Fatal("expected error for empty URL") - } - }) - - t.Run("InvalidURL", func(t *testing.T) { - if _, err := parseAndValidateFetchURL(context.Background(), "http://%"); err == nil { - t.Fatal("expected error for invalid URL") - } - }) - - t.Run("NotAbsolute", func(t *testing.T) { - if _, err := parseAndValidateFetchURL(context.Background(), "example.com"); err == nil { - t.Fatal("expected error for relative URL") - } - }) - - t.Run("BadScheme", func(t *testing.T) { - if _, err := parseAndValidateFetchURL(context.Background(), "ftp://example.com"); err == nil { - t.Fatal("expected error for scheme") - } - }) - - t.Run("Credentials", func(t *testing.T) { - if _, err := parseAndValidateFetchURL(context.Background(), "http://user:pass@example.com"); err == nil { - t.Fatal("expected error for credentials") - } - }) - - t.Run("Fragment", func(t *testing.T) { - if _, err := parseAndValidateFetchURL(context.Background(), "https://example.com/#frag"); err == nil { - t.Fatal("expected error for fragment") - } - }) - - t.Run("MissingHost", func(t *testing.T) { - if _, err := parseAndValidateFetchURL(context.Background(), "http:///"); err == nil { - t.Fatal("expected error for missing host") - } - }) - - t.Run("BlockedHost", func(t *testing.T) { - if _, err := parseAndValidateFetchURL(context.Background(), "http://localhost"); err == nil { - t.Fatal("expected error for blocked host") - } - }) - - t.Run("BlockedIP", func(t *testing.T) { - if _, err := parseAndValidateFetchURL(context.Background(), "http://127.0.0.1"); err == nil { - t.Fatal("expected error for blocked IP") - } - }) - - t.Run("AllowLoopback", func(t *testing.T) { - t.Setenv("PULSE_AI_ALLOW_LOOPBACK", "true") - parsed, err := parseAndValidateFetchURL(context.Background(), "http://127.0.0.1:8080") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if parsed.Hostname() != "127.0.0.1" { - t.Fatalf("unexpected host: %s", parsed.Hostname()) - } - }) -} - -func TestIsBlockedFetchIP(t *testing.T) { - if !isBlockedFetchIP(nil) { - t.Fatal("expected nil IP to be blocked") - } - if !isBlockedFetchIP(net.ParseIP("0.0.0.0")) { - t.Fatal("expected unspecified IP to be blocked") - } - if !isBlockedFetchIP(net.ParseIP("169.254.1.1")) { - t.Fatal("expected link-local IP to be blocked") - } - if isBlockedFetchIP(net.ParseIP("8.8.8.8")) { - t.Fatal("expected global IP to be allowed") - } - - t.Run("LoopbackAllowed", func(t *testing.T) { - t.Setenv("PULSE_AI_ALLOW_LOOPBACK", "true") - if isBlockedFetchIP(net.ParseIP("127.0.0.1")) { - t.Fatal("expected loopback IP to be allowed") - } - }) -} - func 
TestExecuteListInfrastructurePaginationAndDockerFilter(t *testing.T) { state := models.StateSnapshot{ Nodes: []models.Node{ @@ -645,20 +437,3 @@ func TestExecuteGetResource_MissingArgs(t *testing.T) { t.Fatal("expected error for missing resource_id") } } - -func TestExecuteGetURLContent_InvalidURL(t *testing.T) { - executor := NewPulseToolExecutor(ExecutorConfig{}) - result, err := executor.executeGetURLContent(context.Background(), map[string]interface{}{ - "url": "ftp://example.com", - }) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - var response URLFetchResponse - if err := json.Unmarshal([]byte(result.Content[0].Text), &response); err != nil { - t.Fatalf("decode response: %v", err) - } - if response.Error == "" { - t.Fatalf("expected error response: %+v", response) - } -} diff --git a/internal/ai/tools/tools_storage.go b/internal/ai/tools/tools_storage.go new file mode 100644 index 000000000..b89935122 --- /dev/null +++ b/internal/ai/tools/tools_storage.go @@ -0,0 +1,146 @@ +package tools + +import ( + "context" + "fmt" +) + +// registerStorageTools registers the consolidated pulse_storage tool +func (e *PulseToolExecutor) registerStorageTools() { + e.registry.Register(RegisteredTool{ + Definition: Tool{ + Name: "pulse_storage", + Description: `Query storage pools, backups, snapshots, Ceph, and replication. + +Types: +- pools: Storage pool usage and health (ZFS, Ceph, LVM, etc.) +- config: Proxmox storage.cfg configuration +- backups: Backup status for VMs/containers (PBS and PVE) +- backup_tasks: Recent backup task history +- snapshots: VM/container snapshots +- ceph: Ceph cluster status from Proxmox API +- ceph_details: Detailed Ceph status from host agents +- replication: Proxmox replication job status +- pbs_jobs: PBS backup, sync, verify, prune jobs +- raid: Host RAID array status +- disk_health: SMART and RAID health from agents +- resource_disks: VM/container filesystem usage + +Examples: +- List storage pools: type="pools" +- Get specific storage: type="pools", storage_id="local-lvm" +- Get backups for VM: type="backups", resource_id="101" +- Get Ceph status: type="ceph" +- Get replication jobs: type="replication"`, + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]PropertySchema{ + "type": { + Type: "string", + Description: "Storage type to query", + Enum: []string{"pools", "config", "backups", "backup_tasks", "snapshots", "ceph", "ceph_details", "replication", "pbs_jobs", "raid", "disk_health", "resource_disks"}, + }, + "storage_id": { + Type: "string", + Description: "Filter by storage ID (for pools, config)", + }, + "resource_id": { + Type: "string", + Description: "Filter by VM/container ID (for backups, snapshots, resource_disks)", + }, + "guest_id": { + Type: "string", + Description: "Filter by guest ID (for snapshots, backup_tasks)", + }, + "vm_id": { + Type: "string", + Description: "Filter by VM ID (for replication)", + }, + "instance": { + Type: "string", + Description: "Filter by Proxmox/PBS instance", + }, + "node": { + Type: "string", + Description: "Filter by node name", + }, + "host": { + Type: "string", + Description: "Filter by host (for raid, ceph_details)", + }, + "cluster": { + Type: "string", + Description: "Filter by Ceph cluster name", + }, + "job_type": { + Type: "string", + Description: "Filter PBS jobs by type: backup, sync, verify, prune, garbage", + Enum: []string{"backup", "sync", "verify", "prune", "garbage"}, + }, + "state": { + Type: "string", + Description: "Filter RAID arrays by state: clean, degraded, 
rebuilding", + }, + "status": { + Type: "string", + Description: "Filter backup tasks by status: ok, error", + }, + "resource_type": { + Type: "string", + Description: "Filter by type: vm or lxc (for resource_disks)", + }, + "min_usage": { + Type: "number", + Description: "Only show resources with disk usage above this percentage (for resource_disks)", + }, + "limit": { + Type: "integer", + Description: "Maximum number of results (default: 100)", + }, + "offset": { + Type: "integer", + Description: "Number of results to skip", + }, + }, + Required: []string{"type"}, + }, + }, + Handler: func(ctx context.Context, exec *PulseToolExecutor, args map[string]interface{}) (CallToolResult, error) { + return exec.executeStorage(ctx, args) + }, + }) +} + +// executeStorage routes to the appropriate storage handler based on type +// All handler functions are implemented in tools_infrastructure.go +func (e *PulseToolExecutor) executeStorage(ctx context.Context, args map[string]interface{}) (CallToolResult, error) { + storageType, _ := args["type"].(string) + switch storageType { + case "pools": + return e.executeListStorage(ctx, args) + case "config": + return e.executeGetStorageConfig(ctx, args) + case "backups": + return e.executeListBackups(ctx, args) + case "backup_tasks": + return e.executeListBackupTasks(ctx, args) + case "snapshots": + return e.executeListSnapshots(ctx, args) + case "ceph": + return e.executeGetCephStatus(ctx, args) + case "ceph_details": + return e.executeGetHostCephDetails(ctx, args) + case "replication": + return e.executeGetReplication(ctx, args) + case "pbs_jobs": + return e.executeListPBSJobs(ctx, args) + case "raid": + return e.executeGetHostRAIDStatus(ctx, args) + case "disk_health": + return e.executeGetDiskHealth(ctx, args) + case "resource_disks": + return e.executeGetResourceDisks(ctx, args) + default: + return NewErrorResult(fmt.Errorf("unknown type: %s. Use: pools, config, backups, backup_tasks, snapshots, ceph, ceph_details, replication, pbs_jobs, raid, disk_health, resource_disks", storageType)), nil + } +}