Add Windows service support to unified agent

Port Windows SCM integration from pulse-host-agent to pulse-agent,
enabling the unified agent to run as a Windows service with proper
start/stop handling and event logging.

Related to #766
This commit is contained in:
rcourtman
2025-11-27 17:00:03 +00:00
parent a92a53fb98
commit 94259a45da
3 changed files with 270 additions and 5 deletions

View File

@@ -42,7 +42,17 @@ func main() {
logger := zerolog.New(os.Stdout).Level(cfg.LogLevel).With().Timestamp().Logger()
cfg.Logger = &logger
// 3. Setup Context & Signal Handling
// 3. Check if running as Windows service
ranAsService, err := runAsWindowsService(cfg, logger)
if err != nil {
logger.Fatal().Err(err).Msg("Windows service failed")
}
if ranAsService {
// Service handled everything, exit normally
return
}
// 4. Setup Context & Signal Handling (for non-service mode)
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
defer cancel()
@@ -56,7 +66,7 @@ func main() {
Bool("auto_update", !cfg.DisableAutoUpdate).
Msg("Starting Pulse Unified Agent")
// 4. Start Auto-Updater
// 5. Start Auto-Updater
updater := agentupdate.New(agentupdate.Config{
PulseURL: cfg.PulseURL,
APIToken: cfg.APIToken,
@@ -73,7 +83,7 @@ func main() {
return nil
})
// 5. Start Host Agent (if enabled)
// 6. Start Host Agent (if enabled)
if cfg.EnableHost {
hostCfg := hostagent.Config{
PulseURL: cfg.PulseURL,
@@ -103,7 +113,7 @@ func main() {
})
}
// 6. Start Docker Agent (if enabled)
// 7. Start Docker Agent (if enabled)
if cfg.EnableDocker {
dockerCfg := dockeragent.Config{
PulseURL: cfg.PulseURL,
@@ -138,7 +148,7 @@ func main() {
})
}
// 6. Wait for all agents to exit
// 8. Wait for all agents to exit
if err := g.Wait(); err != nil && err != context.Canceled {
logger.Error().Err(err).Msg("Agent terminated with error")
os.Exit(1)

View File

@@ -0,0 +1,12 @@
//go:build !windows
package main
import (
"github.com/rs/zerolog"
)
// runAsWindowsService is a no-op on non-Windows platforms
func runAsWindowsService(_ Config, _ zerolog.Logger) (bool, error) {
return false, nil
}

View File

@@ -0,0 +1,243 @@
//go:build windows
package main
import (
"context"
"fmt"
"os"
"time"
"github.com/rcourtman/pulse-go-rewrite/internal/agentupdate"
"github.com/rcourtman/pulse-go-rewrite/internal/dockeragent"
"github.com/rcourtman/pulse-go-rewrite/internal/hostagent"
"github.com/rs/zerolog"
"golang.org/x/sync/errgroup"
"golang.org/x/sys/windows/svc"
"golang.org/x/sys/windows/svc/eventlog"
)
const serviceName = "PulseAgent"
type windowsService struct {
cfg Config
logger zerolog.Logger
eventLog *eventlog.Log
}
func (ws *windowsService) Execute(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (ssec bool, errno uint32) {
const cmdsAccepted = svc.AcceptStop | svc.AcceptShutdown
changes <- svc.Status{State: svc.StartPending}
if ws.eventLog != nil {
ws.eventLog.Info(1, "Pulse Agent service starting")
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
g, ctx := errgroup.WithContext(ctx)
// Start Auto-Updater
updater := agentupdate.New(agentupdate.Config{
PulseURL: ws.cfg.PulseURL,
APIToken: ws.cfg.APIToken,
AgentName: "pulse-agent",
CurrentVersion: Version,
CheckInterval: 1 * time.Hour,
InsecureSkipVerify: ws.cfg.InsecureSkipVerify,
Logger: &ws.logger,
Disabled: ws.cfg.DisableAutoUpdate,
})
g.Go(func() error {
updater.RunLoop(ctx)
return nil
})
// Start Host Agent (if enabled)
if ws.cfg.EnableHost {
hostCfg := hostagent.Config{
PulseURL: ws.cfg.PulseURL,
APIToken: ws.cfg.APIToken,
Interval: ws.cfg.Interval,
HostnameOverride: ws.cfg.HostnameOverride,
AgentID: ws.cfg.AgentID,
AgentType: "unified",
AgentVersion: Version,
Tags: ws.cfg.Tags,
InsecureSkipVerify: ws.cfg.InsecureSkipVerify,
LogLevel: ws.cfg.LogLevel,
Logger: &ws.logger,
}
agent, err := hostagent.New(hostCfg)
if err != nil {
ws.logger.Error().Err(err).Msg("Failed to create host agent")
if ws.eventLog != nil {
ws.eventLog.Error(1, fmt.Sprintf("Failed to create host agent: %v", err))
}
changes <- svc.Status{State: svc.Stopped}
return true, 1
}
g.Go(func() error {
ws.logger.Info().Msg("Host agent module started")
return agent.Run(ctx)
})
}
// Start Docker Agent (if enabled)
if ws.cfg.EnableDocker {
dockerCfg := dockeragent.Config{
PulseURL: ws.cfg.PulseURL,
APIToken: ws.cfg.APIToken,
Interval: ws.cfg.Interval,
HostnameOverride: ws.cfg.HostnameOverride,
AgentID: ws.cfg.AgentID,
AgentType: "unified",
AgentVersion: Version,
InsecureSkipVerify: ws.cfg.InsecureSkipVerify,
DisableAutoUpdate: true,
LogLevel: ws.cfg.LogLevel,
Logger: &ws.logger,
SwarmScope: "node",
IncludeContainers: true,
IncludeServices: true,
IncludeTasks: true,
CollectDiskMetrics: true,
}
agent, err := dockeragent.New(dockerCfg)
if err != nil {
ws.logger.Error().Err(err).Msg("Failed to create docker agent")
if ws.eventLog != nil {
ws.eventLog.Error(1, fmt.Sprintf("Failed to create docker agent: %v", err))
}
changes <- svc.Status{State: svc.Stopped}
return true, 1
}
g.Go(func() error {
ws.logger.Info().Msg("Docker agent module started")
return agent.Run(ctx)
})
}
changes <- svc.Status{State: svc.Running, Accepts: cmdsAccepted}
ws.logger.Info().
Str("version", Version).
Str("pulse_url", ws.cfg.PulseURL).
Bool("host_agent", ws.cfg.EnableHost).
Bool("docker_agent", ws.cfg.EnableDocker).
Msg("Pulse Agent service is running")
if ws.eventLog != nil {
ws.eventLog.Info(1, fmt.Sprintf("Pulse Agent started (URL: %s, Host: %v, Docker: %v)", ws.cfg.PulseURL, ws.cfg.EnableHost, ws.cfg.EnableDocker))
}
// Channel to receive errgroup completion
doneChan := make(chan error, 1)
go func() {
doneChan <- g.Wait()
}()
// Service control loop
loop:
for {
select {
case c := <-r:
switch c.Cmd {
case svc.Interrogate:
changes <- c.CurrentStatus
case svc.Stop, svc.Shutdown:
ws.logger.Info().Uint32("command", uint32(c.Cmd)).Msg("Received service control command")
if ws.eventLog != nil {
ws.eventLog.Info(1, "Pulse Agent received stop command")
}
changes <- svc.Status{State: svc.StopPending}
cancel()
break loop
default:
ws.logger.Warn().Uint32("command", uint32(c.Cmd)).Msg("Unexpected service control command")
}
case err := <-doneChan:
if err != nil && err != context.Canceled {
ws.logger.Error().Err(err).Msg("Agent error")
if ws.eventLog != nil {
ws.eventLog.Error(1, fmt.Sprintf("Pulse Agent error: %v", err))
}
changes <- svc.Status{State: svc.Stopped}
return true, 1
}
break loop
}
}
// Wait for agents to stop gracefully (with timeout)
shutdownTimeout := time.NewTimer(10 * time.Second)
defer shutdownTimeout.Stop()
select {
case <-doneChan:
ws.logger.Info().Msg("Agents stopped gracefully")
if ws.eventLog != nil {
ws.eventLog.Info(1, "Pulse Agent stopped gracefully")
}
case <-shutdownTimeout.C:
ws.logger.Warn().Msg("Agent shutdown timeout, forcing stop")
if ws.eventLog != nil {
ws.eventLog.Warning(1, "Pulse Agent shutdown timeout")
}
}
changes <- svc.Status{State: svc.Stopped}
return false, 0
}
// runAsWindowsService checks if we're running as a Windows service and handles it.
// Returns a special error to indicate the service ran (and main should exit),
// returns nil if not running as a service (main should continue normally),
// or returns an error if something failed.
func runAsWindowsService(cfg Config, logger zerolog.Logger) (ranAsService bool, err error) {
isService, err := svc.IsWindowsService()
if err != nil {
return false, fmt.Errorf("failed to determine if running as service: %w", err)
}
if !isService {
return false, nil
}
logger.Info().Msg("Running as Windows service")
// Open Windows Event Log (best effort)
elog, err := eventlog.Open(serviceName)
if err != nil {
logger.Warn().Err(err).Msg("Could not open Windows Event Log, continuing without it")
elog = nil
}
defer func() {
if elog != nil {
elog.Close()
}
}()
ws := &windowsService{
cfg: cfg,
logger: logger,
eventLog: elog,
}
err = svc.Run(serviceName, ws)
if err != nil {
if elog != nil {
elog.Error(1, fmt.Sprintf("Failed to run service: %v", err))
}
return true, fmt.Errorf("failed to run Windows service: %w", err)
}
// Service ran successfully and exited
os.Exit(0)
return true, nil // unreachable, but required for compilation
}