Pulse/cmd/pulse-sensor-proxy/throttle.go
rcourtman 885a62e96b feat(security): Implement range-based rate limiting
Prevents multi-UID rate-limit bypass attacks from containers. Previously,
attackers could create multiple users in a container (each mapped to a
unique host UID in the 100000-165535 range) to bypass per-UID rate limits.

Implementation:
- Automatic detection of ID-mapped UID ranges from /etc/subuid and /etc/subgid (see the parsing sketch below)
- Rate limits applied per-range for container UIDs
- Rate limits applied per-UID for host UIDs (backwards compatible)
- identifyPeer() checks if BOTH UID AND GID are in mapped ranges
- Metrics show peer='range:100000-165535' or peer='uid:0'

Security benefit: the entire container is limited as a single entity, preventing
100+ UIDs from bypassing rate controls.
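
A minimal, hypothetical sketch of the range detection, for context: it parses /etc/subuid-style "user:start:count" entries into ranges like the ones throttle.go consumes. The parseSubIDFile helper and the local idRange definition are illustrative only; the proxy's real idRange type and loader live outside this file and may differ.

// Sketch only: the proxy's real subuid/subgid loader lives outside throttle.go.
package main

import (
	"bufio"
	"fmt"
	"os"
	"strconv"
	"strings"
)

// idRange mirrors the fields throttle.go relies on (start, length); the real
// type is defined elsewhere in pulse-sensor-proxy.
type idRange struct {
	start  uint32
	length uint32
}

// parseSubIDFile reads "user:start:count" lines (the /etc/subuid and
// /etc/subgid format) and returns the ID-mapped ranges.
func parseSubIDFile(path string) ([]idRange, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var ranges []idRange
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := strings.Split(strings.TrimSpace(scanner.Text()), ":")
		if len(fields) != 3 {
			continue // skip malformed or empty lines
		}
		start, errStart := strconv.ParseUint(fields[1], 10, 32)
		count, errCount := strconv.ParseUint(fields[2], 10, 32)
		if errStart != nil || errCount != nil || count == 0 {
			continue
		}
		ranges = append(ranges, idRange{start: uint32(start), length: uint32(count)})
	}
	return ranges, scanner.Err()
}

func main() {
	// With a typical "root:100000:65536" entry this prints "range:100000-165535".
	ranges, err := parseSubIDFile("/etc/subuid")
	if err != nil {
		fmt.Println("cannot read /etc/subuid:", err)
		return
	}
	for _, r := range ranges {
		fmt.Printf("range:%d-%d\n", r.start, r.start+r.length-1)
	}
}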

New metrics (see the wiring sketch below):
- pulse_proxy_limiter_rejections_total{peer,reason}
- pulse_proxy_limiter_penalties_total{peer,reason}
- pulse_proxy_global_concurrency_inflight
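
The metric recording itself goes through the ProxyMetrics type, which is defined outside throttle.go. Below is a hedged sketch of how these counters and gauges could be wired with prometheus/client_golang; the method names match the calls in throttle.go, while the registration code and the peers gauge name are assumptions.

// Sketch only: a possible shape for the ProxyMetrics methods called in
// throttle.go, using the standard prometheus/client_golang library.
package main

import "github.com/prometheus/client_golang/prometheus"

type ProxyMetrics struct {
	limiterRejections *prometheus.CounterVec
	limiterPenalties  *prometheus.CounterVec
	globalInflight    prometheus.Gauge
	limiterPeers      prometheus.Gauge // gauge name below is assumed, not from the commit
}

func newProxyMetrics(reg prometheus.Registerer) *ProxyMetrics {
	m := &ProxyMetrics{
		limiterRejections: prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "pulse_proxy_limiter_rejections_total",
			Help: "Requests rejected by the limiter, by peer and reason.",
		}, []string{"peer", "reason"}),
		limiterPenalties: prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "pulse_proxy_limiter_penalties_total",
			Help: "Penalty delays applied, by peer and reason.",
		}, []string{"peer", "reason"}),
		globalInflight: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "pulse_proxy_global_concurrency_inflight",
			Help: "Requests currently holding a global concurrency slot.",
		}),
		limiterPeers: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "pulse_proxy_limiter_peers", // assumed name
			Help: "Peers currently tracked by the limiter.",
		}),
	}
	reg.MustRegister(m.limiterRejections, m.limiterPenalties, m.globalInflight, m.limiterPeers)
	return m
}

func (m *ProxyMetrics) recordLimiterReject(reason, peer string) {
	m.limiterRejections.WithLabelValues(peer, reason).Inc()
}

func (m *ProxyMetrics) recordPenalty(reason, peer string) {
	m.limiterPenalties.WithLabelValues(peer, reason).Inc()
}

func (m *ProxyMetrics) incGlobalConcurrency() { m.globalInflight.Inc() }
func (m *ProxyMetrics) decGlobalConcurrency() { m.globalInflight.Dec() }
func (m *ProxyMetrics) setLimiterPeers(n int) { m.limiterPeers.Set(float64(n)) }

func main() {
	// Example: register against a fresh registry and record one rejection.
	m := newProxyMetrics(prometheus.NewRegistry())
	m.recordLimiterReject("rate", "range:100000-165535")
}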

Related to security audit 2025-11-07.

Co-authored-by: Codex <codex@openai.com>
2025-11-07 17:08:45 +00:00


package main

import (
	"fmt"
	"sync"
	"time"

	"golang.org/x/time/rate"
)

// peerID identifies a connecting principal (grouped by UID or ID range).
type peerID struct {
	uid      uint32
	uidRange *idRange
}

func (p peerID) String() string {
	if p.uidRange != nil {
		end := p.uidRange.start + p.uidRange.length - 1
		return fmt.Sprintf("range:%d-%d", p.uidRange.start, end)
	}
	return fmt.Sprintf("uid:%d", p.uid)
}

// limiterEntry holds rate limiting and concurrency controls for a peer.
type limiterEntry struct {
	limiter   *rate.Limiter
	semaphore chan struct{}
	lastSeen  time.Time
}

// limiterPolicy captures the per-peer and global limits applied by the rate limiter.
type limiterPolicy struct {
	perPeerLimit       rate.Limit
	perPeerBurst       int
	perPeerConcurrency int
	globalConcurrency  int
	penaltyDuration    time.Duration
}

// rateLimiter manages per-peer rate limits and concurrency.
type rateLimiter struct {
	mu        sync.Mutex
	entries   map[string]*limiterEntry
	quitChan  chan struct{}
	globalSem chan struct{}
	policy    limiterPolicy
	metrics   *ProxyMetrics
	uidRanges []idRange
	gidRanges []idRange
}

const (
	defaultPerPeerBurst       = 5 // Allow burst of 5 requests for multi-node polling
	defaultPerPeerConcurrency = 2
	defaultGlobalConcurrency  = 8
)

var (
	defaultPerPeerRateInterval = 1 * time.Second // 1 qps (60/min) - supports 5-10 node deployments
	defaultPenaltyDuration     = 2 * time.Second
	defaultPerPeerLimit        = rate.Every(defaultPerPeerRateInterval)
)
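
// Editorial note on the defaults above (not load-bearing code): rate.Every(1s)
// refills one token per second and the burst of 5 fills the bucket, so an idle
// peer can send 5 back-to-back requests and then sustain roughly 1 request per
// second (60/min); at most 2 of its requests run concurrently, and at most 8
// requests run concurrently across all peers.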
// newRateLimiter creates a new rate limiter with a cleanup loop.
// If rateLimitCfg is provided, it overrides the default rate limit settings.
func newRateLimiter(metrics *ProxyMetrics, rateLimitCfg *RateLimitConfig, uidRanges, gidRanges []idRange) *rateLimiter {
	// Use defaults
	perPeerLimit := defaultPerPeerLimit
	perPeerBurst := defaultPerPeerBurst

	// Override with config if provided
	if rateLimitCfg != nil {
		if rateLimitCfg.PerPeerIntervalMs > 0 {
			interval := time.Duration(rateLimitCfg.PerPeerIntervalMs) * time.Millisecond
			perPeerLimit = rate.Every(interval)
		}
		if rateLimitCfg.PerPeerBurst > 0 {
			perPeerBurst = rateLimitCfg.PerPeerBurst
		}
	}

	rl := &rateLimiter{
		entries:   make(map[string]*limiterEntry),
		quitChan:  make(chan struct{}),
		globalSem: make(chan struct{}, defaultGlobalConcurrency),
		policy: limiterPolicy{
			perPeerLimit:       perPeerLimit,
			perPeerBurst:       perPeerBurst,
			perPeerConcurrency: defaultPerPeerConcurrency,
			globalConcurrency:  defaultGlobalConcurrency,
			penaltyDuration:    defaultPenaltyDuration,
		},
		metrics:   metrics,
		uidRanges: append([]idRange(nil), uidRanges...),
		gidRanges: append([]idRange(nil), gidRanges...),
	}
	if rl.metrics != nil {
		rl.metrics.setLimiterPeers(0)
	}
	go rl.cleanupLoop()
	return rl
}

// allow checks if a peer is allowed to make a request and reserves concurrency.
// Returns a release function, rejection reason (if any), and whether the request is allowed.
func (rl *rateLimiter) allow(id peerID) (release func(), reason string, allowed bool) {
	key := id.String()

	rl.mu.Lock()
	entry := rl.entries[key]
	if entry == nil {
		entry = &limiterEntry{
			limiter:   rate.NewLimiter(rl.policy.perPeerLimit, rl.policy.perPeerBurst),
			semaphore: make(chan struct{}, rl.policy.perPeerConcurrency),
		}
		rl.entries[key] = entry
		if rl.metrics != nil {
			rl.metrics.setLimiterPeers(len(rl.entries))
		}
	}
	entry.lastSeen = time.Now()
	rl.mu.Unlock()

	// Check rate limit
	if !entry.limiter.Allow() {
		rl.recordRejection("rate", key)
		return nil, "rate", false
	}

	// Acquire global concurrency
	select {
	case rl.globalSem <- struct{}{}:
		if rl.metrics != nil {
			rl.metrics.incGlobalConcurrency()
		}
	default:
		rl.recordRejection("global_concurrency", key)
		return nil, "global_concurrency", false
	}

	// Try to acquire per-peer concurrency slot
	select {
	case entry.semaphore <- struct{}{}:
		return func() {
			<-entry.semaphore
			<-rl.globalSem
			if rl.metrics != nil {
				rl.metrics.decGlobalConcurrency()
			}
		}, "", true
	default:
		<-rl.globalSem
		if rl.metrics != nil {
			rl.metrics.decGlobalConcurrency()
		}
		rl.recordRejection("peer_concurrency", key)
		return nil, "peer_concurrency", false
	}
}
// cleanupLoop periodically removes idle peer entries.
func (rl *rateLimiter) cleanupLoop() {
	ticker := time.NewTicker(5 * time.Minute)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			rl.mu.Lock()
			for key, entry := range rl.entries {
				if time.Since(entry.lastSeen) > 10*time.Minute {
					delete(rl.entries, key)
				}
			}
			if rl.metrics != nil {
				rl.metrics.setLimiterPeers(len(rl.entries))
			}
			rl.mu.Unlock()
		case <-rl.quitChan:
			return
		}
	}
}

// shutdown stops the cleanup loop.
func (rl *rateLimiter) shutdown() {
	close(rl.quitChan)
}

// penalize delays the caller by the configured penalty duration and records the penalty metric.
func (rl *rateLimiter) penalize(peerLabel, reason string) {
	if rl.policy.penaltyDuration <= 0 {
		return
	}
	time.Sleep(rl.policy.penaltyDuration)
	if rl.metrics != nil {
		rl.metrics.recordPenalty(reason, peerLabel)
	}
}

// recordRejection increments the limiter rejection metric for the given reason and peer.
func (rl *rateLimiter) recordRejection(reason, peerLabel string) {
	if rl.metrics != nil {
		rl.metrics.recordLimiterReject(reason, peerLabel)
	}
}

// identifyPeer maps peer credentials to a peerID. If both the UID and GID fall
// inside ID-mapped ranges, the whole UID range becomes the identity; otherwise
// the peer is keyed by UID alone.
func (rl *rateLimiter) identifyPeer(cred *peerCredentials) peerID {
	if cred == nil {
		return peerID{}
	}
	if rl == nil {
		return peerID{uid: cred.uid}
	}
	if len(rl.uidRanges) == 0 || len(rl.gidRanges) == 0 {
		return peerID{uid: cred.uid}
	}
	uidRange := findRange(rl.uidRanges, cred.uid)
	gidRange := findRange(rl.gidRanges, cred.gid)
	if uidRange != nil && gidRange != nil {
		return peerID{uid: cred.uid, uidRange: uidRange}
	}
	return peerID{uid: cred.uid}
}

// findRange returns the range containing value, or nil if none does.
func findRange(ranges []idRange, value uint32) *idRange {
	for i := range ranges {
		if ranges[i].contains(value) {
			return &ranges[i]
		}
	}
	return nil
}
// nodeGate controls per-node concurrency for temperature requests.
type nodeGate struct {
	mu       sync.Mutex
	inFlight map[string]*nodeLock
}

// nodeLock tracks in-flight requests for a specific node.
type nodeLock struct {
	refCount int
	guard    chan struct{}
}

// newNodeGate creates a new node concurrency gate.
func newNodeGate() *nodeGate {
	return &nodeGate{
		inFlight: make(map[string]*nodeLock),
	}
}

// acquire gets exclusive access to make requests to a node.
// Returns a release function that must be called when done.
func (g *nodeGate) acquire(node string) func() {
	g.mu.Lock()
	lock := g.inFlight[node]
	if lock == nil {
		lock = &nodeLock{
			guard: make(chan struct{}, 1), // single slot = only one SSH fetch per node
		}
		g.inFlight[node] = lock
	}
	lock.refCount++
	g.mu.Unlock()

	// Wait for exclusive access
	lock.guard <- struct{}{}

	// Return release function
	return func() {
		<-lock.guard
		g.mu.Lock()
		lock.refCount--
		if lock.refCount == 0 {
			delete(g.inFlight, node)
		}
		g.mu.Unlock()
	}
}
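
For orientation, here is an illustrative sketch (not part of throttle.go) of how a request handler in the same package might tie the pieces together: resolve the peer identity, reserve rate and concurrency budget, apply the penalty on rejection, and serialize per-node SSH fetches through the node gate. The handler name and the placeholder fetch step are assumptions.

// handleTemperatureRequest is a hypothetical handler showing the intended call
// sequence; it is not part of throttle.go.
func handleTemperatureRequest(rl *rateLimiter, gate *nodeGate, cred *peerCredentials, node string) error {
	peer := rl.identifyPeer(cred)

	release, reason, ok := rl.allow(peer)
	if !ok {
		// Slow the offending peer down before reporting the rejection.
		rl.penalize(peer.String(), reason)
		return fmt.Errorf("request rejected (%s) for %s", reason, peer)
	}
	defer release()

	// Only one SSH temperature fetch per node at a time.
	releaseNode := gate.acquire(node)
	defer releaseNode()

	// ... perform the actual SSH temperature fetch for node here ...
	return nil
}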