Mirror of https://github.com/rcourtman/Pulse.git, synced 2026-02-18 00:17:39 +01:00
Prevents multi-UID rate limit bypass attacks from containers. Previously,
attackers could create multiple users in a container (each mapped to
unique host UIDs 100000-165535) to bypass per-UID rate limits.
Implementation:
- Automatic detection of ID-mapped UID ranges from /etc/subuid and /etc/subgid
- Rate limits applied per-range for container UIDs
- Rate limits applied per-UID for host UIDs (backwards compatible)
- identifyPeer() checks if BOTH UID AND GID are in mapped ranges
- Metrics show peer='range:100000-165535' or peer='uid:0'
Security benefit: the entire container is limited as a single entity,
preventing 100+ UIDs from bypassing rate controls.
New metrics:
- pulse_proxy_limiter_rejections_total{peer,reason}
- pulse_proxy_limiter_penalties_total{peer,reason}
- pulse_proxy_global_concurrency_inflight
Related to security audit 2025-11-07.
Co-authored-by: Codex <codex@openai.com>
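
The idRange type and the /etc/subuid and /etc/subgid parsing referenced above live in other files of the repository; the file below only relies on its start and length fields and a contains method. A minimal sketch of what that support code might look like, assuming the standard user:start:length subordinate-ID format (names and signatures here are illustrative, not the repository's actual code):

// Sketch only: the real idRange definition and subuid parsing are elsewhere in the repo.
package main

import (
	"bufio"
	"os"
	"strconv"
	"strings"
)

// idRange describes a contiguous block of host IDs assigned to a container.
type idRange struct {
	start  uint32
	length uint32
}

// contains reports whether v falls inside the range [start, start+length).
func (r idRange) contains(v uint32) bool {
	return v >= r.start && v-r.start < r.length
}

// parseSubIDFile reads ranges from a file in the /etc/subuid format "user:start:length".
func parseSubIDFile(path string) ([]idRange, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var ranges []idRange
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		parts := strings.Split(strings.TrimSpace(scanner.Text()), ":")
		if len(parts) != 3 {
			continue // skip comments and malformed lines
		}
		start, err1 := strconv.ParseUint(parts[1], 10, 32)
		length, err2 := strconv.ParseUint(parts[2], 10, 32)
		if err1 != nil || err2 != nil {
			continue
		}
		ranges = append(ranges, idRange{start: uint32(start), length: uint32(length)})
	}
	return ranges, scanner.Err()
}
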
282 lines · 6.6 KiB · Go

package main

import (
	"fmt"
	"sync"
	"time"

	"golang.org/x/time/rate"
)

// peerID identifies a connecting principal (grouped by UID or ID range)
type peerID struct {
	uid      uint32
	uidRange *idRange
}

func (p peerID) String() string {
	if p.uidRange != nil {
		end := p.uidRange.start + p.uidRange.length - 1
		return fmt.Sprintf("range:%d-%d", p.uidRange.start, end)
	}
	return fmt.Sprintf("uid:%d", p.uid)
}

// limiterEntry holds rate limiting and concurrency controls for a peer
type limiterEntry struct {
	limiter   *rate.Limiter
	semaphore chan struct{}
	lastSeen  time.Time
}

type limiterPolicy struct {
	perPeerLimit       rate.Limit
	perPeerBurst       int
	perPeerConcurrency int
	globalConcurrency  int
	penaltyDuration    time.Duration
}

// rateLimiter manages per-peer rate limits and concurrency
type rateLimiter struct {
	mu        sync.Mutex
	entries   map[string]*limiterEntry
	quitChan  chan struct{}
	globalSem chan struct{}
	policy    limiterPolicy
	metrics   *ProxyMetrics
	uidRanges []idRange
	gidRanges []idRange
}

const (
	defaultPerPeerBurst       = 5 // Allow burst of 5 requests for multi-node polling
	defaultPerPeerConcurrency = 2
	defaultGlobalConcurrency  = 8
)

var (
	defaultPerPeerRateInterval = 1 * time.Second // 1 qps (60/min) - supports 5-10 node deployments
	defaultPenaltyDuration     = 2 * time.Second
	defaultPerPeerLimit        = rate.Every(defaultPerPeerRateInterval)
)

// newRateLimiter creates a new rate limiter with cleanup loop
// If rateLimitCfg is provided, it overrides the default rate limit settings
func newRateLimiter(metrics *ProxyMetrics, rateLimitCfg *RateLimitConfig, uidRanges, gidRanges []idRange) *rateLimiter {
	// Use defaults
	perPeerLimit := defaultPerPeerLimit
	perPeerBurst := defaultPerPeerBurst

	// Override with config if provided
	if rateLimitCfg != nil {
		if rateLimitCfg.PerPeerIntervalMs > 0 {
			interval := time.Duration(rateLimitCfg.PerPeerIntervalMs) * time.Millisecond
			perPeerLimit = rate.Every(interval)
		}
		if rateLimitCfg.PerPeerBurst > 0 {
			perPeerBurst = rateLimitCfg.PerPeerBurst
		}
	}

	rl := &rateLimiter{
		entries:   make(map[string]*limiterEntry),
		quitChan:  make(chan struct{}),
		globalSem: make(chan struct{}, defaultGlobalConcurrency),
		policy: limiterPolicy{
			perPeerLimit:       perPeerLimit,
			perPeerBurst:       perPeerBurst,
			perPeerConcurrency: defaultPerPeerConcurrency,
			globalConcurrency:  defaultGlobalConcurrency,
			penaltyDuration:    defaultPenaltyDuration,
		},
		metrics:   metrics,
		uidRanges: append([]idRange(nil), uidRanges...),
		gidRanges: append([]idRange(nil), gidRanges...),
	}
	if rl.metrics != nil {
		rl.metrics.setLimiterPeers(0)
	}
	go rl.cleanupLoop()
	return rl
}
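
// Illustrative usage (not part of the original file): a caller could override
// the per-peer defaults, e.g.
//
//	rl := newRateLimiter(metrics, &RateLimitConfig{PerPeerIntervalMs: 500, PerPeerBurst: 10}, uidRanges, gidRanges)
//
// which yields roughly 2 requests/second with a burst of 10 per peer; passing
// a nil config keeps the defaults above.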

// allow checks if a peer is allowed to make a request and reserves concurrency.
// Returns a release function, rejection reason (if any), and whether the request is allowed.
func (rl *rateLimiter) allow(id peerID) (release func(), reason string, allowed bool) {
	key := id.String()
	rl.mu.Lock()
	entry := rl.entries[key]
	if entry == nil {
		entry = &limiterEntry{
			limiter:   rate.NewLimiter(rl.policy.perPeerLimit, rl.policy.perPeerBurst),
			semaphore: make(chan struct{}, rl.policy.perPeerConcurrency),
		}
		rl.entries[key] = entry
		if rl.metrics != nil {
			rl.metrics.setLimiterPeers(len(rl.entries))
		}
	}
	entry.lastSeen = time.Now()
	rl.mu.Unlock()

	// Check rate limit
	if !entry.limiter.Allow() {
		rl.recordRejection("rate", key)
		return nil, "rate", false
	}

	// Acquire global concurrency
	select {
	case rl.globalSem <- struct{}{}:
		if rl.metrics != nil {
			rl.metrics.incGlobalConcurrency()
		}
	default:
		rl.recordRejection("global_concurrency", key)
		return nil, "global_concurrency", false
	}

	// Try to acquire per-peer concurrency slot
	select {
	case entry.semaphore <- struct{}{}:
		return func() {
			<-entry.semaphore
			<-rl.globalSem
			if rl.metrics != nil {
				rl.metrics.decGlobalConcurrency()
			}
		}, "", true
	default:
		<-rl.globalSem
		if rl.metrics != nil {
			rl.metrics.decGlobalConcurrency()
		}
		rl.recordRejection("peer_concurrency", key)
		return nil, "peer_concurrency", false
	}
}
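
// Usage sketch (illustrative, not part of the original file): the returned
// release func must be called exactly once when the request finishes, e.g.
//
//	id := rl.identifyPeer(cred)
//	release, reason, ok := rl.allow(id)
//	if !ok {
//		rl.penalize(id.String(), reason) // optionally slow down the rejected peer
//		return
//	}
//	defer release()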

// cleanupLoop periodically removes idle peer entries
func (rl *rateLimiter) cleanupLoop() {
	ticker := time.NewTicker(5 * time.Minute)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			rl.mu.Lock()
			for key, entry := range rl.entries {
				if time.Since(entry.lastSeen) > 10*time.Minute {
					delete(rl.entries, key)
				}
			}
			if rl.metrics != nil {
				rl.metrics.setLimiterPeers(len(rl.entries))
			}
			rl.mu.Unlock()
		case <-rl.quitChan:
			return
		}
	}
}

// shutdown stops the cleanup loop
func (rl *rateLimiter) shutdown() {
	close(rl.quitChan)
}

// penalize delays the response for a rejected peer and records the penalty metric
func (rl *rateLimiter) penalize(peerLabel, reason string) {
	if rl.policy.penaltyDuration <= 0 {
		return
	}
	time.Sleep(rl.policy.penaltyDuration)
	if rl.metrics != nil {
		rl.metrics.recordPenalty(reason, peerLabel)
	}
}

// recordRejection records a limiter rejection metric for the given reason and peer
func (rl *rateLimiter) recordRejection(reason, peerLabel string) {
	if rl.metrics != nil {
		rl.metrics.recordLimiterReject(reason, peerLabel)
	}
}

// identifyPeer maps peer credentials to a peerID: if both UID and GID fall
// inside the configured ID-mapped ranges, the whole range is treated as a
// single peer; otherwise the peer is tracked by its individual UID.
func (rl *rateLimiter) identifyPeer(cred *peerCredentials) peerID {
	if cred == nil {
		return peerID{}
	}
	if rl == nil {
		return peerID{uid: cred.uid}
	}

	if len(rl.uidRanges) == 0 || len(rl.gidRanges) == 0 {
		return peerID{uid: cred.uid}
	}

	uidRange := findRange(rl.uidRanges, cred.uid)
	gidRange := findRange(rl.gidRanges, cred.gid)

	if uidRange != nil && gidRange != nil {
		return peerID{uid: cred.uid, uidRange: uidRange}
	}

	return peerID{uid: cred.uid}
}
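
// For example, with /etc/subuid and /etc/subgid both mapping 100000-165535,
// a container process with uid=100042 and gid=100050 (illustrative values) is
// grouped and rate limited as "range:100000-165535", while a host process
// (e.g. uid=0, or a UID/GID outside the mapped ranges) keeps its own
// per-UID bucket such as "uid:0".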

// findRange returns the range containing value, or nil if no range matches
func findRange(ranges []idRange, value uint32) *idRange {
	for i := range ranges {
		if ranges[i].contains(value) {
			return &ranges[i]
		}
	}
	return nil
}

// nodeGate controls per-node concurrency for temperature requests
type nodeGate struct {
	mu       sync.Mutex
	inFlight map[string]*nodeLock
}

// nodeLock tracks in-flight requests for a specific node
type nodeLock struct {
	refCount int
	guard    chan struct{}
}

// newNodeGate creates a new node concurrency gate
func newNodeGate() *nodeGate {
	return &nodeGate{
		inFlight: make(map[string]*nodeLock),
	}
}

// acquire gets exclusive access to make requests to a node
// Returns a release function that must be called when done
func (g *nodeGate) acquire(node string) func() {
	g.mu.Lock()
	lock := g.inFlight[node]
	if lock == nil {
		lock = &nodeLock{
			guard: make(chan struct{}, 1), // single slot = only one SSH fetch per node
		}
		g.inFlight[node] = lock
	}
	lock.refCount++
	g.mu.Unlock()

	// Wait for exclusive access
	lock.guard <- struct{}{}

	// Return release function
	return func() {
		<-lock.guard
		g.mu.Lock()
		lock.refCount--
		if lock.refCount == 0 {
			delete(g.inFlight, node)
		}
		g.mu.Unlock()
	}
}
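
// Usage sketch (illustrative, not part of the original file): serialize
// temperature fetches so only one SSH request per node is in flight at a time.
// The node name below is hypothetical:
//
//	gate := newNodeGate()
//	release := gate.acquire("pve-node-1")
//	defer release()
//	// ...perform the single in-flight fetch for this node...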