mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-02-18 23:41:48 +01:00
cmd/eval/main.go: - Fix fmt.Errorf format string lint warning (use %s instead of bare string) internal/logging/logging_test.go: - Update tests to account for LogBroadcaster wrapper in baseWriter - Use string representation checks instead of direct pointer comparison - Verify both the underlying writer and broadcaster are present
733 lines
23 KiB
Go
733 lines
23 KiB
Go
// Command eval runs Pulse Assistant evaluation scenarios against a live Pulse instance.
|
|
//
|
|
// Usage:
|
|
//
|
|
// go run ./cmd/eval # Run quick smoke test
|
|
// go run ./cmd/eval -scenario all # Run all scenarios
|
|
// go run ./cmd/eval -scenario readonly # Run read-only infrastructure scenario
|
|
// go run ./cmd/eval -list # List available scenarios
|
|
//
|
|
// Options:
|
|
//
|
|
// -scenario string Scenario to run: smoke, readonly, enforce, routing, routing-recovery, logs, readonly-recovery, search-id, disambiguate, context-target, discovery, writeverify, strict, strict-block, strict-recovery, readonly-guardrails, noninteractive, approval, approval-approve, approval-deny, approval-combo, patrol, patrol-basic, patrol-investigation, patrol-finding-quality, patrol-signal-coverage, matrix, all (default "smoke")
|
|
// -url string Pulse API base URL (default "http://127.0.0.1:7655")
|
|
// -user string Username for auth (default "admin")
|
|
// -pass string Password for auth (default "admin")
|
|
// -model string Model override for chat requests
|
|
// -models string Comma-separated list of models to run (overrides -model)
|
|
// -auto-models Auto-select latest models per provider
|
|
// -list List available scenarios and exit
|
|
// -quiet Only show summary, not step-by-step output
|
|
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/ai/eval"
|
|
)
|
|
|
|
func main() {
|
|
scenario := flag.String("scenario", "smoke", "Scenario to run: smoke, readonly, enforce, routing, routing-recovery, logs, readonly-recovery, search-id, disambiguate, context-target, discovery, writeverify, guest-control, guest-idempotent, guest-discovery, guest-natural, guest-multi, readonly-filtering, read-loop-recovery, ambiguous-intent, strict, strict-block, strict-recovery, readonly-guardrails, noninteractive, approval, approval-approve, approval-deny, approval-combo, patrol, patrol-basic, patrol-investigation, patrol-finding-quality, patrol-signal-coverage, matrix, all")
|
|
url := flag.String("url", "http://127.0.0.1:7655", "Pulse API base URL")
|
|
user := flag.String("user", "admin", "Username for auth")
|
|
pass := flag.String("pass", "admin", "Password for auth")
|
|
model := flag.String("model", "", "Model override for chat requests")
|
|
models := flag.String("models", "", "Comma-separated list of models to run (overrides -model)")
|
|
autoModels := flag.Bool("auto-models", false, "Auto-select latest models per provider")
|
|
list := flag.Bool("list", false, "List available scenarios and exit")
|
|
quiet := flag.Bool("quiet", false, "Only show summary, not step-by-step output")
|
|
|
|
flag.Parse()
|
|
|
|
if *list {
|
|
listScenarios()
|
|
return
|
|
}
|
|
|
|
baseConfig := eval.DefaultConfig()
|
|
baseConfig.BaseURL = *url
|
|
baseConfig.Username = *user
|
|
baseConfig.Password = *pass
|
|
baseConfig.Verbose = !*quiet
|
|
|
|
if value, ok := envBool("EVAL_PREFLIGHT"); ok {
|
|
baseConfig.Preflight = value
|
|
}
|
|
if value, ok := envInt("EVAL_PREFLIGHT_TIMEOUT"); ok && value > 0 {
|
|
baseConfig.PreflightTimeout = time.Duration(value) * time.Second
|
|
} else if baseConfig.PreflightTimeout == 0 {
|
|
baseConfig.PreflightTimeout = 15 * time.Second
|
|
}
|
|
|
|
modelList := parseModelList(*models)
|
|
if len(modelList) == 0 && *autoModels {
|
|
autoList, details, stats, err := fetchAutoModels(baseConfig.BaseURL, baseConfig.Username, baseConfig.Password)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "Failed to auto-select models: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
modelList = autoList
|
|
fmt.Printf(">>> Auto-selected models: %s\n", strings.Join(modelList, ", "))
|
|
if len(stats) > 0 {
|
|
fmt.Println(">>> Auto-selection provider summary:")
|
|
providers := sortedProviders(stats)
|
|
for _, provider := range providers {
|
|
stat := stats[provider]
|
|
fmt.Printf(" - %s: %d models (%d notable)\n", provider, stat.Total, stat.Notable)
|
|
}
|
|
}
|
|
if len(details) > 0 {
|
|
fmt.Println(">>> Auto-selection details:")
|
|
for _, detail := range details {
|
|
meta := detail.Reason
|
|
if meta == "" {
|
|
meta = "selected"
|
|
}
|
|
fmt.Printf(" - %s: %s (%s)\n", detail.Provider, detail.ID, meta)
|
|
}
|
|
}
|
|
}
|
|
if len(modelList) == 0 {
|
|
modelList = []string{strings.TrimSpace(*model)}
|
|
}
|
|
if len(modelList) == 0 {
|
|
modelList = []string{""}
|
|
}
|
|
|
|
patrolScenarios := getPatrolScenarios(*scenario)
|
|
scenarios := getScenarios(*scenario)
|
|
if len(patrolScenarios) == 0 && len(scenarios) == 0 {
|
|
fmt.Fprintf(os.Stderr, "Unknown scenario: %s\n", *scenario)
|
|
fmt.Fprintf(os.Stderr, "Use -list to see available scenarios\n")
|
|
os.Exit(1)
|
|
}
|
|
|
|
allPassed := true
|
|
for _, modelID := range modelList {
|
|
config := baseConfig
|
|
config.Model = strings.TrimSpace(modelID)
|
|
|
|
if config.Model != "" {
|
|
fmt.Printf("\n>>> Using model: %s\n", config.Model)
|
|
}
|
|
|
|
if config.Preflight {
|
|
fmt.Printf(">>> Preflight enabled (timeout %s)\n", config.PreflightTimeout)
|
|
}
|
|
|
|
runner := eval.NewRunner(config)
|
|
|
|
if len(patrolScenarios) > 0 {
|
|
modelPassed := true
|
|
for _, ps := range patrolScenarios {
|
|
fmt.Printf("\n>>> Running patrol scenario: %s\n", ps.Name)
|
|
fmt.Printf(">>> %s\n", ps.Description)
|
|
|
|
result := runner.RunPatrolScenario(ps)
|
|
runner.PrintPatrolSummary(result)
|
|
|
|
if !result.Success {
|
|
modelPassed = false
|
|
}
|
|
}
|
|
|
|
if !modelPassed {
|
|
allPassed = false
|
|
}
|
|
continue
|
|
}
|
|
|
|
modelPassed := true
|
|
for _, s := range scenarios {
|
|
fmt.Printf("\n>>> Running scenario: %s\n", s.Name)
|
|
fmt.Printf(">>> %s\n", s.Description)
|
|
|
|
result := runner.RunScenario(s)
|
|
runner.PrintSummary(result)
|
|
|
|
if !result.Passed {
|
|
modelPassed = false
|
|
}
|
|
}
|
|
|
|
if !modelPassed {
|
|
allPassed = false
|
|
}
|
|
}
|
|
|
|
if allPassed {
|
|
fmt.Printf("\n>>> ALL SCENARIOS PASSED\n")
|
|
os.Exit(0)
|
|
} else {
|
|
fmt.Printf("\n>>> SOME SCENARIOS FAILED\n")
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
func listScenarios() {
|
|
fmt.Println("Available scenarios:")
|
|
fmt.Println()
|
|
fmt.Println(" Basic:")
|
|
fmt.Println(" smoke - Quick smoke test (1 step)")
|
|
fmt.Println(" readonly - Read-only infrastructure test (3 steps)")
|
|
fmt.Println(" enforce - Explicit tool enforcement (1 step)")
|
|
fmt.Println(" routing - Routing validation test (2 steps)")
|
|
fmt.Println(" routing-recovery - Routing mismatch recovery (2 steps)")
|
|
fmt.Println(" logs - Log tailing/bounded command test (2 steps)")
|
|
fmt.Println(" readonly-recovery - Read-only violation recovery (1 step)")
|
|
fmt.Println(" search-id - Search then get by resource ID (1 step)")
|
|
fmt.Println(" disambiguate - Ambiguous resource disambiguation (1 step)")
|
|
fmt.Println(" context-target - Context target carryover (2 steps)")
|
|
fmt.Println(" discovery - Infrastructure discovery test (2 steps)")
|
|
fmt.Println()
|
|
fmt.Println(" Guest Control:")
|
|
fmt.Println(" guest-control - Stop + start a guest via @mentions (2 steps)")
|
|
fmt.Println(" guest-idempotent - Idempotent stop (stop twice + start, 3 steps)")
|
|
fmt.Println(" guest-discovery - Stop without @mentions (discovery path, 2 steps)")
|
|
fmt.Println(" guest-natural - Natural language variations (turn off, shut down, 4 steps)")
|
|
fmt.Println(" guest-multi - Multi-mention status query (2 resources, 1 step)")
|
|
fmt.Println()
|
|
fmt.Println(" Safety & Filtering:")
|
|
fmt.Println(" readonly-filtering - Control tools excluded from read-only queries (3 steps)")
|
|
fmt.Println(" read-loop-recovery - Model produces text after budget blocks (2 steps)")
|
|
fmt.Println(" ambiguous-intent - Ambiguous requests default to read-only (3 steps)")
|
|
fmt.Println()
|
|
fmt.Println(" Advanced:")
|
|
fmt.Println(" troubleshoot - Multi-step troubleshooting workflow (4 steps)")
|
|
fmt.Println(" deepdive - Deep investigation of a service (4 steps)")
|
|
fmt.Println(" config - Configuration file inspection (3 steps)")
|
|
fmt.Println(" resources - Resource analysis and comparison (3 steps)")
|
|
fmt.Println(" multinode - Multi-node operations (3 steps)")
|
|
fmt.Println(" docker - Docker-in-LXC operations (3 steps)")
|
|
fmt.Println(" context - Context chain / follow-up questions (4 steps)")
|
|
fmt.Println(" writeverify - Write + verify FSM flow (1 step)")
|
|
fmt.Println(" strict - Strict resolution block + recovery (2 steps)")
|
|
fmt.Println(" strict-block - Strict resolution block only (1 step)")
|
|
fmt.Println(" strict-recovery - Strict resolution recovery (1 step)")
|
|
fmt.Println(" readonly-guardrails - Read-only enforcement (1 step)")
|
|
fmt.Println(" noninteractive - Non-interactive guardrails (1 step)")
|
|
fmt.Println(" approval - Approval flow (1 step, opt-in)")
|
|
fmt.Println(" approval-approve - Approval approve flow (1 step, opt-in)")
|
|
fmt.Println(" approval-deny - Approval deny flow (1 step, opt-in)")
|
|
fmt.Println(" approval-combo - Approval approve + deny in one session (2 steps, opt-in)")
|
|
fmt.Println()
|
|
fmt.Println(" Patrol:")
|
|
fmt.Println(" patrol - Run all patrol scenarios")
|
|
fmt.Println(" patrol-basic - Basic patrol run (completion, tools, findings)")
|
|
fmt.Println(" patrol-investigation - Investigation quality (investigate before report)")
|
|
fmt.Println(" patrol-finding-quality - Finding validation (well-formed findings)")
|
|
fmt.Println(" patrol-signal-coverage - Signal-to-finding coverage scoring")
|
|
fmt.Println()
|
|
fmt.Println(" Collections:")
|
|
fmt.Println(" all - Run all basic scenarios")
|
|
fmt.Println(" matrix - Model matrix quick run (smoke + readonly)")
|
|
fmt.Println(" advanced - Run all advanced scenarios")
|
|
fmt.Println(" full - Run everything")
|
|
fmt.Println()
|
|
fmt.Println("Example:")
|
|
fmt.Println(" go run ./cmd/eval -scenario troubleshoot")
|
|
fmt.Println(" go run ./cmd/eval -scenario patrol-basic")
|
|
}
|
|
|
|
func getPatrolScenarios(name string) []eval.PatrolScenario {
|
|
switch name {
|
|
case "patrol":
|
|
return eval.AllPatrolScenarios()
|
|
case "patrol-basic":
|
|
return []eval.PatrolScenario{eval.PatrolBasicScenario()}
|
|
case "patrol-investigation":
|
|
return []eval.PatrolScenario{eval.PatrolInvestigationScenario()}
|
|
case "patrol-finding-quality":
|
|
return []eval.PatrolScenario{eval.PatrolFindingQualityScenario()}
|
|
case "patrol-signal-coverage", "patrol-quality":
|
|
return []eval.PatrolScenario{eval.PatrolSignalCoverageScenario()}
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func getScenarios(name string) []eval.Scenario {
|
|
switch name {
|
|
// Basic scenarios
|
|
case "smoke":
|
|
return []eval.Scenario{eval.QuickSmokeTest()}
|
|
case "readonly":
|
|
return []eval.Scenario{eval.ReadOnlyInfrastructureScenario()}
|
|
case "enforce":
|
|
return []eval.Scenario{eval.ExplicitToolEnforcementScenario()}
|
|
case "routing":
|
|
return []eval.Scenario{eval.RoutingValidationScenario()}
|
|
case "routing-recovery":
|
|
return []eval.Scenario{eval.RoutingMismatchRecoveryScenario()}
|
|
case "logs":
|
|
return []eval.Scenario{eval.LogTailingScenario()}
|
|
case "readonly-recovery":
|
|
return []eval.Scenario{eval.ReadOnlyViolationRecoveryScenario()}
|
|
case "search-id":
|
|
return []eval.Scenario{eval.SearchByIDScenario()}
|
|
case "disambiguate":
|
|
return []eval.Scenario{eval.AmbiguousResourceDisambiguationScenario()}
|
|
case "context-target":
|
|
return []eval.Scenario{eval.ContextTargetCarryoverScenario()}
|
|
case "discovery":
|
|
return []eval.Scenario{eval.DiscoveryScenario()}
|
|
|
|
// Guest control scenarios
|
|
case "guest-control":
|
|
return []eval.Scenario{eval.GuestControlStopScenario()}
|
|
case "guest-idempotent":
|
|
return []eval.Scenario{eval.GuestControlIdempotentScenario()}
|
|
case "guest-discovery":
|
|
return []eval.Scenario{eval.GuestControlDiscoveryScenario()}
|
|
case "guest-natural":
|
|
return []eval.Scenario{eval.GuestControlNaturalLanguageScenario()}
|
|
case "guest-multi":
|
|
return []eval.Scenario{eval.GuestControlMultiMentionScenario()}
|
|
|
|
// Safety & filtering scenarios
|
|
case "readonly-filtering":
|
|
return []eval.Scenario{eval.ReadOnlyToolFilteringScenario()}
|
|
case "read-loop-recovery":
|
|
return []eval.Scenario{eval.ReadLoopRecoveryScenario()}
|
|
case "ambiguous-intent":
|
|
return []eval.Scenario{eval.AmbiguousIntentScenario()}
|
|
|
|
// Advanced scenarios
|
|
case "troubleshoot":
|
|
return []eval.Scenario{eval.TroubleshootingScenario()}
|
|
case "deepdive":
|
|
return []eval.Scenario{eval.DeepDiveScenario()}
|
|
case "config":
|
|
return []eval.Scenario{eval.ConfigInspectionScenario()}
|
|
case "resources":
|
|
return []eval.Scenario{eval.ResourceAnalysisScenario()}
|
|
case "multinode":
|
|
return []eval.Scenario{eval.MultiNodeScenario()}
|
|
case "docker":
|
|
return []eval.Scenario{eval.DockerInDockerScenario()}
|
|
case "context":
|
|
return []eval.Scenario{eval.ContextChainScenario()}
|
|
case "writeverify":
|
|
return []eval.Scenario{eval.WriteVerifyScenario()}
|
|
case "strict":
|
|
return []eval.Scenario{eval.StrictResolutionScenario()}
|
|
case "strict-block":
|
|
return []eval.Scenario{eval.StrictResolutionBlockScenario()}
|
|
case "strict-recovery":
|
|
return []eval.Scenario{eval.StrictResolutionRecoveryScenario()}
|
|
case "readonly-guardrails":
|
|
return []eval.Scenario{eval.ReadOnlyEnforcementScenario()}
|
|
case "noninteractive":
|
|
return []eval.Scenario{eval.NonInteractiveGuardrailScenario()}
|
|
case "approval":
|
|
return []eval.Scenario{eval.ApprovalScenario()}
|
|
case "approval-approve":
|
|
return []eval.Scenario{eval.ApprovalApproveScenario()}
|
|
case "approval-deny":
|
|
return []eval.Scenario{eval.ApprovalDenyScenario()}
|
|
case "approval-combo":
|
|
return []eval.Scenario{eval.ApprovalComboScenario()}
|
|
|
|
// Collections
|
|
case "all":
|
|
return []eval.Scenario{
|
|
eval.QuickSmokeTest(),
|
|
eval.ReadOnlyInfrastructureScenario(),
|
|
eval.ExplicitToolEnforcementScenario(),
|
|
eval.RoutingValidationScenario(),
|
|
eval.RoutingMismatchRecoveryScenario(),
|
|
eval.LogTailingScenario(),
|
|
eval.ReadOnlyViolationRecoveryScenario(),
|
|
eval.SearchByIDScenario(),
|
|
eval.AmbiguousResourceDisambiguationScenario(),
|
|
eval.ContextTargetCarryoverScenario(),
|
|
eval.DiscoveryScenario(),
|
|
}
|
|
case "matrix":
|
|
return []eval.Scenario{
|
|
eval.QuickSmokeTest(),
|
|
eval.ReadOnlyInfrastructureScenario(),
|
|
}
|
|
case "advanced":
|
|
return []eval.Scenario{
|
|
eval.TroubleshootingScenario(),
|
|
eval.DeepDiveScenario(),
|
|
eval.ConfigInspectionScenario(),
|
|
eval.ResourceAnalysisScenario(),
|
|
eval.MultiNodeScenario(),
|
|
eval.DockerInDockerScenario(),
|
|
eval.ContextChainScenario(),
|
|
eval.WriteVerifyScenario(),
|
|
eval.GuestControlStopScenario(),
|
|
eval.GuestControlIdempotentScenario(),
|
|
eval.GuestControlDiscoveryScenario(),
|
|
eval.GuestControlNaturalLanguageScenario(),
|
|
eval.GuestControlMultiMentionScenario(),
|
|
eval.ReadOnlyToolFilteringScenario(),
|
|
eval.ReadLoopRecoveryScenario(),
|
|
eval.AmbiguousIntentScenario(),
|
|
eval.StrictResolutionScenario(),
|
|
eval.StrictResolutionBlockScenario(),
|
|
eval.StrictResolutionRecoveryScenario(),
|
|
eval.ReadOnlyEnforcementScenario(),
|
|
eval.NonInteractiveGuardrailScenario(),
|
|
eval.ApprovalComboScenario(),
|
|
}
|
|
case "full":
|
|
return []eval.Scenario{
|
|
eval.QuickSmokeTest(),
|
|
eval.ReadOnlyInfrastructureScenario(),
|
|
eval.ExplicitToolEnforcementScenario(),
|
|
eval.RoutingValidationScenario(),
|
|
eval.RoutingMismatchRecoveryScenario(),
|
|
eval.LogTailingScenario(),
|
|
eval.ReadOnlyViolationRecoveryScenario(),
|
|
eval.SearchByIDScenario(),
|
|
eval.AmbiguousResourceDisambiguationScenario(),
|
|
eval.ContextTargetCarryoverScenario(),
|
|
eval.DiscoveryScenario(),
|
|
eval.TroubleshootingScenario(),
|
|
eval.DeepDiveScenario(),
|
|
eval.ConfigInspectionScenario(),
|
|
eval.ResourceAnalysisScenario(),
|
|
eval.MultiNodeScenario(),
|
|
eval.DockerInDockerScenario(),
|
|
eval.ContextChainScenario(),
|
|
eval.WriteVerifyScenario(),
|
|
eval.GuestControlStopScenario(),
|
|
eval.GuestControlIdempotentScenario(),
|
|
eval.GuestControlDiscoveryScenario(),
|
|
eval.GuestControlNaturalLanguageScenario(),
|
|
eval.GuestControlMultiMentionScenario(),
|
|
eval.ReadOnlyToolFilteringScenario(),
|
|
eval.ReadLoopRecoveryScenario(),
|
|
eval.AmbiguousIntentScenario(),
|
|
eval.StrictResolutionScenario(),
|
|
eval.StrictResolutionBlockScenario(),
|
|
eval.StrictResolutionRecoveryScenario(),
|
|
eval.ReadOnlyEnforcementScenario(),
|
|
eval.NonInteractiveGuardrailScenario(),
|
|
eval.ApprovalComboScenario(),
|
|
}
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func envBool(key string) (bool, bool) {
|
|
value, ok := os.LookupEnv(key)
|
|
if !ok {
|
|
return false, false
|
|
}
|
|
switch strings.ToLower(strings.TrimSpace(value)) {
|
|
case "1", "true", "yes", "y", "on":
|
|
return true, true
|
|
case "0", "false", "no", "n", "off":
|
|
return false, true
|
|
default:
|
|
return false, false
|
|
}
|
|
}
|
|
|
|
func envInt(key string) (int, bool) {
|
|
value, ok := os.LookupEnv(key)
|
|
if !ok {
|
|
return 0, false
|
|
}
|
|
var parsed int
|
|
if _, err := fmt.Sscanf(strings.TrimSpace(value), "%d", &parsed); err != nil {
|
|
return 0, false
|
|
}
|
|
return parsed, true
|
|
}
|
|
|
|
func parseModelList(raw string) []string {
|
|
raw = strings.TrimSpace(raw)
|
|
if raw == "" {
|
|
return nil
|
|
}
|
|
parts := strings.Split(raw, ",")
|
|
models := make([]string, 0, len(parts))
|
|
for _, part := range parts {
|
|
trimmed := strings.TrimSpace(part)
|
|
if trimmed == "" {
|
|
continue
|
|
}
|
|
models = append(models, trimmed)
|
|
}
|
|
return models
|
|
}
|
|
|
|
type apiModelInfo struct {
|
|
ID string `json:"id"`
|
|
Name string `json:"name"`
|
|
Description string `json:"description,omitempty"`
|
|
Notable bool `json:"notable"`
|
|
CreatedAt int64 `json:"created_at,omitempty"`
|
|
}
|
|
|
|
type apiModelsResponse struct {
|
|
Models []apiModelInfo `json:"models"`
|
|
Error string `json:"error,omitempty"`
|
|
}
|
|
|
|
type providerStats struct {
|
|
Total int
|
|
Notable int
|
|
}
|
|
|
|
type autoSelectionDetail struct {
|
|
Provider string
|
|
ID string
|
|
Name string
|
|
Notable bool
|
|
CreatedAt int64
|
|
Reason string
|
|
}
|
|
|
|
func fetchAutoModels(baseURL, user, pass string) ([]string, []autoSelectionDetail, map[string]providerStats, error) {
|
|
if strings.TrimSpace(baseURL) == "" {
|
|
return nil, nil, nil, fmt.Errorf("base URL is required")
|
|
}
|
|
|
|
req, err := http.NewRequest("GET", strings.TrimRight(baseURL, "/")+"/api/ai/models", nil)
|
|
if err != nil {
|
|
return nil, nil, nil, fmt.Errorf("failed to build models request: %w", err)
|
|
}
|
|
req.SetBasicAuth(user, pass)
|
|
|
|
client := &http.Client{Timeout: 30 * time.Second}
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, nil, nil, fmt.Errorf("models request failed: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(resp.Body)
|
|
return nil, nil, nil, fmt.Errorf("models request returned %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
|
|
}
|
|
|
|
var payload apiModelsResponse
|
|
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
|
|
return nil, nil, nil, fmt.Errorf("failed to decode models response: %w", err)
|
|
}
|
|
if payload.Error != "" {
|
|
return nil, nil, nil, fmt.Errorf("%s", payload.Error)
|
|
}
|
|
|
|
providerFilter := parseProviderFilterWithDefault(os.Getenv("EVAL_MODEL_PROVIDERS"))
|
|
excludeKeywords := parseExcludeKeywords(os.Getenv("EVAL_MODEL_EXCLUDE_KEYWORDS"))
|
|
limit := 2
|
|
if value, ok := envInt("EVAL_MODEL_LIMIT"); ok && value > 0 {
|
|
limit = value
|
|
}
|
|
|
|
grouped := make(map[string][]apiModelInfo)
|
|
stats := make(map[string]providerStats)
|
|
for _, model := range payload.Models {
|
|
if model.ID == "" {
|
|
continue
|
|
}
|
|
parts := strings.SplitN(model.ID, ":", 2)
|
|
provider := parts[0]
|
|
if provider == "" {
|
|
continue
|
|
}
|
|
if len(providerFilter) > 0 && !providerFilter[provider] {
|
|
continue
|
|
}
|
|
if len(excludeKeywords) > 0 && hasAnyKeyword(model, excludeKeywords) {
|
|
continue
|
|
}
|
|
grouped[provider] = append(grouped[provider], model)
|
|
stat := stats[provider]
|
|
stat.Total++
|
|
if model.Notable {
|
|
stat.Notable++
|
|
}
|
|
stats[provider] = stat
|
|
}
|
|
|
|
if len(grouped) == 0 {
|
|
return nil, nil, stats, fmt.Errorf("no models found for auto-selection")
|
|
}
|
|
|
|
providers := make([]string, 0, len(grouped))
|
|
for provider := range grouped {
|
|
providers = append(providers, provider)
|
|
}
|
|
sort.Strings(providers)
|
|
|
|
seen := make(map[string]bool)
|
|
selected := make([]string, 0, len(grouped)*limit)
|
|
details := make([]autoSelectionDetail, 0, len(grouped)*limit)
|
|
for _, provider := range providers {
|
|
models := grouped[provider]
|
|
sort.Slice(models, func(i, j int) bool {
|
|
if models[i].Notable != models[j].Notable {
|
|
return models[i].Notable
|
|
}
|
|
if models[i].CreatedAt != models[j].CreatedAt {
|
|
return models[i].CreatedAt > models[j].CreatedAt
|
|
}
|
|
return models[i].ID < models[j].ID
|
|
})
|
|
for _, model := range models {
|
|
if len(selected) >= len(grouped)*limit {
|
|
break
|
|
}
|
|
if seen[model.ID] {
|
|
continue
|
|
}
|
|
seen[model.ID] = true
|
|
selected = append(selected, model.ID)
|
|
details = append(details, autoSelectionDetail{
|
|
Provider: provider,
|
|
ID: model.ID,
|
|
Name: model.Name,
|
|
Notable: model.Notable,
|
|
CreatedAt: model.CreatedAt,
|
|
Reason: selectionReason(model, stats[provider]),
|
|
})
|
|
if countProvider(selected, provider) >= limit {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(selected) == 0 {
|
|
return nil, nil, stats, fmt.Errorf("auto-selection produced no models")
|
|
}
|
|
return selected, details, stats, nil
|
|
}
|
|
|
|
func parseProviderFilter(raw string) map[string]bool {
|
|
raw = strings.TrimSpace(raw)
|
|
if raw == "" {
|
|
return nil
|
|
}
|
|
out := make(map[string]bool)
|
|
for _, part := range strings.Split(raw, ",") {
|
|
trimmed := strings.TrimSpace(part)
|
|
if trimmed == "" {
|
|
continue
|
|
}
|
|
out[trimmed] = true
|
|
}
|
|
return out
|
|
}
|
|
|
|
func parseProviderFilterWithDefault(raw string) map[string]bool {
|
|
raw = strings.TrimSpace(raw)
|
|
if raw == "" {
|
|
return map[string]bool{
|
|
"openai": true,
|
|
"anthropic": true,
|
|
"deepseek": true,
|
|
"gemini": true,
|
|
"ollama": true,
|
|
}
|
|
}
|
|
return parseProviderFilter(raw)
|
|
}
|
|
|
|
func parseExcludeKeywords(raw string) []string {
|
|
raw = strings.TrimSpace(raw)
|
|
if raw == "" {
|
|
return []string{
|
|
"codex",
|
|
"openai:gpt-5.2-pro",
|
|
"image",
|
|
"vision",
|
|
"video",
|
|
"audio",
|
|
"speech",
|
|
"embed",
|
|
"embedding",
|
|
"moderation",
|
|
"rerank",
|
|
"tts",
|
|
"realtime",
|
|
"transcribe",
|
|
}
|
|
}
|
|
switch strings.ToLower(raw) {
|
|
case "0", "false", "off", "none":
|
|
return nil
|
|
}
|
|
parts := strings.Split(raw, ",")
|
|
out := make([]string, 0, len(parts))
|
|
for _, part := range parts {
|
|
trimmed := strings.TrimSpace(part)
|
|
if trimmed == "" {
|
|
continue
|
|
}
|
|
out = append(out, strings.ToLower(trimmed))
|
|
}
|
|
return out
|
|
}
|
|
|
|
func hasAnyKeyword(model apiModelInfo, keywords []string) bool {
|
|
if len(keywords) == 0 {
|
|
return false
|
|
}
|
|
target := strings.ToLower(model.ID + " " + model.Name + " " + model.Description)
|
|
for _, keyword := range keywords {
|
|
if keyword == "" {
|
|
continue
|
|
}
|
|
if strings.Contains(target, keyword) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func countProvider(models []string, provider string) int {
|
|
if provider == "" {
|
|
return 0
|
|
}
|
|
count := 0
|
|
prefix := provider + ":"
|
|
for _, model := range models {
|
|
if strings.HasPrefix(model, prefix) {
|
|
count++
|
|
}
|
|
}
|
|
return count
|
|
}
|
|
|
|
func selectionReason(model apiModelInfo, stat providerStats) string {
|
|
parts := make([]string, 0, 2)
|
|
if stat.Notable == 0 {
|
|
parts = append(parts, "no notable models")
|
|
}
|
|
if model.Notable {
|
|
parts = append(parts, "notable")
|
|
} else if model.CreatedAt > 0 {
|
|
created := time.Unix(model.CreatedAt, 0).UTC().Format("2006-01-02")
|
|
parts = append(parts, "created_at="+created)
|
|
} else {
|
|
parts = append(parts, "fallback")
|
|
}
|
|
return strings.Join(parts, "; ")
|
|
}
|
|
|
|
func sortedProviders(stats map[string]providerStats) []string {
|
|
providers := make([]string, 0, len(stats))
|
|
for provider := range stats {
|
|
providers = append(providers, provider)
|
|
}
|
|
sort.Strings(providers)
|
|
return providers
|
|
}
|