fix(ai): filter DeepSeek DSML internal function-call format from responses

2026-02-18 00:17:39 +01:00 · 2026-02-01 18:07:41 +00:00
parent 78cd64338a
commit 71e00ee7df
4 changed files with 145 additions and 28 deletions
--- a/internal/ai/chat/agentic.go
+++ b/internal/ai/chat/agentic.go
@@ -267,6 +267,18 @@ func (a *AgenticLoop) executeWithTools(ctx context.Context, sessionID string, me
 			switch event.Type {
 			case "content":
 				if data, ok := event.Data.(providers.ContentEvent); ok {
+					// Check for DeepSeek DSML marker - if detected, stop streaming this chunk
+					// The DSML format indicates the model is outputting internal function call
+					// formatting instead of using the proper tool calling API
+					if containsDeepSeekMarker(data.Text) {
+						// Don't append or stream this content
+						return
+					}
+					// Also check if the accumulated content already has the marker
+					// (in case it arrived in a previous chunk)
+					if containsDeepSeekMarker(contentBuilder.String()) {
+						return
+					}
 					contentBuilder.WriteString(data.Text)
 					// Forward to callback - send ContentData struct
 					jsonData, _ := json.Marshal(ContentData{Text: data.Text})
@@ -388,10 +400,12 @@ func (a *AgenticLoop) executeWithTools(ctx context.Context, sessionID string, me
 		}

 		// Create assistant message
+		// Clean DeepSeek artifacts from the content before storing
+		cleanedContent := cleanDeepSeekArtifacts(contentBuilder.String())
 		assistantMsg := Message{
 			ID:               uuid.New().String(),
 			Role:             "assistant",
-			Content:          contentBuilder.String(),
+			Content:          cleanedContent,
 			ReasoningContent: thinkingBuilder.String(),
 			Timestamp:        time.Now(),
 		}
@@ -1225,7 +1239,7 @@ func (a *AgenticLoop) ensureFinalTextResponse(
 		summaryMsg := Message{
 			ID:        uuid.New().String(),
 			Role:      "assistant",
-			Content:   summaryBuilder.String(),
+			Content:   cleanDeepSeekArtifacts(summaryBuilder.String()),
 			Timestamp: time.Now(),
 		}
 		resultMessages = append(resultMessages, summaryMsg)
@@ -2226,3 +2240,49 @@ func formatKeyParams(input map[string]interface{}) string {

 	return strings.Join(parts, ", ")
 }
+
+// cleanDeepSeekArtifacts truncates content at the earliest DeepSeek DSML marker
+// (e.g. <｜DSML｜function_calls>, <｜DSML｜invoke>) and trims surrounding whitespace
+// from what remains; content without any marker, including "", is returned unchanged.
+// Markers are matched with both the fullwidth bar (｜, U+FF5C) and the ASCII pipe (|).
+// Visible callers apply it to assistant and summary message content before storing it.
+func cleanDeepSeekArtifacts(content string) string {
+	if content == "" {
+		return content
+	}
+
+	// Opening and closing DSML tag prefixes, in fullwidth-bar and ASCII-pipe spellings.
+	markers := []string{
+		"<｜DSML｜",  // Unicode pipe variant (opening)
+		"</｜DSML｜", // Unicode pipe variant (closing)
+		"<|DSML|",  // ASCII pipe variant (opening)
+		"</|DSML|", // ASCII pipe variant (closing)
+		"<｜/DSML｜", // Alternative Unicode closing
+		"<|/DSML|", // Alternative ASCII closing
+	}
+
+	for _, marker := range markers {
+		if idx := strings.Index(content, marker); idx >= 0 {
+			// DSML blocks typically trail the response text, so drop everything from
+			// the marker onward; later markers are then searched in the shortened text.
+			content = strings.TrimSpace(content[:idx])
+		}
+	}
+
+	return content
+}
+
+// containsDeepSeekMarker reports whether content contains an opening DSML marker (fullwidth-bar or ASCII-pipe spelling).
+// The streaming loop uses it to stop forwarding content; a marker must lie wholly within content to match.
+func containsDeepSeekMarker(content string) bool {
+	markers := []string{
+		"<｜DSML｜", // Unicode pipe variant
+		"<|DSML|", // ASCII pipe variant
+	}
+	for _, marker := range markers {
+		if strings.Contains(content, marker) {
+			return true
+		}
+	}
+	return false
+}
--- a/internal/ai/chat/service.go
+++ b/internal/ai/chat/service.go
@@ -328,6 +328,12 @@ func (s *Service) ExecuteStream(ctx context.Context, req ExecuteRequest, callbac
 		configuredModel = strings.TrimSpace(s.cfg.GetChatModel())
 	}
 	s.mu.RUnlock()
+
+	// Per-request autonomous mode override (used by investigation to avoid
+	// mutating shared service state from concurrent goroutines).
+	if req.AutonomousMode != nil {
+		autonomousMode = *req.AutonomousMode
+	}
 	selectedModel = configuredModel
 	if overrideModel != "" {
 		selectedModel = overrideModel
@@ -481,6 +487,11 @@ func (s *Service) ExecuteStream(ctx context.Context, req ExecuteRequest, callbac
 		defer loop.SetMaxTurns(MaxAgenticTurns)
 	}

+	// Apply per-request autonomous mode to the loop. For investigation requests
+	// with AutonomousMode set, this uses the per-request value instead of
+	// mutating shared service state from concurrent goroutines.
+	loop.SetAutonomousMode(autonomousMode)
+
 	resultMessages, err := loop.ExecuteWithTools(ctx, session.ID, messages, filteredTools, callback)

 	log.Debug().
--- a/internal/ai/chat/types.go
+++ b/internal/ai/chat/types.go
@@ -67,11 +67,12 @@ type StructuredMention struct {

 // ExecuteRequest represents a chat execution request
 type ExecuteRequest struct {
-	Prompt    string              `json:"prompt"`
-	SessionID string              `json:"session_id,omitempty"`
-	Model     string              `json:"model,omitempty"`
-	Mentions  []StructuredMention `json:"mentions,omitempty"`
-	MaxTurns  int                 `json:"max_turns,omitempty"` // Override max agentic turns (0 = use default)
+	Prompt         string              `json:"prompt"`
+	SessionID      string              `json:"session_id,omitempty"`
+	Model          string              `json:"model,omitempty"`
+	Mentions       []StructuredMention `json:"mentions,omitempty"`
+	MaxTurns       int                 `json:"max_turns,omitempty"`       // Override max agentic turns (0 = use default)
+	AutonomousMode *bool               `json:"autonomous_mode,omitempty"` // Per-request autonomous override (nil = use service default)
 }

 // QuestionAnswer represents a user's answer to a question
--- a/internal/ai/patrol_ai_test.go
+++ b/internal/ai/patrol_ai_test.go
@@ -8,7 +8,7 @@ import (

 func TestCleanThinkingTokens_DeepSeek(t *testing.T) {
 	input := "Some analysis\n<｜end▁of▁thinking｜>\nActual content here"
-	result := cleanThinkingTokens(input)
+	result := CleanThinkingTokens(input)
 	if containsSubstr(result, "end▁of▁thinking") {
 		t.Errorf("DeepSeek thinking marker should be removed, got: %s", result)
 	}
@@ -34,22 +34,22 @@ After comprehensive analysis of your infrastructure, I identified several issues

 1. **Critical CPU overload on Tower host**`

-	result := cleanThinkingTokens(input)
+	result := CleanThinkingTokens(input)

 	if containsSubstr(result, "<｜end▁of▁thinking｜>") {
-		t.Errorf("cleanThinkingTokens() should have removed DeepSeek thinking markers")
+		t.Errorf("CleanThinkingTokens() should have removed DeepSeek thinking markers")
 	}
 	if containsSubstr(result, "Now, also consider") || containsSubstr(result, "Let's add an info") {
-		t.Errorf("cleanThinkingTokens() should have removed internal reasoning")
+		t.Errorf("CleanThinkingTokens() should have removed internal reasoning")
 	}
 	if !containsSubstr(result, "## Analysis Summary") {
-		t.Errorf("cleanThinkingTokens() removed header")
+		t.Errorf("CleanThinkingTokens() removed header")
 	}
 	if !containsSubstr(result, "### Key Findings") {
-		t.Errorf("cleanThinkingTokens() removed findings section")
+		t.Errorf("CleanThinkingTokens() removed findings section")
 	}
 	if !containsSubstr(result, "Critical CPU overload") {
-		t.Errorf("cleanThinkingTokens() removed actual finding")
+		t.Errorf("CleanThinkingTokens() removed actual finding")
 	}
 }

@@ -60,10 +60,10 @@ Now, let's check something.

 ## Real Content`

-	result := cleanThinkingTokens(input)
+	result := CleanThinkingTokens(input)

 	if result != "## Real Content" {
-		t.Errorf("cleanThinkingTokens() failed for ASCII variant: got %q", result)
+		t.Errorf("CleanThinkingTokens() failed for ASCII variant: got %q", result)
 	}
 }

@@ -71,7 +71,7 @@ Now, let's check something.

 func TestCleanThinkingTokens_ThinkBlock(t *testing.T) {
 	input := "Before content\n<think>This is internal reasoning\nthat spans multiple lines</think>\nAfter content"
-	result := cleanThinkingTokens(input)
+	result := CleanThinkingTokens(input)
 	if containsSubstr(result, "internal reasoning") {
 		t.Errorf("<think> block content should be removed, got: %s", result)
 	}
@@ -85,7 +85,7 @@ func TestCleanThinkingTokens_ThinkBlock(t *testing.T) {

 func TestCleanThinkingTokens_ThoughtBlock(t *testing.T) {
 	input := "Start\n<thought>Some deep thought process here</thought>\nEnd"
-	result := cleanThinkingTokens(input)
+	result := CleanThinkingTokens(input)
 	if containsSubstr(result, "deep thought") {
 		t.Errorf("<thought> block content should be removed, got: %s", result)
 	}
@@ -99,7 +99,7 @@ func TestCleanThinkingTokens_ThoughtBlock(t *testing.T) {

 func TestCleanThinkingTokens_ReasoningBlock(t *testing.T) {
 	input := "Start\n<|reasoning|>Internal reasoning here<|/reasoning|>\nEnd"
-	result := cleanThinkingTokens(input)
+	result := CleanThinkingTokens(input)
 	if containsSubstr(result, "Internal reasoning") {
 		t.Errorf("<|reasoning|> block content should be removed, got: %s", result)
 	}
@@ -113,7 +113,7 @@ func TestCleanThinkingTokens_ReasoningBlock(t *testing.T) {

 func TestCleanThinkingTokens_CaseInsensitiveBlocks(t *testing.T) {
 	input := "Before\n<THINK>uppercase thinking</THINK>\nAfter"
-	result := cleanThinkingTokens(input)
+	result := CleanThinkingTokens(input)
 	if containsSubstr(result, "uppercase thinking") {
 		t.Errorf("Case-insensitive <THINK> block should be removed, got: %s", result)
 	}
@@ -124,7 +124,7 @@ func TestCleanThinkingTokens_CaseInsensitiveBlocks(t *testing.T) {

 func TestCleanThinkingTokens_MultipleBlocks(t *testing.T) {
 	input := "<think>first block</think>content between<think>second block</think>final content"
-	result := cleanThinkingTokens(input)
+	result := CleanThinkingTokens(input)
 	if containsSubstr(result, "first block") || containsSubstr(result, "second block") {
 		t.Errorf("All think blocks should be removed, got: %s", result)
 	}
@@ -135,7 +135,7 @@ func TestCleanThinkingTokens_MultipleBlocks(t *testing.T) {

 func TestCleanThinkingTokens_UnclosedBlock(t *testing.T) {
 	input := "Start content<think>unclosed block with no end"
-	result := cleanThinkingTokens(input)
+	result := CleanThinkingTokens(input)
 	if !containsSubstr(result, "Start content") {
 		t.Errorf("Content before unclosed block should be preserved, got: %s", result)
 	}
@@ -157,14 +157,14 @@ Now, I need to look at memory.

 - Issue 1`

-	result := cleanThinkingTokens(input)
+	result := CleanThinkingTokens(input)

 	if !containsSubstr(result, "## Analysis") || !containsSubstr(result, "### Findings") || !containsSubstr(result, "- Issue 1") {
-		t.Errorf("cleanThinkingTokens() removed too much: got %q", result)
+		t.Errorf("CleanThinkingTokens() removed too much: got %q", result)
 	}

 	if containsSubstr(result, "Let's check") || containsSubstr(result, "Now, I need") {
-		t.Errorf("cleanThinkingTokens() should have removed reasoning: got %q", result)
+		t.Errorf("CleanThinkingTokens() should have removed reasoning: got %q", result)
 	}
 }

@@ -180,14 +180,14 @@ This is a normal response without any thinking tokens.
 1. Issue one
 2. Issue two`

-	result := cleanThinkingTokens(input)
+	result := CleanThinkingTokens(input)
 	if result != input {
-		t.Errorf("cleanThinkingTokens() modified clean content:\nGot: %q\nExpected: %q", result, input)
+		t.Errorf("CleanThinkingTokens() modified clean content:\nGot: %q\nExpected: %q", result, input)
 	}
 }

 func TestCleanThinkingTokens_EmptyContent(t *testing.T) {
-	result := cleanThinkingTokens("")
+	result := CleanThinkingTokens("")
 	if result != "" {
 		t.Errorf("Empty string should return empty, got: %q", result)
 	}
@@ -203,3 +203,48 @@ func containsSubstr(s, substr string) bool {
 	}
 	return false
 }
+
+func TestCleanThinkingTokens_DeepSeekDSMLFormat(t *testing.T) {
+	// Fullwidth-bar variant: DeepSeek sometimes emits its internal function-call markup
+	// (<｜DSML｜function_calls> ... ) as visible text instead of using the tool-calling API.
+	// The cleaned result must be exactly the prose before the block, with no DSML remnants.
+	input := `I've successfully updated the services.yaml file to remove the InfluxDB entry. Let me verify the change:
+
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="pulse_read">
+<｜DSML｜parameter name="action" string="true">exec</｜DSML｜parameter>
+<｜DSML｜parameter name="command" string="true">grep -A 10 "Monitoring:" /opt/homepage/config/services.yaml</｜DSML｜parameter>
+<｜DSML｜parameter name="target_host" string="true">homepage-docker</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls>`
+
+	result := CleanThinkingTokens(input)
+
+	expected := "I've successfully updated the services.yaml file to remove the InfluxDB entry. Let me verify the change:"
+	if result != expected {
+		t.Errorf("CleanThinkingTokens() failed to remove DeepSeek DSML format:\nGot: %q\nExpected: %q", result, expected)
+	}
+	if containsSubstr(result, "DSML") {
+		t.Errorf("CleanThinkingTokens() should have removed all DSML markers, got: %s", result)
+	}
+	if containsSubstr(result, "pulse_read") {
+		t.Errorf("CleanThinkingTokens() should have removed tool call content, got: %s", result)
+	}
+}
+
+func TestCleanThinkingTokens_DeepSeekDSMLFormatASCII(t *testing.T) {
+	// ASCII-pipe variant (<|DSML|...>): only the sentence before the DSML block may remain.
+	input := `Command completed successfully.
+
+<|DSML|function_calls>
+<|DSML|invoke name="pulse_query">
+<|DSML|parameter name="action">health</|DSML|parameter>
+</|DSML|invoke>
+</|DSML|function_calls>`
+
+	result := CleanThinkingTokens(input)
+	expected := "Command completed successfully."
+	if result != expected {
+		t.Errorf("CleanThinkingTokens() failed to remove ASCII DSML format:\nGot: %q\nExpected: %q", result, expected)
+	}
+}