// NOTE: the change that introduces this helper also adds "fmt" to the file's
// import block; the helper below relies on it.

// mockAnthropicSSE returns an Anthropic-style SSE stream made of one thinking
// block (index 0) followed by one text block (index 1), closed by
// message_delta and message_stop events.
//
// thinkingText and responseText end up as JSON string values inside the event
// payloads, so both are JSON-encoded before being spliced in. Quotes,
// backslashes and newlines in the inputs therefore cannot produce invalid JSON
// or break the line-oriented SSE framing. Text without such characters renders
// exactly as it would with plain quoting (encoding/json additionally escapes
// <, > and & as \u00XX sequences, which decode to the same text).
func mockAnthropicSSE(thinkingText, responseText string) string {
	// jsonString renders s as a complete JSON string literal, quotes included.
	jsonString := func(s string) string {
		encoded, err := json.Marshal(s)
		if err != nil {
			// Not expected for a plain string; fail loudly rather than hand a
			// corrupt fixture to the code under test.
			panic(fmt.Sprintf("mockAnthropicSSE: encoding %q: %v", s, err))
		}
		return string(encoded)
	}

	// The two %s verbs sit where a whole JSON string literal belongs, so the
	// template deliberately has no quotes around them.
	return fmt.Sprintf(`event: message_start
data: {"type":"message_start","message":{"id":"msg_test","type":"message","role":"assistant","model":"claude-test","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0}}}

event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}

event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":%s}}

event: content_block_stop
data: {"type":"content_block_stop","index":0}

event: content_block_start
data: {"type":"content_block_start","index":1,"content_block":{"type":"text","text":""}}

event: content_block_delta
data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":%s}}

event: content_block_stop
data: {"type":"content_block_stop","index":1}

event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":20}}

event: message_stop
data: {"type":"message_stop"}

`, jsonString(thinkingText), jsonString(responseText))
}

// TestCustomModelTestEndpointWithMockAnthropic verifies that the model test
// endpoint correctly handles Anthropic responses with thinking blocks.
+func TestCustomModelTestEndpointWithMockAnthropic(t *testing.T) { + mockSSE := mockAnthropicSSE("I need to think about this...", "test successful") + + // Mock Anthropic API server + mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/event-stream") + fmt.Fprint(w, mockSSE) + })) + defer mockServer.Close() + + h := NewTestHarness(t) + + testReq := struct { + ProviderType string `json:"provider_type"` + APIKey string `json:"api_key"` + Endpoint string `json:"endpoint"` + ModelName string `json:"model_name"` + }{ + ProviderType: "anthropic", + APIKey: "sk-test-fake", + Endpoint: mockServer.URL + "/v1/messages", + ModelName: "claude-test", + } + + body, _ := json.Marshal(testReq) + req := httptest.NewRequest(http.MethodPost, "/api/custom-models/test", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + h.server.handleTestModel(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("Expected status 200, got %d: %s", w.Code, w.Body.String()) + } + + var result map[string]interface{} + if err := json.Unmarshal(w.Body.Bytes(), &result); err != nil { + t.Fatalf("Failed to parse response: %v", err) + } + + success, ok := result["success"].(bool) + if !ok || !success { + t.Fatalf("Expected success=true, got: %v (message: %v)", result["success"], result["message"]) + } + + message, ok := result["message"].(string) + if !ok { + t.Fatal("Response missing message field") + } + + if message == "" || message == "Test failed: empty response from model" { + t.Fatal("Got empty response error despite valid mock API returning text after thinking block") + } + + t.Logf("SUCCESS: Mock Anthropic response with thinking handled correctly: %s", message) +} + +// TestCustomModelTestEndpointOnlyThinkingBlocks verifies that the model test +// endpoint correctly rejects responses with only thinking blocks (no text). 
+func TestCustomModelTestEndpointOnlyThinkingBlocks(t *testing.T) { + mockSSE := `event: message_start +data: {"type":"message_start","message":{"id":"msg_test","type":"message","role":"assistant","model":"claude-test","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0}}} + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"Hmm, let me think about this..."}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":10}} + +event: message_stop +data: {"type":"message_stop"} +` + + mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/event-stream") + fmt.Fprint(w, mockSSE) + })) + defer mockServer.Close() + + h := NewTestHarness(t) + + testReq := struct { + ProviderType string `json:"provider_type"` + APIKey string `json:"api_key"` + Endpoint string `json:"endpoint"` + ModelName string `json:"model_name"` + }{ + ProviderType: "anthropic", + APIKey: "sk-test-fake", + Endpoint: mockServer.URL + "/v1/messages", + ModelName: "claude-test", + } + + body, _ := json.Marshal(testReq) + req := httptest.NewRequest(http.MethodPost, "/api/custom-models/test", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + h.server.handleTestModel(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("Expected status 200, got %d: %s", w.Code, w.Body.String()) + } + + var result map[string]interface{} + if err := json.Unmarshal(w.Body.Bytes(), &result); err != nil { + t.Fatalf("Failed to parse response: %v", err) + } + + success, ok := result["success"].(bool) + if 
ok && success { + t.Fatal("Expected success=false for response with only thinking blocks") + } + + message, ok := result["message"].(string) + if !ok { + t.Fatal("Response missing message field") + } + + if message != "Test failed: empty response from model" { + t.Fatalf("Expected 'empty response' error, got: %s", message) + } + + t.Logf("SUCCESS: Response with only thinking blocks correctly rejected: %s", message) +} + // TestCustomModelWithThinking tests that the custom model test endpoint // correctly handles responses from Anthropic models with ThinkingLevel enabled. // When thinking is enabled, the first content block is a thinking block, not text.