Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions e2e/distributed_latency_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func TestDistributedLatencyTracking(t *testing.T) {
redisClient := goredis.NewClient(&goredis.Options{
Addr: mr.Addr(),
})
defer redisClient.Close()
defer func() { _ = redisClient.Close() }()

logger, _ := zap.NewDevelopment()

Expand Down Expand Up @@ -129,7 +129,7 @@ func TestMultiPodFailover(t *testing.T) {
redisClient := goredis.NewClient(&goredis.Options{
Addr: mr.Addr(),
})
defer redisClient.Close()
defer func() { _ = redisClient.Close() }()

logger, _ := zap.NewDevelopment()
tracker := redis.NewLatencyTracker(redisClient, logger)
Expand Down Expand Up @@ -159,7 +159,7 @@ func TestMultiPodFailover(t *testing.T) {

// Verify we can identify the fastest model
var fastestModel string
var fastestLatency time.Duration = 1 * time.Hour
fastestLatency := 1 * time.Hour

for model, stats := range allStats {
t.Logf("Model: %s, Avg: %v, P95: %v", model, stats.Average, stats.P95)
Expand All @@ -184,7 +184,7 @@ func TestConcurrentLatencyUpdates(t *testing.T) {
redisClient := goredis.NewClient(&goredis.Options{
Addr: mr.Addr(),
})
defer redisClient.Close()
defer func() { _ = redisClient.Close() }()

logger := zap.NewNop() // Silence logs for concurrency test
tracker := redis.NewLatencyTracker(redisClient, logger)
Expand Down Expand Up @@ -248,7 +248,7 @@ func TestLatencyBasedRouting(t *testing.T) {
redisClient := goredis.NewClient(&goredis.Options{
Addr: mr.Addr(),
})
defer redisClient.Close()
defer func() { _ = redisClient.Close() }()

logger, _ := zap.NewDevelopment()

Expand Down Expand Up @@ -313,7 +313,7 @@ func TestHealthScoreCalculation(t *testing.T) {
redisClient := goredis.NewClient(&goredis.Options{
Addr: mr.Addr(),
})
defer redisClient.Close()
defer func() { _ = redisClient.Close() }()

logger, _ := zap.NewDevelopment()
tracker := redis.NewLatencyTracker(redisClient, logger)
Expand Down
3 changes: 2 additions & 1 deletion internal/api/handlers/admin/others.go
Original file line number Diff line number Diff line change
Expand Up @@ -958,10 +958,11 @@ func (h *SystemHandler) GetAuthConfig(w http.ResponseWriter, r *http.Request) {

func (h *SystemHandler) GetConfig(w http.ResponseWriter, r *http.Request) {
cfg := config.Get()

// Build router configuration
routerConfig := map[string]interface{}{
"routing_strategy": cfg.Router.RoutingStrategy,
"fallbacks": cfg.Router.Fallbacks,
}

h.sendJSON(w, http.StatusOK, map[string]interface{}{
Expand Down
140 changes: 128 additions & 12 deletions internal/api/handlers/dashboard.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,16 @@ type DashboardMetrics struct {
}

type ModelUsage struct {
Model string `json:"model"`
Requests int64 `json:"requests"`
Tokens int64 `json:"tokens"`
Cost float64 `json:"cost"`
AvgLatency int64 `json:"avg_latency"`
SuccessRate float64 `json:"success_rate"`
Model string `json:"model"`
Requests int64 `json:"requests"`
Tokens int64 `json:"tokens"`
Cost float64 `json:"cost"`
AvgLatency int64 `json:"avg_latency"`
SuccessRate float64 `json:"success_rate"`
HealthScore float64 `json:"health_score"`
P95Latency float64 `json:"p95_latency"`
P99Latency float64 `json:"p99_latency"`
CacheHitRate float64 `json:"cache_hit_rate"`
}

func (h *DashboardHandler) GetDashboardMetrics(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -139,17 +143,20 @@ func (h *DashboardHandler) GetDashboardMetrics(w http.ResponseWriter, r *http.Re
h.logger.Error("Failed to get 1h stats", zap.Error(err))
}

// Get top models by requests (last 24h)
// Get top models by requests (last 24h) with enhanced metrics
var topModels []ModelUsage
err = h.db.Raw(`
SELECT
SELECT
model,
COUNT(*) as requests,
SUM(total_tokens) as tokens,
SUM(total_cost) as cost,
ROUND(AVG(latency)) as avg_latency,
ROUND(AVG(CASE WHEN status_code = 200 THEN 100 ELSE 0 END), 2) as success_rate
FROM usage_logs
ROUND(AVG(CASE WHEN status_code = 200 THEN 100 ELSE 0 END), 2) as success_rate,
ROUND(AVG(CASE WHEN cache_hit THEN 100 ELSE 0 END), 2) as cache_hit_rate,
PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY latency) as p95_latency,
PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY latency) as p99_latency
FROM usage_logs
WHERE timestamp >= ?
GROUP BY model
ORDER BY requests DESC
Expand All @@ -159,6 +166,14 @@ func (h *DashboardHandler) GetDashboardMetrics(w http.ResponseWriter, r *http.Re
if err != nil {
h.logger.Error("Failed to get top models", zap.Error(err))
} else {
// Calculate health scores based on latency and success rate
for i := range topModels {
topModels[i].HealthScore = calculateHealthScore(
topModels[i].AvgLatency,
topModels[i].SuccessRate,
topModels[i].P99Latency,
)
}
metrics.TopModels = topModels
}

Expand Down Expand Up @@ -220,7 +235,7 @@ func (h *DashboardHandler) GetUsageTrends(w http.ResponseWriter, r *http.Request
if days == "" {
days = "30"
}

var daysInt int
switch days {
case "7":
Expand All @@ -241,7 +256,7 @@ func (h *DashboardHandler) GetUsageTrends(w http.ResponseWriter, r *http.Request
}

err := h.db.Raw(`
SELECT
SELECT
DATE(timestamp) as date,
COUNT(*) as requests,
SUM(total_tokens) as tokens,
Expand All @@ -263,4 +278,105 @@ func (h *DashboardHandler) GetUsageTrends(w http.ResponseWriter, r *http.Request
h.logger.Error("Failed to encode usage trends", zap.Error(err))
http.Error(w, "Failed to encode response", http.StatusInternalServerError)
}
}

func (h *DashboardHandler) GetModelTrends(w http.ResponseWriter, r *http.Request) {
modelName := chi.URLParam(r, "model")
if modelName == "" {
http.Error(w, "Model name is required", http.StatusBadRequest)
return
}

days := r.URL.Query().Get("days")
if days == "" {
days = "30"
}

var daysInt int
switch days {
case "7":
daysInt = 7
case "30":
daysInt = 30
default:
daysInt = 30
}

since := time.Now().AddDate(0, 0, -daysInt)

var trends []struct {
Date string `json:"date"`
Requests int64 `json:"requests"`
Tokens int64 `json:"tokens"`
Cost float64 `json:"cost"`
AvgLatency int64 `json:"avg_latency"`
SuccessRate float64 `json:"success_rate"`
}

err := h.db.Raw(`
SELECT
DATE(timestamp) as date,
COUNT(*) as requests,
SUM(total_tokens) as tokens,
SUM(total_cost) as cost,
ROUND(AVG(latency)) as avg_latency,
ROUND(AVG(CASE WHEN status_code = 200 THEN 100 ELSE 0 END), 2) as success_rate
FROM usage_logs
WHERE model = ? AND timestamp >= ?
GROUP BY DATE(timestamp)
ORDER BY date ASC
`, modelName, since).Scan(&trends).Error

if err != nil {
h.logger.Error("Failed to get model trends", zap.String("model", modelName), zap.Error(err))
http.Error(w, "Failed to get model trends", http.StatusInternalServerError)
return
}

w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(trends); err != nil {
h.logger.Error("Failed to encode model trends", zap.Error(err))
http.Error(w, "Failed to encode response", http.StatusInternalServerError)
}
}

// calculateHealthScore computes a health score (0-100) based on latency and success rate
// Formula: Base score from success rate, penalties for high latency
func calculateHealthScore(avgLatency int64, successRate float64, p99Latency float64) float64 {
// Start with success rate as base (0-100)
score := successRate

// Penalty for average latency
// Excellent: < 500ms (no penalty)
// Good: 500ms-1s (small penalty)
// Degraded: 1s-3s (medium penalty)
// Poor: > 3s (large penalty)
if avgLatency > 5000 {
score -= 30
} else if avgLatency > 3000 {
score -= 20
} else if avgLatency > 1000 {
score -= 10
} else if avgLatency > 500 {
score -= 5
}

// Additional penalty for high p99 latency (tail latency)
if p99Latency > 10000 {
score -= 15
} else if p99Latency > 5000 {
score -= 10
} else if p99Latency > 2000 {
score -= 5
}

// Ensure score stays within bounds
if score < 0 {
score = 0
}
if score > 100 {
score = 100
}

return score
}
Loading