-
Notifications
You must be signed in to change notification settings - Fork 76
Added the ability for plugins to receive the request headers and modify them #760
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ab54af9
4aa87db
0a1093d
840f7d4
8fd2ca5
b7224e0
e5846d1
796f5b5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,6 +70,7 @@ func (s *StreamingServer) HandleRequestBody( | |
Model: model, | ||
ResolvedTargetModel: modelName, | ||
Critical: modelObj.Spec.Criticality != nil && *modelObj.Spec.Criticality == v1alpha2.Critical, | ||
Headers: reqCtx.RequestHeaders, | ||
Prompt: prompt, | ||
} | ||
logger.V(logutil.DEBUG).Info("LLM request assembled", "request", llmReq) | ||
|
@@ -109,7 +110,7 @@ func (s *StreamingServer) HandleRequestBody( | |
reqCtx.TargetPod = targetPod.NamespacedName.String() | ||
reqCtx.TargetEndpoint = endpoint | ||
|
||
s.populateRequestHeaderResponse(reqCtx, endpoint, len(requestBodyBytes)) | ||
s.populateRequestHeaderResponse(reqCtx, endpoint, len(requestBodyBytes), res.MutatedHeaders) | ||
|
||
reqCtx.reqBodyResp = &extProcPb.ProcessingResponse{ | ||
// The Endpoint Picker supports two approaches to communicating the target endpoint, as a request header | ||
|
@@ -151,7 +152,12 @@ func (s *StreamingServer) HandleRequestHeaders(ctx context.Context, reqCtx *Requ | |
return err | ||
} | ||
endpoint := pod.Address + ":" + strconv.Itoa(int(pool.Spec.TargetPortNumber)) | ||
s.populateRequestHeaderResponse(reqCtx, endpoint, 0) | ||
s.populateRequestHeaderResponse(reqCtx, endpoint, 0, nil) | ||
} | ||
|
||
for _, header := range req.RequestHeaders.Headers.Headers { | ||
reqCtx.RequestHeaders[header.Key] = header.Value | ||
} | ||
|
||
Comment on lines
+158
to
+161
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This part was already merged into main. |
||
return nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -86,6 +86,8 @@ type RequestContext struct { | |
RequestState StreamRequestState | ||
modelServerStreaming bool | ||
|
||
RequestHeaders map[string]string | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is also available already in main but in different field - |
||
|
||
reqHeaderResp *extProcPb.ProcessingResponse | ||
reqBodyResp *extProcPb.ProcessingResponse | ||
reqTrailerResp *extProcPb.ProcessingResponse | ||
|
@@ -117,7 +119,8 @@ func (s *StreamingServer) Process(srv extProcPb.ExternalProcessor_ProcessServer) | |
// Create request context to share states during life time of an HTTP request. | ||
// See https://github.com/envoyproxy/envoy/issues/17540. | ||
reqCtx := &RequestContext{ | ||
RequestState: RequestReceived, | ||
RequestState: RequestReceived, | ||
RequestHeaders: make(map[string]string), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. already in main in |
||
} | ||
|
||
var body []byte | ||
|
@@ -358,7 +361,7 @@ func (r *RequestContext) updateStateAndSendIfNeeded(srv extProcPb.ExternalProces | |
return nil | ||
} | ||
|
||
func (s *StreamingServer) populateRequestHeaderResponse(reqCtx *RequestContext, endpoint string, requestBodyLength int) { | ||
func (s *StreamingServer) populateRequestHeaderResponse(reqCtx *RequestContext, endpoint string, requestBodyLength int, mutatedHeaders map[string]string) { | ||
headers := []*configPb.HeaderValueOption{ | ||
{ | ||
Header: &configPb.HeaderValue{ | ||
|
@@ -377,6 +380,15 @@ func (s *StreamingServer) populateRequestHeaderResponse(reqCtx *RequestContext, | |
}, | ||
}) | ||
} | ||
// Add headers added by filters/scorers | ||
for key, value := range mutatedHeaders { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The headers are a list. If a plugin updated the value of an existing header key, should we update that header's entry here instead of just appending another one? |
||
headers = append(headers, &configPb.HeaderValueOption{ | ||
Header: &configPb.HeaderValue{ | ||
Key: key, | ||
RawValue: []byte(value), | ||
}, | ||
}) | ||
} | ||
|
||
targetEndpointValue := &structpb.Struct{ | ||
Fields: map[string]*structpb.Value{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,9 @@ type LLMRequest struct { | |
// Target models is a map of target model name to weight. | ||
TargetModels map[string]int | ||
Prompt string | ||
// Headers during request processing contains all of the request headers. | ||
// During response processing it contains all of the response headers. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: remove spaces |
||
Headers map[string]string | ||
// Resolved target model is the final target model after traffic split. | ||
ResolvedTargetModel string | ||
Critical bool | ||
|
@@ -58,6 +61,8 @@ type SchedulingContext struct { | |
Logger logr.Logger | ||
Req *LLMRequest | ||
PodsSnapshot []Pod | ||
// MutatedHeaders is used by the plugins to add/modify headers | ||
MutatedHeaders map[string]string | ||
} | ||
|
||
func (pm *PodMetrics) String() string { | ||
|
@@ -83,10 +88,11 @@ type PodMetrics struct { | |
func NewSchedulingContext(ctx context.Context, req *LLMRequest, pods []Pod) *SchedulingContext { | ||
logger := log.FromContext(ctx).WithValues("request", req) | ||
return &SchedulingContext{ | ||
Context: ctx, | ||
Logger: logger, | ||
Req: req, | ||
PodsSnapshot: pods, | ||
Context: ctx, | ||
Logger: logger, | ||
Req: req, | ||
PodsSnapshot: pods, | ||
MutatedHeaders: make(map[string]string), | ||
} | ||
} | ||
|
||
|
@@ -100,5 +106,6 @@ func ToSchedulerPodMetrics(pods []backendmetrics.PodMetrics) []Pod { | |
|
||
// Result captures the scheduler result. | ||
type Result struct { | ||
TargetPod Pod | ||
TargetPod Pod | ||
MutatedHeaders map[string]string | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lines 73 and 75 are duplicates?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, fixed