@@ -26,6 +26,7 @@ import (
26
26
// String identifiers declared by the GPU fit scheduler plugin.
const (
	// Name is the plugin identifier string "GPUResourcesFit"
	// (presumably the name the plugin is registered under — confirm at the registration site).
	Name = "GPUResourcesFit"

	// CycleStateAllocateRequest is the "allocateRequest" key; judging by the
	// name it indexes the allocation request kept in the scheduling
	// CycleState — TODO confirm against the readers/writers of this key.
	CycleStateAllocateRequest = "allocateRequest"

	// CycleStateGPUSchedulingResult is the "gpuSchedulingResult" key; judging
	// by the name it indexes the GPU scheduling result kept in the scheduling
	// CycleState — TODO confirm against the readers/writers of this key.
	CycleStateGPUSchedulingResult = "gpuSchedulingResult"

	// SchedulerSimulationKey is the "schedulerSimulation" key.
	// NOTE(review): looks like a marker for simulated scheduling runs — confirm where it is consumed.
	SchedulerSimulationKey = "schedulerSimulation"
)
30
31
31
32
// Compile-time check that *GPUFit satisfies framework.PreFilterPlugin.
var _ framework.PreFilterPlugin = & GPUFit {}
@@ -105,6 +106,11 @@ func (s *GPUFit) PreFilter(ctx context.Context, state *framework.CycleState, pod
105
106
}, framework .NewStatus (framework .Success , "progressive migration for native resources claim" )
106
107
}
107
108
109
+ // Check if DRA mode is enabled for this pod
110
+ if isDRAEnabled (pod ) && hasDRAClaim (pod ) {
111
+ return nil , framework .NewStatus (framework .Skip , "DRA mode enabled, skipping custom GPU prefilter" )
112
+ }
113
+
108
114
// Skip non tensor-fusion mode
109
115
if ! utils .IsTensorFusionWorker (pod ) {
110
116
return nil , framework .NewStatus (framework .Skip , "skip for non tensor-fusion mode" )
@@ -207,6 +213,11 @@ func (s *GPUFit) PreFilterExtensions() framework.PreFilterExtensions {
207
213
}
208
214
209
215
func (s * GPUFit ) Filter (ctx context.Context , state * framework.CycleState , pod * v1.Pod , nodeInfo * framework.NodeInfo ) * framework.Status {
216
+ // Check if DRA mode is enabled for this pod
217
+ if isDRAEnabled (pod ) && hasDRAClaim (pod ) {
218
+ return framework .NewStatus (framework .Skip , "DRA mode enabled, skipping custom GPU filter" )
219
+ }
220
+
210
221
if ! utils .IsTensorFusionWorker (pod ) {
211
222
return framework .NewStatus (framework .Success , "skip for non tensor-fusion mode" )
212
223
}
@@ -228,6 +239,11 @@ func (s *GPUFit) Score(
228
239
pod * v1.Pod ,
229
240
nodeInfo * framework.NodeInfo ,
230
241
) (int64 , * framework.Status ) {
242
+ // Check if DRA mode is enabled for this pod
243
+ if isDRAEnabled (pod ) && hasDRAClaim (pod ) {
244
+ return 0 , framework .NewStatus (framework .Skip , "DRA mode enabled, skipping custom GPU scoring" )
245
+ }
246
+
231
247
// Skip non tensor-fusion mode scheduling
232
248
if ! utils .IsTensorFusionWorker (pod ) {
233
249
return 0 , framework .NewStatus (framework .Success , "" )
@@ -266,6 +282,11 @@ func (s *GPUFit) ScoreExtensions() framework.ScoreExtensions {
266
282
}
267
283
268
284
func (s * GPUFit ) Reserve (ctx context.Context , state * framework.CycleState , pod * v1.Pod , nodeName string ) * framework.Status {
285
+ // Check if DRA mode is enabled for this pod
286
+ if isDRAEnabled (pod ) && hasDRAClaim (pod ) {
287
+ return framework .NewStatus (framework .Success , "DRA mode enabled, skipping custom GPU reservation" )
288
+ }
289
+
269
290
if ! utils .IsTensorFusionWorker (pod ) {
270
291
return framework .NewStatus (framework .Success , "skip for non tensor-fusion mode" )
271
292
}
@@ -312,6 +333,11 @@ func (s *GPUFit) Reserve(ctx context.Context, state *framework.CycleState, pod *
312
333
}
313
334
314
335
func (s * GPUFit ) Unreserve (ctx context.Context , state * framework.CycleState , pod * v1.Pod , nodeName string ) {
336
+ // Check if DRA mode is enabled for this pod
337
+ if isDRAEnabled (pod ) && hasDRAClaim (pod ) {
338
+ return // DRA handles unreservation
339
+ }
340
+
315
341
if ! utils .IsTensorFusionWorker (pod ) {
316
342
return
317
343
}
@@ -331,6 +357,11 @@ func (s *GPUFit) Unreserve(ctx context.Context, state *framework.CycleState, pod
331
357
}
332
358
333
359
func (s * GPUFit ) PostBind (ctx context.Context , state * framework.CycleState , pod * v1.Pod , nodeName string ) {
360
+ // Check if DRA mode is enabled for this pod
361
+ if isDRAEnabled (pod ) && hasDRAClaim (pod ) {
362
+ return // DRA handles post-bind actions
363
+ }
364
+
334
365
if ! utils .IsTensorFusionWorker (pod ) {
335
366
return
336
367
}
@@ -359,3 +390,17 @@ func (s *GPUFit) PostBind(ctx context.Context, state *framework.CycleState, pod
359
390
"Attach GPU device ID info" , "Attach TensorFusion GPU device IDs to Pod: " + gpuIDs )
360
391
}
361
392
}
393
+
394
+ // isDRAEnabled checks if DRA is enabled for a pod
395
+ func isDRAEnabled (pod * v1.Pod ) bool {
396
+ if pod .Annotations == nil {
397
+ return false
398
+ }
399
+ val , ok := pod .Annotations [constants .DRAEnabledAnnotation ]
400
+ return ok && val == constants .TrueStringValue
401
+ }
402
+
403
+ // hasDRAClaim checks if a pod has DRA ResourceClaim references
404
+ func hasDRAClaim (pod * v1.Pod ) bool {
405
+ return len (pod .Spec .ResourceClaims ) > 0
406
+ }
0 commit comments