Skip to content

Commit

Permalink
feat: add minresource to pg (#279)
Browse files: browse the repository at this point in the history
Signed-off-by: shinytang6 <[email protected]>

Signed-off-by: shinytang6 <[email protected]>
  • Loading branch information
shinytang6 authored Dec 5, 2022
1 parent b326ea1 commit efab928
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
12 changes: 11 additions & 1 deletion pkg/gang_schedule/coscheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
quotav1 "k8s.io/apiserver/pkg/quota/v1"
"k8s.io/klog"
"k8s.io/utils/pointer"
controllerruntime "sigs.k8s.io/controller-runtime"
Expand All @@ -40,6 +41,7 @@ import (
"github.com/alibaba/kubedl/pkg/gang_schedule"
apiv1 "github.com/alibaba/kubedl/pkg/job_controller/api/v1"
"github.com/alibaba/kubedl/pkg/util/k8sutil"
resourceutils "github.com/alibaba/kubedl/pkg/util/resource_utils"
)

func init() {
Expand Down Expand Up @@ -199,17 +201,21 @@ func (kbs *kubeCoscheduler) generateGangByJobUnit(apiVersion, kind, name, namesp
},
Spec: v1alpha1.PodGroupSpec{MinMember: k8sutil.GetTotalReplicas(replicas)},
}
jobResource, _ := resourceutils.JobResourceRequests(replicas)

if aimaster := replicas[apiv1.JobReplicaTypeAIMaster]; aimaster != nil && aimaster.Replicas != nil {
if *aimaster.Replicas > 0 {
pg.Spec.MinMember -= *aimaster.Replicas
jobResource = quotav1.SubtractWithNonNegativeResult(jobResource,
resourceutils.Multiply(int64(*aimaster.Replicas), resourceutils.ReplicaResourceRequests(aimaster)))
}
}

if schedPolicy != nil && schedPolicy.MinAvailable != nil && *schedPolicy.MinAvailable > 0 {
pg.Spec.MinMember = *schedPolicy.MinAvailable
}

pg.Spec.MinResources = &jobResource
return &v1alpha1.PodGroupList{Items: []v1alpha1.PodGroup{pg}}
}

Expand All @@ -222,6 +228,7 @@ func (kbs *kubeCoscheduler) generateGangByRoleUnit(apiVersion, kind, name, names
}
rt := strings.ToLower(string(rtype))
gangName := fmt.Sprintf("%s-%s", name, rt)
resources := resourceutils.ReplicaResourceRequests(spec)
pgs.Items = append(pgs.Items, v1alpha1.PodGroup{
ObjectMeta: metav1.ObjectMeta{
Name: gangName,
Expand All @@ -241,7 +248,10 @@ func (kbs *kubeCoscheduler) generateGangByRoleUnit(apiVersion, kind, name, names
},
},
},
Spec: v1alpha1.PodGroupSpec{MinMember: *spec.Replicas},
Spec: v1alpha1.PodGroupSpec{
MinMember: *spec.Replicas,
MinResources: &resources,
},
})
}
return &pgs
Expand Down
3 changes: 2 additions & 1 deletion pkg/gang_schedule/coscheduler/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -452,14 +452,15 @@ func createPytorchJob(jobName string, workerReplicas int32, runPolicy *v1.RunPol
}

func createPodGroup(name, uid, jobName, rtype string, minMember int32, owner *metav1.OwnerReference) v1alpha1.PodGroup {
empty := make(corev1.ResourceList)
pg := v1alpha1.PodGroup{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: "default",
ResourceVersion: "1",
Labels: map[string]string{},
},
Spec: v1alpha1.PodGroupSpec{MinMember: minMember},
Spec: v1alpha1.PodGroupSpec{MinMember: minMember, MinResources: &empty},
}
if uid != "" {
pg.UID = types.UID(uid)
Expand Down

0 comments on commit efab928

Please sign in to comment.