Skip to content

Commit 5abc575

Browse files
Implement autoscaling from zero by auto-populating AWSMachineTemplate capacity
1 parent 15a2d14 commit 5abc575

File tree

7 files changed

+518
-0
lines changed

7 files changed

+518
-0
lines changed

api/v1beta2/awsmachinetemplate_types.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ type AWSMachineTemplateSpec struct {
4040
// +kubebuilder:object:root=true
4141
// +kubebuilder:resource:path=awsmachinetemplates,scope=Namespaced,categories=cluster-api,shortName=awsmt
4242
// +kubebuilder:storageversion
43+
// +kubebuilder:subresource:status
4344
// +k8s:defaulter-gen=true
4445

4546
// AWSMachineTemplate is the schema for the Amazon EC2 Machine Templates API.

config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinetemplates.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1138,3 +1138,5 @@ spec:
11381138
type: object
11391139
served: true
11401140
storage: true
1141+
subresources:
1142+
status: {}

config/rbac/role.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ rules:
175175
resources:
176176
- awsclusters/status
177177
- awsfargateprofiles/status
178+
- awsmachinetemplates/status
178179
- rosaclusters/status
179180
- rosanetworks/status
180181
- rosaroleconfigs/status
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package controllers
18+
19+
import (
20+
"context"
21+
22+
"github.com/aws/aws-sdk-go-v2/service/ec2"
23+
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
24+
"github.com/pkg/errors"
25+
corev1 "k8s.io/api/core/v1"
26+
apierrors "k8s.io/apimachinery/pkg/api/errors"
27+
"k8s.io/apimachinery/pkg/api/resource"
28+
ctrl "sigs.k8s.io/controller-runtime"
29+
"sigs.k8s.io/controller-runtime/pkg/client"
30+
"sigs.k8s.io/controller-runtime/pkg/controller"
31+
32+
infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
33+
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope"
34+
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger"
35+
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record"
36+
"sigs.k8s.io/cluster-api/util"
37+
"sigs.k8s.io/cluster-api/util/predicates"
38+
)
39+
40+
// AWSMachineTemplateReconciler reconciles AWSMachineTemplate objects.
41+
//
42+
// This controller automatically populates capacity information for AWSMachineTemplate resources
43+
// to enable autoscaling from zero.
44+
//
45+
// See: https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20210310-opt-in-autoscaling-from-zero.md
46+
type AWSMachineTemplateReconciler struct {
47+
client.Client
48+
WatchFilterValue string
49+
}
50+
51+
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinetemplates,verbs=get;list;watch
52+
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinetemplates/status,verbs=get;update;patch
53+
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsclusters,verbs=get;list;watch
54+
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get;list;watch
55+
// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
56+
57+
// Reconcile populates capacity information for AWSMachineTemplate.
58+
func (r *AWSMachineTemplateReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
59+
log := logger.FromContext(ctx)
60+
61+
// Fetch the AWSMachineTemplate
62+
awsMachineTemplate := &infrav1.AWSMachineTemplate{}
63+
if err := r.Get(ctx, req.NamespacedName, awsMachineTemplate); err != nil {
64+
if apierrors.IsNotFound(err) {
65+
return ctrl.Result{}, nil
66+
}
67+
return ctrl.Result{}, err
68+
}
69+
70+
// Skip if capacity is already set
71+
if len(awsMachineTemplate.Status.Capacity) > 0 {
72+
return ctrl.Result{}, nil
73+
}
74+
75+
// Get instance type from spec
76+
instanceType := awsMachineTemplate.Spec.Template.Spec.InstanceType
77+
if instanceType == "" {
78+
return ctrl.Result{}, nil
79+
}
80+
81+
// Find the region by checking ownerReferences
82+
region, err := r.getRegion(ctx, awsMachineTemplate)
83+
if err != nil {
84+
return ctrl.Result{}, err
85+
}
86+
if region == "" {
87+
return ctrl.Result{}, nil
88+
}
89+
90+
// Create global scope for this region
91+
// Reference: exp/instancestate/awsinstancestate_controller.go:68-76
92+
globalScope, err := scope.NewGlobalScope(scope.GlobalScopeParams{
93+
ControllerName: "awsmachinetemplate",
94+
Region: region,
95+
})
96+
if err != nil {
97+
record.Warnf(awsMachineTemplate, "AWSSessionFailed", "Failed to create AWS session for region %q: %v", region, err)
98+
return ctrl.Result{}, nil
99+
}
100+
101+
// Query instance type capacity
102+
capacity, err := r.getInstanceTypeCapacity(ctx, globalScope, instanceType)
103+
if err != nil {
104+
record.Warnf(awsMachineTemplate, "CapacityQueryFailed", "Failed to query capacity for instance type %q: %v", instanceType, err)
105+
return ctrl.Result{}, nil
106+
}
107+
108+
// Update status with capacity
109+
awsMachineTemplate.Status.Capacity = capacity
110+
111+
if err := r.Status().Update(ctx, awsMachineTemplate); err != nil {
112+
return ctrl.Result{}, errors.Wrap(err, "failed to update AWSMachineTemplate status")
113+
}
114+
115+
log.Info("Successfully populated capacity information", "instanceType", instanceType, "region", region, "capacity", capacity)
116+
return ctrl.Result{}, nil
117+
}
118+
119+
// getRegion finds the region by checking the template's owner cluster reference.
120+
func (r *AWSMachineTemplateReconciler) getRegion(ctx context.Context, template *infrav1.AWSMachineTemplate) (string, error) {
121+
// Get the owner cluster
122+
cluster, err := util.GetOwnerCluster(ctx, r.Client, template.ObjectMeta)
123+
if err != nil {
124+
return "", err
125+
}
126+
if cluster == nil {
127+
return "", errors.New("no owner cluster found")
128+
}
129+
130+
// Get region from AWSCluster (standard EC2-based cluster)
131+
if cluster.Spec.InfrastructureRef != nil && cluster.Spec.InfrastructureRef.Kind == "AWSCluster" {
132+
awsCluster := &infrav1.AWSCluster{}
133+
if err := r.Get(ctx, client.ObjectKey{
134+
Namespace: cluster.Namespace,
135+
Name: cluster.Spec.InfrastructureRef.Name,
136+
}, awsCluster); err != nil {
137+
if !apierrors.IsNotFound(err) {
138+
return "", errors.Wrapf(err, "failed to get AWSCluster %s/%s", cluster.Namespace, cluster.Spec.InfrastructureRef.Name)
139+
}
140+
} else if awsCluster.Spec.Region != "" {
141+
return awsCluster.Spec.Region, nil
142+
}
143+
}
144+
145+
return "", nil
146+
}
147+
148+
// getInstanceTypeCapacity queries AWS EC2 API for instance type capacity.
149+
func (r *AWSMachineTemplateReconciler) getInstanceTypeCapacity(ctx context.Context, globalScope *scope.GlobalScope, instanceType string) (corev1.ResourceList, error) {
150+
// Create EC2 client from global scope
151+
ec2Client := ec2.NewFromConfig(globalScope.Session())
152+
153+
// Query instance type information
154+
input := &ec2.DescribeInstanceTypesInput{
155+
InstanceTypes: []ec2types.InstanceType{ec2types.InstanceType(instanceType)},
156+
}
157+
158+
result, err := ec2Client.DescribeInstanceTypes(ctx, input)
159+
if err != nil {
160+
return nil, errors.Wrapf(err, "failed to describe instance type %q", instanceType)
161+
}
162+
163+
if len(result.InstanceTypes) == 0 {
164+
return nil, errors.Errorf("no information found for instance type %q", instanceType)
165+
}
166+
167+
// Extract capacity information
168+
info := result.InstanceTypes[0]
169+
resourceList := corev1.ResourceList{}
170+
171+
// CPU
172+
if info.VCpuInfo != nil && info.VCpuInfo.DefaultVCpus != nil {
173+
resourceList[corev1.ResourceCPU] = *resource.NewQuantity(int64(*info.VCpuInfo.DefaultVCpus), resource.DecimalSI)
174+
}
175+
176+
// Memory
177+
if info.MemoryInfo != nil && info.MemoryInfo.SizeInMiB != nil {
178+
memoryBytes := *info.MemoryInfo.SizeInMiB * 1024 * 1024
179+
resourceList[corev1.ResourceMemory] = *resource.NewQuantity(memoryBytes, resource.BinarySI)
180+
}
181+
return resourceList, nil
182+
}
183+
184+
// SetupWithManager sets up the controller with the Manager.
185+
func (r *AWSMachineTemplateReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
186+
log := logger.FromContext(ctx)
187+
188+
return ctrl.NewControllerManagedBy(mgr).
189+
For(&infrav1.AWSMachineTemplate{}).
190+
WithOptions(options).
191+
WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), log.GetLogger(), r.WatchFilterValue)).
192+
Complete(r)
193+
}

0 commit comments

Comments
 (0)