Commit 68142ef

wip: addressing feedback

1 parent e19d000 commit 68142ef

7 files changed: 16 additions & 230 deletions

api/v1beta1/awsmachine_conversion.go

Lines changed: 0 additions & 4 deletions
@@ -65,10 +65,6 @@ func (src *AWSMachine) ConvertTo(dstRaw conversion.Hub) error {
 	}
 
 	dst.Status.DedicatedHost = restored.Status.DedicatedHost
-	dst.Status.HostReleaseAttempts = restored.Status.HostReleaseAttempts
-	dst.Status.LastHostReleaseAttempt = restored.Status.LastHostReleaseAttempt
-	dst.Status.HostReleaseFailedReason = restored.Status.HostReleaseFailedReason
-
 	return nil
 }
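With the retry-tracking fields dropped from the v1beta2 API (see api/v1beta2/awsmachine_types.go below), ConvertTo no longer has anything to restore for them; only DedicatedHost survives the round trip.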

api/v1beta1/zz_generated.conversion.go

Lines changed: 0 additions & 3 deletions
(Generated file; diff not rendered.)

api/v1beta2/awsmachine_types.go

Lines changed: 0 additions & 16 deletions
@@ -466,18 +466,6 @@ type AWSMachineStatus struct {
 	// This field is populated when DynamicHostAllocation is used.
 	// +optional
 	DedicatedHost *DedicatedHostStatus `json:"dedicatedHost,omitempty"`
-
-	// HostReleaseAttempts tracks the number of attempts to release the dedicated host.
-	// +optional
-	HostReleaseAttempts *int32 `json:"hostReleaseAttempts,omitempty"`
-
-	// LastHostReleaseAttempt tracks the timestamp of the last attempt to release the dedicated host.
-	// +optional
-	LastHostReleaseAttempt *metav1.Time `json:"lastHostReleaseAttempt,omitempty"`
-
-	// HostReleaseFailedReason tracks the reason for the last host release failure.
-	// +optional
-	HostReleaseFailedReason *string `json:"hostReleaseFailedReason,omitempty"`
 }
 
 // DedicatedHostStatus defines the observed state of a dynamically allocated dedicated host
@@ -488,10 +476,6 @@ type DedicatedHostStatus struct {
 	// This field is populated when DynamicHostAllocation is used.
 	// +optional
 	ID *string `json:"id,omitempty"`
-
-	// ReleaseFailureMessage tracks the last failure message for the release host attempt.
-	// +optional
-	ReleaseFailureMessage *string `json:"releaseFailureMessage,omitempty"`
 }
 
 // +kubebuilder:object:root=true

api/v1beta2/zz_generated.deepcopy.go

Lines changed: 0 additions & 19 deletions
(Generated file; diff not rendered.)

config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachines.yaml

Lines changed: 0 additions & 18 deletions
@@ -1298,10 +1298,6 @@ spec:
                     ID tracks the dynamically allocated dedicated host ID.
                     This field is populated when DynamicHostAllocation is used.
                   type: string
-                releaseFailureMessage:
-                  description: ReleaseFailureMessage tracks the last failure message
-                    for the release host attempt.
-                  type: string
               type: object
             failureMessage:
               description: |-
@@ -1341,15 +1337,6 @@ spec:
                 can be added as events to the Machine object and/or logged in the
                 controller's output.
               type: string
-            hostReleaseAttempts:
-              description: HostReleaseAttempts tracks the number of attempts to
-                release the dedicated host.
-              format: int32
-              type: integer
-            hostReleaseFailedReason:
-              description: HostReleaseFailedReason tracks the reason for the last
-                host release failure.
-              type: string
             instanceState:
               description: InstanceState is the state of the AWS instance for this
                 machine.
@@ -1359,11 +1346,6 @@ spec:
                 Interruptible reports that this machine is using spot instances and can therefore be interrupted by CAPI when it receives a notice that the spot instance is to be terminated by AWS.
                 This will be set to true when SpotMarketOptions is not nil (i.e. this machine is using a spot instance).
               type: boolean
-            lastHostReleaseAttempt:
-              description: LastHostReleaseAttempt tracks the timestamp of the last
-                attempt to release the dedicated host.
-              format: date-time
-              type: string
             ready:
               description: Ready is true when the provider resource is ready.
               type: boolean
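These schema removals mirror the field removals in api/v1beta2/awsmachine_types.go above: files under config/crd/bases are generated from the Go types and their kubebuilder markers (typically via controller-gen), so the YAML changes here are regeneration output rather than hand edits.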

controllers/awsmachine_controller.go

Lines changed: 3 additions & 145 deletions
@@ -32,7 +32,6 @@ import (
 	"github.com/pkg/errors"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	kerrors "k8s.io/apimachinery/pkg/util/errors"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/klog/v2"
@@ -368,58 +367,12 @@ func (r *AWSMachineReconciler) reconcileDelete(ctx context.Context, machineScope
 		machineScope.AWSMachine.Spec.DynamicHostAllocation != nil {
 		hostID := *machineScope.AWSMachine.Status.DedicatedHost.ID
 
-		// Check if we should retry host release
-		shouldRetry, retryAfter := shouldRetryHostRelease(machineScope)
-
-		if shouldRetry {
-			// Mark that we're retrying
-			conditions.MarkFalse(machineScope.AWSMachine, infrav1.DedicatedHostReleaseCondition,
-				infrav1.DedicatedHostReleaseRetryingReason, clusterv1.ConditionSeverityWarning,
-				"Retrying dedicated host release, attempt %d", getHostReleaseAttempts(machineScope))
-
-			// Update retry tracking
-			updateHostReleaseRetryTracking(machineScope)
-
-			// Patch the object to persist retry tracking
-			if err := machineScope.PatchObject(); err != nil {
-				machineScope.Error(err, "failed to patch object with retry tracking")
-				return ctrl.Result{}, err
-			}
-
-			machineScope.Info("Retrying dedicated host release", "hostID", hostID, "attempt", getHostReleaseAttempts(machineScope))
-			return ctrl.Result{RequeueAfter: retryAfter}, nil
-		}
-
 		// Attempt to release the dedicated host
-		machineScope.Info("Releasing dynamically allocated dedicated host", "hostID", hostID, "attempt", getHostReleaseAttempts(machineScope))
+		machineScope.Info("Releasing dynamically allocated dedicated host", "hostID", hostID)
 		if err := ec2Service.ReleaseDedicatedHost(ctx, hostID); err != nil {
 			// Host release failed, set up retry logic
-			machineScope.Error(err, "failed to release dedicated host", "hostID", hostID, "attempt", getHostReleaseAttempts(machineScope))
+			machineScope.Error(err, "failed to release dedicated host", "hostID", hostID)
 			r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeWarning, "FailedReleaseHost", "Failed to release dedicated host %s: %v", hostID, err)
-
-			// Update failure tracking
-			updateHostReleaseFailureTracking(machineScope, err.Error())
-
-			// Mark the condition as failed
-			conditions.MarkFalse(machineScope.AWSMachine, infrav1.DedicatedHostReleaseCondition,
-				infrav1.DedicatedHostReleaseFailedReason, clusterv1.ConditionSeverityWarning,
-				"Failed to release dedicated host: %v", err)
-
-			// Patch the object to persist failure tracking
-			if err := machineScope.PatchObject(); err != nil {
-				machineScope.Error(err, "failed to patch object with failure tracking")
-				return ctrl.Result{}, err
-			}
-
-			// Check if we've exceeded max retries
-			if hasExceededMaxHostReleaseRetries(machineScope) {
-				machineScope.Error(err, "exceeded maximum retry attempts for dedicated host release", "hostID", hostID, "maxAttempts", getMaxHostReleaseRetries())
-				r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeWarning, "MaxRetriesExceeded", "Exceeded maximum retry attempts for dedicated host %s release", hostID)
-				// Continue with deletion even if host release fails permanently
-			} else {
-				// Return to trigger retry
-				return ctrl.Result{RequeueAfter: getInitialHostReleaseRetryDelay()}, nil
-			}
 		} else {
 			// Host release succeeded
 			machineScope.Info("Successfully released dedicated host", "hostID", hostID)
@@ -428,9 +381,6 @@ func (r *AWSMachineReconciler) reconcileDelete(ctx context.Context, machineScope
 			// Mark the condition as succeeded
 			conditions.MarkTrue(machineScope.AWSMachine, infrav1.DedicatedHostReleaseCondition)
 
-			// Clear retry tracking since we succeeded
-			clearHostReleaseRetryTracking(machineScope)
-
 			// Patch the object to persist success state
 			if err := machineScope.PatchObject(); err != nil {
 				machineScope.Error(err, "failed to patch object after successful host release")
@@ -514,7 +464,6 @@ func (r *AWSMachineReconciler) findInstance(machineScope *scope.MachineScope, ec
 	// Parse the ProviderID.
 	pid, err := scope.NewProviderID(machineScope.GetProviderID())
 	if err != nil {
-		//nolint:staticcheck
 		if !errors.Is(err, scope.ErrEmptyProviderID) {
 			return nil, errors.Wrapf(err, "failed to parse Spec.ProviderID")
 		}
@@ -527,7 +476,7 @@ func (r *AWSMachineReconciler) findInstance(machineScope *scope.MachineScope, ec
 	} else {
 		// If the ProviderID is populated, describe the instance using the ID.
 		// InstanceIfExists() returns error (ErrInstanceNotFoundByID or ErrDescribeInstance) if the instance could not be found.
-		//nolint:staticcheck
+
 		instance, err = ec2svc.InstanceIfExists(ptr.To[string](pid.ID()))
 		if err != nil {
 			return nil, err
@@ -1373,94 +1322,3 @@ func (r *AWSMachineReconciler) ensureInstanceMetadataOptions(ec2svc services.EC2
 
 	return ec2svc.ModifyInstanceMetadataOptions(instance.ID, machine.Spec.InstanceMetadataOptions)
 }
-
-// getMaxHostReleaseRetries returns the maximum number of retry attempts for dedicated host release.
-func getMaxHostReleaseRetries() int32 {
-	return 5
-}
-
-// getInitialHostReleaseRetryDelay returns the initial delay before the first retry.
-func getInitialHostReleaseRetryDelay() time.Duration {
-	return 30 * time.Second
-}
-
-// getHostReleaseAttempts returns the current number of host release attempts.
-func getHostReleaseAttempts(scope *scope.MachineScope) int32 {
-	if scope.AWSMachine.Status.HostReleaseAttempts == nil {
-		return 0
-	}
-	return *scope.AWSMachine.Status.HostReleaseAttempts
-}
-
-// shouldRetryHostRelease determines if we should retry host release based on retry tracking.
-func shouldRetryHostRelease(scope *scope.MachineScope) (bool, time.Duration) {
-	attempts := getHostReleaseAttempts(scope)
-
-	// If no attempts yet, don't retry
-	if attempts == 0 {
-		return false, 0
-	}
-
-	// Check if we've exceeded max retries
-	if attempts >= getMaxHostReleaseRetries() {
-		return false, 0
-	}
-
-	// Check if enough time has passed since last attempt
-	lastAttempt := scope.AWSMachine.Status.LastHostReleaseAttempt
-	if lastAttempt == nil {
-		return false, 0
-	}
-
-	// Calculate exponential backoff delay
-	baseDelay := getInitialHostReleaseRetryDelay()
-	multiplier := int64(1)
-	for i := int32(1); i < attempts; i++ {
-		multiplier *= 2
-	}
-	backoffDelay := time.Duration(int64(baseDelay) * multiplier)
-
-	// Cap the maximum delay at 5 minutes
-	if backoffDelay > 5*time.Minute {
-		backoffDelay = 5 * time.Minute
-	}
-
-	// Check if enough time has passed
-	timeSinceLastAttempt := time.Since(lastAttempt.Time)
-	if timeSinceLastAttempt < backoffDelay {
-		remainingDelay := backoffDelay - timeSinceLastAttempt
-		return false, remainingDelay
-	}
-
-	return true, backoffDelay
-}
-
-// updateHostReleaseRetryTracking increments the retry attempt counter and updates the timestamp.
-func updateHostReleaseRetryTracking(scope *scope.MachineScope) {
-	attempts := getHostReleaseAttempts(scope) + 1
-	scope.AWSMachine.Status.HostReleaseAttempts = &attempts
-
-	now := time.Now()
-	scope.AWSMachine.Status.LastHostReleaseAttempt = &metav1.Time{Time: now}
-}
-
-// updateHostReleaseFailureTracking updates the failure reason and timestamp.
-func updateHostReleaseFailureTracking(scope *scope.MachineScope, reason string) {
-	scope.AWSMachine.Status.HostReleaseFailedReason = &reason
-
-	// Update the timestamp for the last attempt
-	now := time.Now()
-	scope.AWSMachine.Status.LastHostReleaseAttempt = &metav1.Time{Time: now}
-}
-
-// clearHostReleaseRetryTracking resets all retry tracking fields after successful release.
-func clearHostReleaseRetryTracking(scope *scope.MachineScope) {
-	scope.AWSMachine.Status.HostReleaseAttempts = nil
-	scope.AWSMachine.Status.LastHostReleaseAttempt = nil
-	scope.AWSMachine.Status.HostReleaseFailedReason = nil
-}
-
-// hasExceededMaxHostReleaseRetries checks if we've exceeded the maximum retry attempts.
-func hasExceededMaxHostReleaseRetries(scope *scope.MachineScope) bool {
-	return getHostReleaseAttempts(scope) >= getMaxHostReleaseRetries()
-}
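For context on what the deletions above removed: the helpers implemented a doubling backoff from a 30-second base, capped at 5 minutes, driven by the attempt counter that previously lived in AWSMachine status. A minimal standalone sketch of that schedule (self-contained names, not the CAPA API):

package main

import (
	"fmt"
	"time"
)

// backoffDelay reproduces the schedule of the removed shouldRetryHostRelease
// helper: a 30s base delay, doubled for each prior attempt, capped at 5 minutes.
func backoffDelay(attempts int32) time.Duration {
	base := 30 * time.Second
	maxDelay := 5 * time.Minute

	delay := base
	for i := int32(1); i < attempts; i++ {
		delay *= 2
		if delay > maxDelay {
			return maxDelay
		}
	}
	return delay
}

func main() {
	// Attempts 1..5 yield 30s, 1m, 2m, 4m, and 5m (capped from 8m).
	for a := int32(1); a <= 5; a++ {
		fmt.Printf("attempt %d -> %s\n", a, backoffDelay(a))
	}
}

After this commit the failure path simply logs, emits the FailedReleaseHost event, and carries on with deletion instead of requeueing.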

pkg/cloud/services/ec2/instances.go

Lines changed: 13 additions & 25 deletions
@@ -267,6 +267,9 @@ func (s *Service) CreateInstance(ctx context.Context, scope *scope.MachineScope,
 		input.HostID = aws.String(hostID)
 		input.HostAffinity = aws.String("host")
 
+		if scope.AWSMachine.Status.DedicatedHost == nil {
+			scope.AWSMachine.Status.DedicatedHost = &infrav1.DedicatedHostStatus{}
+		}
 		// Update machine status with allocated host ID
 		scope.AWSMachine.Status.DedicatedHost.ID = &hostID
 	} else {
@@ -1301,7 +1304,7 @@ func (s *Service) ensureDedicatedHostAllocation(ctx context.Context, scope *scop
 
 	// Check if a host is already allocated for this machine
 	// Each machine gets its own dedicated host for complete isolation and resource dedication
-	if scope.AWSMachine.Status.DedicatedHost.ID != nil {
+	if scope.AWSMachine.Status.DedicatedHost != nil && scope.AWSMachine.Status.DedicatedHost.ID != nil {
 		existingHostID := aws.ToString(scope.AWSMachine.Status.DedicatedHost.ID)
 		s.scope.Info("Found existing allocated host for machine", "hostID", existingHostID, "machine", scope.Name())
 		return existingHostID, nil
@@ -1312,32 +1315,17 @@ func (s *Service) ensureDedicatedHostAllocation(ctx context.Context, scope *scop
 
 	// Get AZ from the machine's subnet
 	if scope.AWSMachine.Spec.Subnet != nil {
-		var subnet *types.Subnet
-		var err error
-		if scope.AWSMachine.Spec.Subnet.ID != nil {
-			subnet, err = s.getSubnet(scope.AWSMachine.Spec.Subnet.ID)
-			if err != nil {
-				return "", errors.Wrap(err, "failed to get subnet for host allocation")
-			}
-		} else if len(scope.AWSMachine.Spec.Subnet.Filters) > 0 {
-			// Convert CAPA filters to AWS SDK filters
-			awsFilters := make([]types.Filter, len(scope.AWSMachine.Spec.Subnet.Filters))
-			for i, f := range scope.AWSMachine.Spec.Subnet.Filters {
-				awsFilters[i] = types.Filter{
-					Name:   aws.String(f.Name),
-					Values: f.Values,
-				}
-			}
+		subnetID, err := s.findSubnet(scope)
+		if err != nil {
+			return "", errors.Wrap(err, "failed to find subnet for host allocation")
+		}
 
-			subnets, err := s.getFilteredSubnets(awsFilters...)
-			if err != nil {
-				return "", errors.Wrap(err, "failed to get subnet by filters for host allocation")
-			}
-			// if more than one subnet is found, use the first one. they should all share the same AZ.
-			if len(subnets) > 0 {
-				subnet = &subnets[0]
-			}
+		// Get the full subnet object to extract availability zone
+		subnet, err := s.getSubnet(&subnetID)
+		if err != nil {
+			return "", errors.Wrap(err, "failed to get subnet details for host allocation")
 		}
+
 		if subnet != nil && subnet.AvailabilityZone != nil {
 			availabilityZone = subnet.AvailabilityZone
 		}
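The guard added in CreateInstance and the extra nil check in ensureDedicatedHostAllocation address the same hazard: Status.DedicatedHost is a pointer that stays unset until a host is first allocated. A minimal standalone sketch of the pattern, using stand-in types rather than the CAPA structs:

package main

import "fmt"

// Stand-ins for the CAPA status structs touched by this commit.
type DedicatedHostStatus struct {
	ID *string
}

type AWSMachineStatus struct {
	DedicatedHost *DedicatedHostStatus
}

// recordHostID mirrors the guard added in CreateInstance: allocate the
// sub-struct on first use so the pointer write below cannot panic.
func recordHostID(status *AWSMachineStatus, hostID string) {
	if status.DedicatedHost == nil {
		status.DedicatedHost = &DedicatedHostStatus{}
	}
	status.DedicatedHost.ID = &hostID
}

func main() {
	var status AWSMachineStatus // DedicatedHost starts out nil
	recordHostID(&status, "h-0123456789abcdef0")
	fmt.Println(*status.DedicatedHost.ID)
}

The subnet lookup previously inlined here is now delegated to s.findSubnet(scope); judging from the call site it returns the chosen subnet ID and an error, and the availability zone is then read from the full subnet object fetched with s.getSubnet.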