Skip to content

Commit d4f729d

Browse files
committed
wip: addressing feedback
1 parent 7a0a57b commit d4f729d

File tree

8 files changed

+33
-269
lines changed

8 files changed

+33
-269
lines changed

api/v1beta1/awsmachine_conversion.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,6 @@ func (src *AWSMachine) ConvertTo(dstRaw conversion.Hub) error {
6565
}
6666

6767
dst.Status.DedicatedHost = restored.Status.DedicatedHost
68-
dst.Status.HostReleaseAttempts = restored.Status.HostReleaseAttempts
69-
dst.Status.LastHostReleaseAttempt = restored.Status.LastHostReleaseAttempt
70-
dst.Status.HostReleaseFailedReason = restored.Status.HostReleaseFailedReason
71-
7268
return nil
7369
}
7470

api/v1beta1/zz_generated.conversion.go

Lines changed: 0 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v1beta2/awsmachine_types.go

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -466,18 +466,6 @@ type AWSMachineStatus struct {
466466
// This field is populated when DynamicHostAllocation is used.
467467
// +optional
468468
DedicatedHost *DedicatedHostStatus `json:"dedicatedHost,omitempty"`
469-
470-
// HostReleaseAttempts tracks the number of attempts to release the dedicated host.
471-
// +optional
472-
HostReleaseAttempts *int32 `json:"hostReleaseAttempts,omitempty"`
473-
474-
// LastHostReleaseAttempt tracks the timestamp of the last attempt to release the dedicated host.
475-
// +optional
476-
LastHostReleaseAttempt *metav1.Time `json:"lastHostReleaseAttempt,omitempty"`
477-
478-
// HostReleaseFailedReason tracks the reason for the last host release failure.
479-
// +optional
480-
HostReleaseFailedReason *string `json:"hostReleaseFailedReason,omitempty"`
481469
}
482470

483471
// DedicatedHostStatus defines the observed state of a dynamically allocated dedicated host
@@ -488,10 +476,6 @@ type DedicatedHostStatus struct {
488476
// This field is populated when DynamicHostAllocation is used.
489477
// +optional
490478
ID *string `json:"id,omitempty"`
491-
492-
// ReleaseFailureMessage tracks the last failure message for the release host attempt.
493-
// +optional
494-
ReleaseFailureMessage *string `json:"releaseFailureMessage,omitempty"`
495479
}
496480

497481
// +kubebuilder:object:root=true

api/v1beta2/conditions_consts.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -196,12 +196,6 @@ const (
196196
// S3BucketFailedReason is used when any errors occur during reconciliation of an S3 bucket.
197197
S3BucketFailedReason = "S3BucketCreationFailed"
198198

199-
// DedicatedHostReleaseSucceededReason used when the dedicated host is successfully released.
200-
DedicatedHostReleaseSucceededReason = "DedicatedHostReleaseSucceeded"
201-
202199
// DedicatedHostReleaseFailedReason used when the dedicated host release fails.
203200
DedicatedHostReleaseFailedReason = "DedicatedHostReleaseFailed"
204-
205-
// DedicatedHostReleaseRetryingReason used when the dedicated host release is being retried.
206-
DedicatedHostReleaseRetryingReason = "DedicatedHostReleaseRetrying"
207201
)

api/v1beta2/zz_generated.deepcopy.go

Lines changed: 0 additions & 19 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachines.yaml

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1298,10 +1298,6 @@ spec:
12981298
ID tracks the dynamically allocated dedicated host ID.
12991299
This field is populated when DynamicHostAllocation is used.
13001300
type: string
1301-
releaseFailureMessage:
1302-
description: ReleaseFailureMessage tracks the last failure message
1303-
for the release host attempt.
1304-
type: string
13051301
type: object
13061302
failureMessage:
13071303
description: |-
@@ -1341,15 +1337,6 @@ spec:
13411337
can be added as events to the Machine object and/or logged in the
13421338
controller's output.
13431339
type: string
1344-
hostReleaseAttempts:
1345-
description: HostReleaseAttempts tracks the number of attempts to
1346-
release the dedicated host.
1347-
format: int32
1348-
type: integer
1349-
hostReleaseFailedReason:
1350-
description: HostReleaseFailedReason tracks the reason for the last
1351-
host release failure.
1352-
type: string
13531340
instanceState:
13541341
description: InstanceState is the state of the AWS instance for this
13551342
machine.
@@ -1359,11 +1346,6 @@ spec:
13591346
Interruptible reports that this machine is using spot instances and can therefore be interrupted by CAPI when it receives a notice that the spot instance is to be terminated by AWS.
13601347
This will be set to true when SpotMarketOptions is not nil (i.e. this machine is using a spot instance).
13611348
type: boolean
1362-
lastHostReleaseAttempt:
1363-
description: LastHostReleaseAttempt tracks the timestamp of the last
1364-
attempt to release the dedicated host.
1365-
format: date-time
1366-
type: string
13671349
ready:
13681350
description: Ready is true when the provider resource is ready.
13691351
type: boolean

controllers/awsmachine_controller.go

Lines changed: 15 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ import (
3232
"github.com/pkg/errors"
3333
corev1 "k8s.io/api/core/v1"
3434
apierrors "k8s.io/apimachinery/pkg/api/errors"
35-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3635
kerrors "k8s.io/apimachinery/pkg/util/errors"
3736
"k8s.io/client-go/tools/record"
3837
"k8s.io/klog/v2"
@@ -368,74 +367,26 @@ func (r *AWSMachineReconciler) reconcileDelete(ctx context.Context, machineScope
368367
machineScope.AWSMachine.Spec.DynamicHostAllocation != nil {
369368
hostID := *machineScope.AWSMachine.Status.DedicatedHost.ID
370369

371-
// Check if we should retry host release
372-
shouldRetry, retryAfter := shouldRetryHostRelease(machineScope)
373-
374-
if shouldRetry {
375-
// Mark that we're retrying
376-
conditions.MarkFalse(machineScope.AWSMachine, infrav1.DedicatedHostReleaseCondition,
377-
infrav1.DedicatedHostReleaseRetryingReason, clusterv1.ConditionSeverityWarning,
378-
"Retrying dedicated host release, attempt %d", getHostReleaseAttempts(machineScope))
379-
380-
// Update retry tracking
381-
updateHostReleaseRetryTracking(machineScope)
382-
383-
// Patch the object to persist retry tracking
384-
if err := machineScope.PatchObject(); err != nil {
385-
machineScope.Error(err, "failed to patch object with retry tracking")
386-
return ctrl.Result{}, err
387-
}
388-
389-
machineScope.Info("Retrying dedicated host release", "hostID", hostID, "attempt", getHostReleaseAttempts(machineScope))
390-
return ctrl.Result{RequeueAfter: retryAfter}, nil
391-
}
392-
393370
// Attempt to release the dedicated host
394-
machineScope.Info("Releasing dynamically allocated dedicated host", "hostID", hostID, "attempt", getHostReleaseAttempts(machineScope))
371+
machineScope.Info("Releasing dynamically allocated dedicated host", "hostID", hostID)
395372
if err := ec2Service.ReleaseDedicatedHost(ctx, hostID); err != nil {
396-
// Host release failed, set up retry logic
397-
machineScope.Error(err, "failed to release dedicated host", "hostID", hostID, "attempt", getHostReleaseAttempts(machineScope))
373+
machineScope.Error(err, "failed to release dedicated host", "hostID", hostID)
374+
conditions.MarkFalse(machineScope.AWSMachine, infrav1.DedicatedHostReleaseCondition, infrav1.DedicatedHostReleaseFailedReason, clusterv1.ConditionSeverityError, "%s", err.Error())
398375
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeWarning, "FailedReleaseHost", "Failed to release dedicated host %s: %v", hostID, err)
376+
return ctrl.Result{}, err
377+
}
399378

400-
// Update failure tracking
401-
updateHostReleaseFailureTracking(machineScope, err.Error())
402-
403-
// Mark the condition as failed
404-
conditions.MarkFalse(machineScope.AWSMachine, infrav1.DedicatedHostReleaseCondition,
405-
infrav1.DedicatedHostReleaseFailedReason, clusterv1.ConditionSeverityWarning,
406-
"Failed to release dedicated host: %v", err)
407-
408-
// Patch the object to persist failure tracking
409-
if err := machineScope.PatchObject(); err != nil {
410-
machineScope.Error(err, "failed to patch object with failure tracking")
411-
return ctrl.Result{}, err
412-
}
413-
414-
// Check if we've exceeded max retries
415-
if hasExceededMaxHostReleaseRetries(machineScope) {
416-
machineScope.Error(err, "exceeded maximum retry attempts for dedicated host release", "hostID", hostID, "maxAttempts", getMaxHostReleaseRetries())
417-
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeWarning, "MaxRetriesExceeded", "Exceeded maximum retry attempts for dedicated host %s release", hostID)
418-
// Continue with deletion even if host release fails permanently
419-
} else {
420-
// Return to trigger retry
421-
return ctrl.Result{RequeueAfter: getInitialHostReleaseRetryDelay()}, nil
422-
}
423-
} else {
424-
// Host release succeeded
425-
machineScope.Info("Successfully released dedicated host", "hostID", hostID)
426-
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeNormal, "SuccessfulReleaseHost", "Released dedicated host %s", hostID)
427-
428-
// Mark the condition as succeeded
429-
conditions.MarkTrue(machineScope.AWSMachine, infrav1.DedicatedHostReleaseCondition)
379+
// Host release succeeded
380+
machineScope.Info("Successfully released dedicated host", "hostID", hostID)
381+
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeNormal, "SuccessfulReleaseHost", "Released dedicated host %s", hostID)
430382

431-
// Clear retry tracking since we succeeded
432-
clearHostReleaseRetryTracking(machineScope)
383+
// Mark the condition as succeeded
384+
conditions.MarkTrue(machineScope.AWSMachine, infrav1.DedicatedHostReleaseCondition)
433385

434-
// Patch the object to persist success state
435-
if err := machineScope.PatchObject(); err != nil {
436-
machineScope.Error(err, "failed to patch object after successful host release")
437-
return ctrl.Result{}, err
438-
}
386+
// Patch the object to persist success state
387+
if err := machineScope.PatchObject(); err != nil {
388+
machineScope.Error(err, "failed to patch object after successful host release")
389+
return ctrl.Result{}, err
439390
}
440391
}
441392

@@ -527,6 +478,7 @@ func (r *AWSMachineReconciler) findInstance(machineScope *scope.MachineScope, ec
527478
} else {
528479
// If the ProviderID is populated, describe the instance using the ID.
529480
// InstanceIfExists() returns error (ErrInstanceNotFoundByID or ErrDescribeInstance) if the instance could not be found.
481+
530482
//nolint:staticcheck
531483
instance, err = ec2svc.InstanceIfExists(ptr.To[string](pid.ID()))
532484
if err != nil {
@@ -1373,94 +1325,3 @@ func (r *AWSMachineReconciler) ensureInstanceMetadataOptions(ec2svc services.EC2
13731325

13741326
return ec2svc.ModifyInstanceMetadataOptions(instance.ID, machine.Spec.InstanceMetadataOptions)
13751327
}
1376-
1377-
// getMaxHostReleaseRetries returns the maximum number of retry attempts for dedicated host release.
1378-
func getMaxHostReleaseRetries() int32 {
1379-
return 5
1380-
}
1381-
1382-
// getInitialHostReleaseRetryDelay returns the initial delay before the first retry.
1383-
func getInitialHostReleaseRetryDelay() time.Duration {
1384-
return 30 * time.Second
1385-
}
1386-
1387-
// getHostReleaseAttempts returns the current number of host release attempts.
1388-
func getHostReleaseAttempts(scope *scope.MachineScope) int32 {
1389-
if scope.AWSMachine.Status.HostReleaseAttempts == nil {
1390-
return 0
1391-
}
1392-
return *scope.AWSMachine.Status.HostReleaseAttempts
1393-
}
1394-
1395-
// shouldRetryHostRelease determines if we should retry host release based on retry tracking.
1396-
func shouldRetryHostRelease(scope *scope.MachineScope) (bool, time.Duration) {
1397-
attempts := getHostReleaseAttempts(scope)
1398-
1399-
// If no attempts yet, don't retry
1400-
if attempts == 0 {
1401-
return false, 0
1402-
}
1403-
1404-
// Check if we've exceeded max retries
1405-
if attempts >= getMaxHostReleaseRetries() {
1406-
return false, 0
1407-
}
1408-
1409-
// Check if enough time has passed since last attempt
1410-
lastAttempt := scope.AWSMachine.Status.LastHostReleaseAttempt
1411-
if lastAttempt == nil {
1412-
return false, 0
1413-
}
1414-
1415-
// Calculate exponential backoff delay
1416-
baseDelay := getInitialHostReleaseRetryDelay()
1417-
multiplier := int64(1)
1418-
for i := int32(1); i < attempts; i++ {
1419-
multiplier *= 2
1420-
}
1421-
backoffDelay := time.Duration(int64(baseDelay) * multiplier)
1422-
1423-
// Cap the maximum delay at 5 minutes
1424-
if backoffDelay > 5*time.Minute {
1425-
backoffDelay = 5 * time.Minute
1426-
}
1427-
1428-
// Check if enough time has passed
1429-
timeSinceLastAttempt := time.Since(lastAttempt.Time)
1430-
if timeSinceLastAttempt < backoffDelay {
1431-
remainingDelay := backoffDelay - timeSinceLastAttempt
1432-
return false, remainingDelay
1433-
}
1434-
1435-
return true, backoffDelay
1436-
}
1437-
1438-
// updateHostReleaseRetryTracking increments the retry attempt counter and updates the timestamp.
1439-
func updateHostReleaseRetryTracking(scope *scope.MachineScope) {
1440-
attempts := getHostReleaseAttempts(scope) + 1
1441-
scope.AWSMachine.Status.HostReleaseAttempts = &attempts
1442-
1443-
now := time.Now()
1444-
scope.AWSMachine.Status.LastHostReleaseAttempt = &metav1.Time{Time: now}
1445-
}
1446-
1447-
// updateHostReleaseFailureTracking updates the failure reason and timestamp.
1448-
func updateHostReleaseFailureTracking(scope *scope.MachineScope, reason string) {
1449-
scope.AWSMachine.Status.HostReleaseFailedReason = &reason
1450-
1451-
// Update the timestamp for the last attempt
1452-
now := time.Now()
1453-
scope.AWSMachine.Status.LastHostReleaseAttempt = &metav1.Time{Time: now}
1454-
}
1455-
1456-
// clearHostReleaseRetryTracking resets all retry tracking fields after successful release.
1457-
func clearHostReleaseRetryTracking(scope *scope.MachineScope) {
1458-
scope.AWSMachine.Status.HostReleaseAttempts = nil
1459-
scope.AWSMachine.Status.LastHostReleaseAttempt = nil
1460-
scope.AWSMachine.Status.HostReleaseFailedReason = nil
1461-
}
1462-
1463-
// hasExceededMaxHostReleaseRetries checks if we've exceeded the maximum retry attempts.
1464-
func hasExceededMaxHostReleaseRetries(scope *scope.MachineScope) bool {
1465-
return getHostReleaseAttempts(scope) >= getMaxHostReleaseRetries()
1466-
}

0 commit comments

Comments
 (0)