-
Notifications
You must be signed in to change notification settings - Fork 550
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
OCPBUGS-37982: Bug fix: Reduce Frequency of Update Requests for Copied CSVs #3497
base: master
Are you sure you want to change the base?
Changes from all commits
5ffd776
7739d74
5542840
588bba7
6a0467a
f6c207b
4b2c88b
c6a979f
8c0659a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -360,7 +360,7 @@ func (a *Operator) pruneProvidedAPIs(group *operatorsv1.OperatorGroup, groupProv | |
} | ||
|
||
// Prune providedAPIs annotation if the cluster has fewer providedAPIs (handles CSV deletion) | ||
//if intersection := groupProvidedAPIs.Intersection(providedAPIsFromCSVs); len(intersection) < len(groupProvidedAPIs) { | ||
// if intersection := groupProvidedAPIs.Intersection(providedAPIsFromCSVs); len(intersection) < len(groupProvidedAPIs) { | ||
if len(intersection) < len(groupProvidedAPIs) { | ||
difference := groupProvidedAPIs.Difference(intersection) | ||
logger := logger.WithFields(logrus.Fields{ | ||
|
@@ -790,6 +790,11 @@ func copyableCSVHash(original *v1alpha1.ClusterServiceVersion) (string, string, | |
return newHash, originalHash, nil | ||
} | ||
|
||
const ( | ||
nonStatusCopyHashAnnotation = "olm.operatorframework.io/nonStatusCopyHash" | ||
statusCopyHashAnnotation = "olm.operatorframework.io/statusCopyHash" | ||
) | ||
|
||
// If returned error is not nil, the returned ClusterServiceVersion | ||
// has only the Name, Namespace, and UID fields set. | ||
func (a *Operator) copyToNamespace(prototype *v1alpha1.ClusterServiceVersion, nsFrom, nsTo, nonstatus, status string) (*v1alpha1.ClusterServiceVersion, error) { | ||
|
@@ -803,6 +808,7 @@ func (a *Operator) copyToNamespace(prototype *v1alpha1.ClusterServiceVersion, ns | |
|
||
existing, err := a.copiedCSVLister.Namespace(nsTo).Get(prototype.GetName()) | ||
if apierrors.IsNotFound(err) { | ||
prototype.Annotations[nonStatusCopyHashAnnotation] = nonstatus | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good point possibly. checking... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So looking at it closer it seems like we shouldn't change it, here's my reasoning: Keeping the annotation logic here, in We're reusing prototype and accounting for possibly set annotations. If we move the logic to In Aside from the hash handling we'd still need to be doing the above work in |
||
created, err := a.client.OperatorsV1alpha1().ClusterServiceVersions(nsTo).Create(context.TODO(), prototype, metav1.CreateOptions{}) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to create new CSV: %w", err) | ||
|
@@ -811,6 +817,10 @@ func (a *Operator) copyToNamespace(prototype *v1alpha1.ClusterServiceVersion, ns | |
if _, err := a.client.OperatorsV1alpha1().ClusterServiceVersions(nsTo).UpdateStatus(context.TODO(), created, metav1.UpdateOptions{}); err != nil { | ||
return nil, fmt.Errorf("failed to update status on new CSV: %w", err) | ||
} | ||
prototype.Annotations[statusCopyHashAnnotation] = status | ||
if _, err = a.client.OperatorsV1alpha1().ClusterServiceVersions(nsTo).Update(context.TODO(), prototype, metav1.UpdateOptions{}); err != nil { | ||
return nil, fmt.Errorf("failed to update annotations after updating status: %w", err) | ||
} | ||
return &v1alpha1.ClusterServiceVersion{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: created.Name, | ||
|
@@ -825,11 +835,15 @@ func (a *Operator) copyToNamespace(prototype *v1alpha1.ClusterServiceVersion, ns | |
prototype.Namespace = existing.Namespace | ||
prototype.ResourceVersion = existing.ResourceVersion | ||
prototype.UID = existing.UID | ||
existingNonStatus := existing.Annotations["$copyhash-nonstatus"] | ||
existingStatus := existing.Annotations["$copyhash-status"] | ||
// Get the non-status and status hash of the existing copied CSV | ||
existingNonStatus := existing.Annotations[nonStatusCopyHashAnnotation] | ||
existingStatus := existing.Annotations[statusCopyHashAnnotation] | ||
|
||
var updated *v1alpha1.ClusterServiceVersion | ||
// Always set the in-memory prototype's nonstatus annotation: | ||
prototype.Annotations[nonStatusCopyHashAnnotation] = nonstatus | ||
if existingNonStatus != nonstatus { | ||
// include updates to the non-status hash annotation if there is a mismatch | ||
if updated, err = a.client.OperatorsV1alpha1().ClusterServiceVersions(nsTo).Update(context.TODO(), prototype, metav1.UpdateOptions{}); err != nil { | ||
return nil, fmt.Errorf("failed to update: %w", err) | ||
} | ||
|
@@ -843,6 +857,17 @@ func (a *Operator) copyToNamespace(prototype *v1alpha1.ClusterServiceVersion, ns | |
if _, err = a.client.OperatorsV1alpha1().ClusterServiceVersions(nsTo).UpdateStatus(context.TODO(), updated, metav1.UpdateOptions{}); err != nil { | ||
return nil, fmt.Errorf("failed to update status: %w", err) | ||
} | ||
// Update the status first if the existing copied CSV status hash doesn't match what we expect | ||
// to prevent a scenario where the hash annotations match but the contents do not. | ||
// We also need to update the CSV itself in this case to ensure we set the status hash annotation. | ||
prototype.Annotations[statusCopyHashAnnotation] = status | ||
if updated, err = a.client.OperatorsV1alpha1().ClusterServiceVersions(nsTo).Update(context.TODO(), prototype, metav1.UpdateOptions{}); err != nil { | ||
return nil, fmt.Errorf("failed to update: %w", err) | ||
} | ||
} else { | ||
// Even if they're the same, ensure the returned prototype is annotated. | ||
prototype.Annotations[statusCopyHashAnnotation] = status | ||
updated = prototype | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the code implemented in this PR to the current state, the main addition seems to be this else block (beyond tests). I’m not entirely sure I fully understand—are we also looking to implement what’s outlined in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is first pass, basically, just merge the old PR. With this PR we're taking path of but the else is not the only thing done here, the main thing added is the tracking hashes so we can tell what's in need of update. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
We previously agreed that the old PR wasn't quite the right approach, correct? Given that, I’m not sure it makes sense to merge it as-is. If we need to do a release before we have the proper solution in place, we might include a change we don’t want. That doesn’t seem ideal to me. In that case I would request changes, since it neither provides the desired solution nor fixes the problem as defined in the doc. Note that the doc has a section about that. It’s fine to add it as you did, but what do you think about creating a commit on top with the solution we intend to use? Could we focus on implementing the correct fix for the bug instead? cc @tmshort There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please see @tmshort's comment on the doc. The idea is that merging this PR is a first step that gives some relief; then we'll make another pass after this settles. Settling involves seeing much less API activity, especially on clusters with many namespaces for the CSV to be copied to. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Settling also involves seeing whether the things mentioned in the doc, primarily OLM not correcting user-modified copied CSVs, will be a real-world problem. |
||
return &v1alpha1.ClusterServiceVersion{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
|
@@ -939,7 +964,6 @@ func namespacesChanged(clusterNamespaces []string, statusNamespaces []string) bo | |
|
||
func (a *Operator) getOperatorGroupTargets(op *operatorsv1.OperatorGroup) (map[string]struct{}, error) { | ||
selector, err := metav1.LabelSelectorAsSelector(op.Spec.Selector) | ||
|
||
if err != nil { | ||
return nil, err | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm assuming all the changes here are due to lint?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, and I just ran `make lint` locally to make sure nothing changed. Nothing changed.