-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: recover from configuration failures #151
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
import com.aws.greengrass.clientdevices.auth.configuration.GroupConfiguration; | ||
import com.aws.greengrass.clientdevices.auth.configuration.GroupManager; | ||
import com.aws.greengrass.clientdevices.auth.connectivity.CISShadowMonitor; | ||
import com.aws.greengrass.clientdevices.auth.exception.InvalidConfigurationException; | ||
import com.aws.greengrass.clientdevices.auth.infra.NetworkState; | ||
import com.aws.greengrass.clientdevices.auth.session.MqttSessionFactory; | ||
import com.aws.greengrass.clientdevices.auth.session.SessionConfig; | ||
|
@@ -26,6 +27,7 @@ | |
import com.aws.greengrass.config.Topics; | ||
import com.aws.greengrass.config.WhatHappened; | ||
import com.aws.greengrass.dependency.ImplementsService; | ||
import com.aws.greengrass.dependency.State; | ||
import com.aws.greengrass.ipc.AuthorizeClientDeviceActionOperationHandler; | ||
import com.aws.greengrass.ipc.GetClientDeviceAuthTokenOperationHandler; | ||
import com.aws.greengrass.ipc.SubscribeToCertificateUpdatesOperationHandler; | ||
|
@@ -70,7 +72,10 @@ public class ClientDevicesAuthService extends PluginService { | |
// Create a threadpool for calling the cloud. Single thread will be used by default. | ||
private ThreadPoolExecutor cloudCallThreadPool; | ||
private int cloudCallQueueSize; | ||
|
||
private final Object configLock = new Object(); | ||
private CDAConfiguration cdaConfiguration; | ||
private volatile boolean configurationErrored = false; | ||
|
||
|
||
/** | ||
|
@@ -136,11 +141,38 @@ private void subscribeToConfigChanges() { | |
} | ||
|
||
private void onConfigurationChanged() { | ||
try { | ||
cdaConfiguration = CDAConfiguration.from(cdaConfiguration, getConfig()); | ||
} catch (URISyntaxException e) { | ||
serviceErrored(e); | ||
// Note: The nucleus emits multiple configuration changed events, one per key that changed. It will also | ||
// keep emitting them regardless of the state it is currently in. If the configuration was incorrect, we want the | ||
// service to error, but we don't want to check again until the nucleus has run the remediation steps (when the | ||
// service errors, the nucleus will try to call shutdown -> install -> startup). | ||
Comment on lines
+146
to
+147
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you help me understand this? My mental model is that we shouldn't need to care about what Nucleus is doing, and that we can rely entirely on configuration updates. If we are broken, and a configuration change fixes us, then request reinstall? Are there scenarios where that will not work? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Right, it should be possible to make this as simple as, on config change, if state is broken, then request reinstall. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jbutler and I chatted about this offline - we are going to dig deeper and see if it is required |
||
if (configurationErrored && !inState(State.BROKEN)) { | ||
return; | ||
} | ||
|
||
// Note: Need to synchronize this block given multiple threads can be reading the value of configurationErrored | ||
// and changing it. | ||
synchronized (configLock) { | ||
try { | ||
CDAConfiguration configuration = CDAConfiguration.from(cdaConfiguration, getConfig()); | ||
|
||
if (configuration.isEqual(cdaConfiguration)) { | ||
return; | ||
} | ||
|
||
cdaConfiguration = configuration; | ||
|
||
// Good configuration and was previously broken | ||
if (inState(State.BROKEN)) { | ||
logger.info("Service is {} and configuration changed. Attempting to reinstall.", State.BROKEN); | ||
configurationErrored = false; | ||
requestReinstall(); | ||
} | ||
} catch (URISyntaxException | InvalidConfigurationException e) { | ||
configurationErrored = true; | ||
serviceErrored(e); | ||
} | ||
} | ||
|
||
} | ||
|
||
private void configChangeHandler(WhatHappened whatHappened, Node node) { | ||
|
@@ -183,6 +215,13 @@ private void configChangeHandler(WhatHappened whatHappened, Node node) { | |
protected void startup() throws InterruptedException { | ||
context.get(CertificateManager.class).startMonitors(); | ||
super.startup(); | ||
|
||
synchronized (configLock) { | ||
if (configurationErrored) { | ||
configurationErrored = false; | ||
onConfigurationChanged(); | ||
} | ||
} | ||
Comment on lines
+219
to
+224
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought the issue was that if we are broken, then Nucleus doesn't call |
||
} | ||
|
||
@Override | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure if this is necessary at all, but for future reference, you could just synchronize on
configurationErrored
instead and then you also don't need volatile