Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CHEF-11758] Add support for config verification of backup in HA restore command #8661

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
271 changes: 262 additions & 9 deletions components/automate-cli/cmd/chef-automate/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ const (
AUTOMATE_CMD_STOP = "sudo systemctl stop chef-automate"
AUTOMATE_CMD_START = "sudo systemctl start chef-automate"
BACKUP_CONFIG = "file_system"
OS_SNAPSHOT_URL = "http://localhost:10144/_snapshot?pretty"
)

type BackupFromBashtion interface {
Expand All @@ -60,15 +61,16 @@ var backupCmdFlags = struct {
noProgress bool
requestTimeout int64

baseBackupDir string
channel string
overrideOrigin string
hartifactsPath string
upgrade bool
skipPreflight bool
skipBootstrap bool
airgap string
yes bool
baseBackupDir string
channel string
overrideOrigin string
hartifactsPath string
upgrade bool
skipPreflight bool
skipBootstrap bool
airgap string
yes bool
verifyRestoreConfig bool

createWaitTimeout int64
listWaitTimeout int64
Expand All @@ -92,6 +94,28 @@ var backupCmdFlags = struct {
setConfigPath string
}{}

type osSnapshot struct {
Es6Compliance *caXservice `json:"chef-automate-es6-compliance-service"`
Es6Erchef *caXservice `json:"chef-automate-es6-automate-cs-oc-erchef"`
Es6EventFeed *caXservice `json:"chef-automate-es6-event-feed-service"`
Es6Ingest *caXservice `json:"chef-automate-es6-ingest-service"`
Es5Compliance *caXservice `json:"chef-automate-es5-compliance-service"`
Es5Erchef *caXservice `json:"chef-automate-es5-automate-cs-oc-erchef"`
Es5EventFeed *caXservice `json:"chef-automate-es5-event-feed-service"`
Es5Ingest *caXservice `json:"chef-automate-es5-ingest-service"`
}

type caXservice struct {
Type string `json:"type"`
Settings *settings `json:"settings"`
}

type settings struct {
Location string `json:"location"`
BasePath string `json:"base_path"`
Bucket string `json:"bucket"`
}

func init() {
integrityBackupCmd.AddCommand(integrityBackupValidateCmd)
integrityBackupCmd.AddCommand(integrityBackupShowCmd)
Expand Down Expand Up @@ -141,6 +165,7 @@ func init() {
restoreBackupCmd.PersistentFlags().Int64VarP(&backupCmdFlags.restoreWaitTimeout, "wait-timeout", "t", 43200, "How long to wait for a operation to complete before raising an error")
restoreBackupCmd.PersistentFlags().StringVar(&backupCmdFlags.patchConfigPath, "patch-config", "", "Path to patch config if required")
restoreBackupCmd.PersistentFlags().StringVar(&backupCmdFlags.setConfigPath, "set-config", "", "Path to set config if required")
restoreBackupCmd.PersistentFlags().BoolVarP(&backupCmdFlags.verifyRestoreConfig, "verify-restore-config", "", false, "Verifies automate and opensearch configs when restoring a backup")

deleteBackupCmd.PersistentFlags().BoolVar(&backupDeleteCmdFlags.yes, "yes", false, "Agree to all prompts")
deleteBackupCmd.PersistentFlags().Int64VarP(&backupCmdFlags.deleteWaitTimeout, "wait-timeout", "t", 43200, "How long to wait for a operation to complete before raising an error")
Expand Down Expand Up @@ -332,6 +357,28 @@ func handleBackupCommands(cmd *cobra.Command, args []string, commandString strin
os.Exit(1)
}
if strings.Contains(cmd.CommandPath(), "restore") {
if isA2HARBFileExist() {
var allowRestore bool
var er error
if backupCmdFlags.verifyRestoreConfig {
sshConfig := &SSHConfig{
sshUser: infra.Outputs.SSHUser.Value,
sshKeyFile: infra.Outputs.SSHKeyFile.Value,
sshPort: infra.Outputs.SSHPort.Value,
}
sshUtil := NewSSHUtil(sshConfig)
pullConfig := NewPullConfigs(infra, sshUtil)
allowRestore, er = compareBackupPaths(infra, sshUtil, pullConfig)
if er != nil {
return er
}
if !allowRestore {
return errors.New("There is discrepancy in the config paths. Restore operation will fail. Please correct the backup paths before proceeding to restore the backup")
}
fmt.Println("Configuration looks good. Please proceed to restore the backup without passing --verify-restore-config")
return nil
}
}
if !backupCmdFlags.yes && !backupCmdFlags.skipPreflight {
yes, err := writer.Confirm(strings.TrimSpace(a2AlreadyDeployedMessage))
if err != nil {
Expand Down Expand Up @@ -1387,3 +1434,209 @@ func getBackupStatusAsCompletedOrFailed(backup *api.BackupTask) bool {
return false

}

func compareBackupPaths(infra *AutomateHAInfraDetails, sshUtil SSHUtil, pullConfig PullConfigs) (bool, error) {
automateIps := infra.Outputs.AutomatePrivateIps.Value
if automateIps == nil || len(automateIps) < 1 {
return false, errors.New("Automate private IPs are empty")
}
sshUtil.getSSHConfig().hostIP = automateIps[0]

a2ConfigMap, _, err := pullConfig.pullAutomateConfigs(false)
if err != nil {
return false, err
}
if len(a2ConfigMap) == 0 {
return false, errors.New("no automate configs found")
}

backupType, backupLocation, err := determineBkpConfig(a2ConfigMap, "", "objectStorage", "fileStorage")
logrus.Debugf("Backuptype is %s and backupLocation is %s", backupType, backupLocation)
if err != nil {
return false, err
}

automateBackupPath, err := pullConfig.getBackupPathFromAutomateConfig(a2ConfigMap, backupLocation)
if err != nil {
return false, err
}
logrus.Debugf("Backup path set in automate config is %s", automateBackupPath)

snapshotServiceBackupPath, allowRestore, err := compareSnapshotLocationOfServices(backupLocation, sshUtil)
if err != nil {
return false, err
}
if !allowRestore {
return false, errors.New("There is discrepancy in indices of snapshot locations of services. Indices of all snapshots should be the same. ")
}
logrus.Debugf("SnapshotServiceBackupPath: %s", snapshotServiceBackupPath)

if backupType == "fileStorage" {
osPath, err := pullConfig.getBackupPathFromOpensearchConfig()
if err != nil {
return false, err
}
logrus.Debugf("Path set in opensearch config: %s", osPath)

if automateBackupPath != osPath {
err := fmt.Errorf("discrepancy in the backup paths. All backup paths should point to the same location. Backup path configured in automate node: %s, path_repo configured in opensearch node: %s. Please check the paths listed and ensure to configure all backup paths to point to the same destination. ", automateBackupPath, osPath)
return false, err
}
}
if snapshotServiceBackupPath == "" {
return true, nil
}
if automateBackupPath != snapshotServiceBackupPath {
err := fmt.Errorf("discrepancy in the backup paths. All backup paths should point to the same location. Backup path configured in automate node: %s, indices of snapshot services: %s. Please check the paths listed and ensure to configure all backup paths to point to the same destination. ", automateBackupPath, snapshotServiceBackupPath)
return false, err
}
return true, nil
}

func compareSnapshotLocationOfServices(backupLocation string, sshUtil SSHUtil) (string, bool, error) {
var isEs5Service, isEs6Service, allowRestore bool
var snapshotEs5ServicePath, snapshotEs6ServicePath string
osSnapshot, err := getSnapshotLocationsOfServices(sshUtil)

if err != nil {
return "", false, err
}
if osSnapshot.Es6Compliance == nil && osSnapshot.Es6EventFeed == nil && osSnapshot.Es6Erchef == nil && osSnapshot.Es6Ingest == nil {
isEs6Service = false
} else {
isEs6Service = true
}
if osSnapshot.Es5Compliance == nil && osSnapshot.Es5EventFeed == nil && osSnapshot.Es5Erchef == nil && osSnapshot.Es5Ingest == nil {
isEs5Service = false
} else {
isEs5Service = true
}
if !isEs5Service && !isEs6Service {
logrus.Debug("no snapshots found in the cluster")
allowRestore = true
return "", allowRestore, nil
} else {
switch backupLocation {
case "fs":
if isEs6Service {
if osSnapshot.Es6Compliance.Type == backupLocation && osSnapshot.Es6Erchef.Type == backupLocation && osSnapshot.Es6EventFeed.Type == backupLocation && osSnapshot.Es6Ingest.Type == backupLocation {
snapshotEs6ServicePath, allowRestore = getSnapshotPath(getSnapshotPathDetails(osSnapshot.Es6Compliance.Settings.Location),
getSnapshotPathDetails(osSnapshot.Es6Erchef.Settings.Location), getSnapshotPathDetails(osSnapshot.Es6EventFeed.Settings.Location),
getSnapshotPathDetails(osSnapshot.Es6Ingest.Settings.Location))
} else {
err := fmt.Errorf("discrepancy in the backup types. Backup path of all indices should be %s. Refer `https://docs.chef.io/automate/ha_backup_restore_file_system/#troubleshooting` to do a clean up of the snapshot repo", backupLocation)
return "", false, err
}
}
if isEs5Service {
if osSnapshot.Es5Compliance.Type == backupLocation && osSnapshot.Es5Erchef.Type == backupLocation && osSnapshot.Es5EventFeed.Type == backupLocation && osSnapshot.Es5Ingest.Type == backupLocation {
snapshotEs5ServicePath, allowRestore = getSnapshotPath(getSnapshotPathDetails(osSnapshot.Es5Compliance.Settings.Location),
getSnapshotPathDetails(osSnapshot.Es5Erchef.Settings.Location), getSnapshotPathDetails(osSnapshot.Es5EventFeed.Settings.Location),
getSnapshotPathDetails(osSnapshot.Es5Ingest.Settings.Location))
} else {
err := fmt.Errorf("discrepancy in the backup types. Backup type of all indices should be %s. Refer `https://docs.chef.io/automate/ha_backup_restore_file_system/#troubleshooting` to do a clean up of the snapshot repo", backupLocation)
return "", false, err
}
}

case "s3":
if isEs6Service {
if osSnapshot.Es6Compliance.Type == backupLocation && osSnapshot.Es6Erchef.Type == backupLocation && osSnapshot.Es6EventFeed.Type == backupLocation && osSnapshot.Es6Ingest.Type == backupLocation {
snapshotEs6ServicePath, allowRestore = getSnapshotPath(getSnapshotPathDetails(osSnapshot.Es6Compliance.Settings.BasePath),
getSnapshotPathDetails(osSnapshot.Es6Erchef.Settings.BasePath), getSnapshotPathDetails(osSnapshot.Es6EventFeed.Settings.BasePath),
getSnapshotPathDetails(osSnapshot.Es6Ingest.Settings.BasePath))
} else {
err := fmt.Errorf("discrepancy in the backup types. Backup type of all indices should be %s. Refer `https://docs.chef.io/automate/ha_backup_restore_object_storage/#troubleshooting` to do a clean up of the snapshot repo", backupLocation)
return "", false, err
}
}
if isEs5Service {
if osSnapshot.Es5Compliance.Type == backupLocation && osSnapshot.Es5Erchef.Type == backupLocation && osSnapshot.Es5EventFeed.Type == backupLocation && osSnapshot.Es5Ingest.Type == backupLocation {
snapshotEs5ServicePath, allowRestore = getSnapshotPath(getSnapshotPathDetails(osSnapshot.Es5Compliance.Settings.BasePath),
getSnapshotPathDetails(osSnapshot.Es5Erchef.Settings.BasePath), getSnapshotPathDetails(osSnapshot.Es5EventFeed.Settings.BasePath),
getSnapshotPathDetails(osSnapshot.Es5Ingest.Settings.BasePath))
} else {
err := fmt.Errorf("discrepancy in the backup types. Backup type of all indices should be %s. Refer `https://docs.chef.io/automate/ha_backup_restore_object_storage/#troubleshooting` to do a clean up of the snapshot repo", backupLocation)
return "", false, err
}
}

case "gcs":
if isEs6Service {
if osSnapshot.Es6Compliance.Type == backupLocation && osSnapshot.Es6Erchef.Type == backupLocation && osSnapshot.Es6EventFeed.Type == backupLocation && osSnapshot.Es6Ingest.Type == backupLocation {
snapshotEs6ServicePath, allowRestore = getSnapshotPath(getSnapshotPathDetails(osSnapshot.Es6Compliance.Settings.BasePath),
getSnapshotPathDetails(osSnapshot.Es6Erchef.Settings.BasePath), getSnapshotPathDetails(osSnapshot.Es6EventFeed.Settings.BasePath),
getSnapshotPathDetails(osSnapshot.Es6Ingest.Settings.BasePath))
} else {
err := fmt.Errorf("discrepancy in the backup types. Backup type of all indices should be %s. Refer `https://docs.chef.io/automate/ha_backup_restore_object_storage/#troubleshooting` to do a clean up of the snapshot repo", backupLocation)
return "", false, err
}
}
if isEs5Service {
if osSnapshot.Es5Compliance.Type == backupLocation && osSnapshot.Es5Erchef.Type == backupLocation && osSnapshot.Es5EventFeed.Type == backupLocation && osSnapshot.Es5Ingest.Type == backupLocation {
snapshotEs5ServicePath, allowRestore = getSnapshotPath(getSnapshotPathDetails(osSnapshot.Es5Compliance.Settings.BasePath),
getSnapshotPathDetails(osSnapshot.Es5Erchef.Settings.BasePath), getSnapshotPathDetails(osSnapshot.Es5EventFeed.Settings.BasePath),
getSnapshotPathDetails(osSnapshot.Es5Ingest.Settings.BasePath))
} else {
err := fmt.Errorf("discrepancy in the backup types. Backup type of all indices should be %s. Refer `https://docs.chef.io/automate/ha_backup_restore_object_storage/#troubleshooting` to do a clean up of the snapshot repo", backupLocation)
return "", false, err
}
}
default:
logrus.Debug("default case executed")
return "", false, fmt.Errorf("not supported backup type: %v", backupLocation)
}
if snapshotEs5ServicePath == snapshotEs6ServicePath {
allowRestore = true
return snapshotEs6ServicePath, allowRestore, nil
}
logrus.Debugf("Response from compareSnapshotLocationOfServices. allowRestore flag: %t, isEs5Service flag: %t, isEs6Service: %t, snapshotEs5ServicePath: %s, snapshotEs6ServicePath: %s",
allowRestore, isEs5Service, isEs6Service, snapshotEs5ServicePath, snapshotEs6ServicePath)

if isEs5Service && isEs6Service && snapshotEs5ServicePath == snapshotEs6ServicePath {
return snapshotEs6ServicePath, allowRestore, nil
} else if isEs6Service && !isEs5Service {
return snapshotEs6ServicePath, allowRestore, nil
} else {
return snapshotEs5ServicePath, allowRestore, nil
}
}
}

func getSnapshotLocationsOfServices(sshUtil SSHUtil) (*osSnapshot, error) {

// Execute curl command for OS snapshots on Automate server
resp, err := sshUtil.connectAndExecuteCommandOnRemote(fmt.Sprintf("curl -s %s", OS_SNAPSHOT_URL), false)
if err != nil {
return nil, err
}

logrus.Debug("response from curl for snapshot locations of services: ")
logrus.Debug(resp)

logrus.Debug("Unmarshalling GET response...")
var oss osSnapshot
err = json.Unmarshal([]byte(resp), &oss)
if err != nil {
return nil, err
}
return &oss, nil
}

func getSnapshotPath(complianceServicePath string, erchefServicePath string, eventFeedServicePath string, ingestServicePath string) (string, bool) {
var allowRestore bool
var snapshotServicePath string
if complianceServicePath == erchefServicePath && erchefServicePath == eventFeedServicePath && erchefServicePath == ingestServicePath {
allowRestore = true
snapshotServicePath = complianceServicePath
}
return snapshotServicePath, allowRestore
}

func getSnapshotPathDetails(snapshotPath string) string {
parts := strings.Split(snapshotPath, "/automate-elasticsearch-data")
Copy link
Collaborator

@vivekshankar1 vivekshankar1 Dec 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please handle the situation here if lets say parts length is zero

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

if len(parts) > 0 {
return parts[0]
}
return ""
}
36 changes: 24 additions & 12 deletions components/automate-cli/cmd/chef-automate/deployedConfig_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,18 +286,20 @@ var awsConfig = &AwsConfigToml{
}

type MockPullConfigs struct {
fetchInfraConfigFunc func(removeUnreachableNodes bool) (*ExistingInfraConfigToml, map[string][]string, error)
fetchAwsConfigFunc func(removeUnreachableNodes bool) (*AwsConfigToml, map[string][]string, error)
pullOpensearchConfigsFunc func(removeUnreachableNodes bool) (map[string]*ConfigKeys, []string, error)
pullPGConfigsFunc func(removeUnreachableNodes bool) (map[string]*ConfigKeys, []string, error)
pullAutomateConfigsFunc func(removeUnreachableNodes bool) (map[string]*dc.AutomateConfig, []string, error)
pullChefServerConfigsFunc func(removeUnreachableNodes bool) (map[string]*dc.AutomateConfig, []string, error)
generateInfraConfigFunc func(removeUnreachableNodes bool) (*ExistingInfraConfigToml, map[string][]string, error)
generateAwsConfigFunc func(removeUnreachableNodes bool) (*AwsConfigToml, map[string][]string, error)
getExceptionIpsFunc func() []string
setExceptionIpsFunc func(ips []string)
getOsCertsByIpFunc func(map[string]*ConfigKeys) []CertByIP
setInfraAndSSHUtilFunc func(*AutomateHAInfraDetails, SSHUtil)
fetchInfraConfigFunc func(removeUnreachableNodes bool) (*ExistingInfraConfigToml, map[string][]string, error)
fetchAwsConfigFunc func(removeUnreachableNodes bool) (*AwsConfigToml, map[string][]string, error)
pullOpensearchConfigsFunc func(removeUnreachableNodes bool) (map[string]*ConfigKeys, []string, error)
pullPGConfigsFunc func(removeUnreachableNodes bool) (map[string]*ConfigKeys, []string, error)
pullAutomateConfigsFunc func(removeUnreachableNodes bool) (map[string]*dc.AutomateConfig, []string, error)
pullChefServerConfigsFunc func(removeUnreachableNodes bool) (map[string]*dc.AutomateConfig, []string, error)
generateInfraConfigFunc func(removeUnreachableNodes bool) (*ExistingInfraConfigToml, map[string][]string, error)
generateAwsConfigFunc func(removeUnreachableNodes bool) (*AwsConfigToml, map[string][]string, error)
getExceptionIpsFunc func() []string
setExceptionIpsFunc func(ips []string)
getOsCertsByIpFunc func(map[string]*ConfigKeys) []CertByIP
setInfraAndSSHUtilFunc func(*AutomateHAInfraDetails, SSHUtil)
getBackupPathFromAutomateConfigFunc func(map[string]*dc.AutomateConfig, string) (string, error)
getBackupPathFromOpensearchConfigFunc func() (string, error)
}

func (m *MockPullConfigs) setInfraAndSSHUtil(*AutomateHAInfraDetails, SSHUtil) {
Expand Down Expand Up @@ -347,6 +349,16 @@ func (m *MockPullConfigs) getOsCertsByIp(configKeysMap map[string]*ConfigKeys) [
return m.getOsCertsByIpFunc(configKeysMap)
}

func (m *MockPullConfigs) getBackupPathFromAutomateConfig(a2ConfigMap map[string]*dc.AutomateConfig, backupLocation string) (string, error) {
return m.getBackupPathFromAutomateConfigFunc(a2ConfigMap, backupLocation)
}

func (m *MockPullConfigs) getBackupPathFromOpensearchConfig() (string, error) {
return m.getBackupPathFromOpensearchConfigFunc()
}



func TestPopulateHaCommonConfig(t *testing.T) {
tests := []struct {
name string
Expand Down
Loading
Loading