Skip to content

Commit 518338f

Browse files
authored
HDDS-11946. Require all ozone repair commands to support a --dry-run option (#7682)
1 parent 642b1c7 commit 518338f

File tree

11 files changed

+125
-72
lines changed

11 files changed

+125
-72
lines changed

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -346,8 +346,8 @@ private int dryRun(String... args) {
346346

347347
private int execute(boolean dryRun, String... args) {
348348
List<String> argList = new ArrayList<>(Arrays.asList("om", "fso-tree", "--db", dbPath));
349-
if (!dryRun) {
350-
argList.add("--repair");
349+
if (dryRun) {
350+
argList.add("--dry-run");
351351
}
352352
argList.addAll(Arrays.asList(args));
353353

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneRepairShell.java

+9-3
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ public class TestOzoneRepairShell {
5151
private GenericTestUtils.PrintStreamCapturer err;
5252
private static MiniOzoneCluster cluster = null;
5353
private static OzoneConfiguration conf = null;
54+
private static String om;
5455

5556
private static final String TRANSACTION_INFO_TABLE_TERM_INDEX_PATTERN = "([0-9]+#[0-9]+)";
5657

@@ -59,6 +60,7 @@ public static void init() throws Exception {
5960
conf = new OzoneConfiguration();
6061
cluster = MiniOzoneCluster.newBuilder(conf).build();
6162
cluster.waitForClusterToBeReady();
63+
om = conf.get(OZONE_OM_ADDRESS_KEY);
6264
}
6365

6466
@BeforeEach
@@ -136,17 +138,21 @@ private String[] parseScanOutput(String output) {
136138
public void testQuotaRepair() throws Exception {
137139
CommandLine cmd = new OzoneRepair().getCmd();
138140

139-
int exitCode = cmd.execute("om", "quota", "status", "--service-host", conf.get(OZONE_OM_ADDRESS_KEY));
141+
int exitCode = cmd.execute("om", "quota", "status", "--service-host", om);
140142
assertEquals(0, exitCode, err);
141143

144+
cmd.execute("om", "quota", "start", "--dry-run", "--service-host", om);
145+
cmd.execute("om", "quota", "status", "--service-host", om);
146+
assertThat(out.get()).doesNotContain("lastRun");
147+
142148
exitCode = withTextFromSystemIn("y")
143-
.execute(() -> cmd.execute("om", "quota", "start", "--service-host", conf.get(OZONE_OM_ADDRESS_KEY)));
149+
.execute(() -> cmd.execute("om", "quota", "start", "--service-host", om));
144150
assertEquals(0, exitCode, err);
145151

146152
GenericTestUtils.waitFor(() -> {
147153
out.reset();
148154
// verify quota trigger is completed having non-zero lastRunFinishedTime
149-
cmd.execute("om", "quota", "status", "--service-host", conf.get(OZONE_OM_ADDRESS_KEY));
155+
cmd.execute("om", "quota", "status", "--service-host", om);
150156
try {
151157
return out.get().contains("\"lastRunFinishedTime\":\"\"");
152158
} catch (Exception ex) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.ozone.repair;
19+
20+
/**
 * Marker interface for repair subcommands that do not modify state.
 * <p>
 * Leaf subcommands implementing this interface are exempt from the
 * requirement (enforced by {@code TestOzoneRepair#subcommandsSupportDryRun})
 * that every repair command define a {@code --dry-run} option, since a
 * read-only command has nothing to simulate.
 */
21+
public interface ReadOnlyCommand {
22+
  // Intentionally empty: implementing this interface is itself the signal.
23+
}

hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/RepairTool.java

+17-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.util.concurrent.Callable;
2626

2727
/** Parent class for all actionable repair commands. */
28+
@CommandLine.Command
2829
public abstract class RepairTool extends AbstractSubcommand implements Callable<Void> {
2930

3031
private static final String WARNING_SYS_USER_MESSAGE =
@@ -35,12 +36,20 @@ public abstract class RepairTool extends AbstractSubcommand implements Callable<
3536
description = "Use this flag if you want to bypass the check in false-positive cases.")
3637
private boolean force;
3738

39+
@CommandLine.Option(names = {"--dry-run"},
40+
defaultValue = "false",
41+
fallbackValue = "true",
42+
description = "Simulate repair, but do not make any changes")
43+
private boolean dryRun;
44+
3845
/** Hook method for subclasses for performing actual repair task. */
3946
protected abstract void execute() throws Exception;
4047

4148
@Override
4249
public final Void call() throws Exception {
43-
confirmUser();
50+
if (!dryRun) {
51+
confirmUser();
52+
}
4453
execute();
4554
return null;
4655
}
@@ -65,6 +74,10 @@ protected boolean checkIfServiceIsRunning(String serviceName) {
6574
return false;
6675
}
6776

77+
protected boolean isDryRun() {
78+
return dryRun;
79+
}
80+
6881
protected void info(String msg, Object... args) {
6982
out().println(formatMessage(msg, args));
7083
}
@@ -77,6 +90,9 @@ private String formatMessage(String msg, Object[] args) {
7790
if (args != null && args.length > 0) {
7891
msg = String.format(msg, args);
7992
}
93+
if (isDryRun()) {
94+
msg = "[dry run] " + msg;
95+
}
8096
return msg;
8197
}
8298

hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java

+9-35
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,6 @@ public class FSORepairTool extends RepairTool {
8585
description = "Path to OM RocksDB")
8686
private String omDBPath;
8787

88-
@CommandLine.Option(names = {"-r", "--repair"},
89-
defaultValue = "false",
90-
description = "Run in repair mode to move unreferenced files and directories to deleted tables.")
91-
private boolean repair;
92-
9388
@CommandLine.Option(names = {"-v", "--volume"},
9489
description = "Filter by volume name. Add '/' before the volume name.")
9590
private String volumeFilter;
@@ -99,19 +94,14 @@ public class FSORepairTool extends RepairTool {
9994
private String bucketFilter;
10095

10196
@CommandLine.Option(names = {"--verbose"},
102-
description = "Verbose output. Show all intermediate steps and deleted keys info.")
97+
description = "Verbose output. Show all intermediate steps.")
10398
private boolean verbose;
10499

105100
@Override
106101
public void execute() throws Exception {
107102
if (checkIfServiceIsRunning("OM")) {
108103
return;
109104
}
110-
if (repair) {
111-
info("FSO Repair Tool is running in repair mode");
112-
} else {
113-
info("FSO Repair Tool is running in debug mode");
114-
}
115105
try {
116106
Impl repairTool = new Impl();
117107
repairTool.run();
@@ -274,18 +264,12 @@ private boolean checkIfSnapshotExistsForBucket(String volumeName, String bucketN
274264
}
275265

276266
private void processBucket(OmVolumeArgs volume, OmBucketInfo bucketInfo) throws IOException {
277-
info("Processing bucket: " + volume.getVolume() + "/" + bucketInfo.getBucketName());
278267
if (checkIfSnapshotExistsForBucket(volume.getVolume(), bucketInfo.getBucketName())) {
279-
if (!repair) {
280-
info(
281-
"Snapshot detected in bucket '" + volume.getVolume() + "/" + bucketInfo.getBucketName() + "'. ");
282-
} else {
283-
info(
284-
"Skipping repair for bucket '" + volume.getVolume() + "/" + bucketInfo.getBucketName() + "' " +
285-
"due to snapshot presence.");
286-
return;
287-
}
268+
info("Skipping repair for bucket '" + volume.getVolume() + "/" + bucketInfo.getBucketName() + "' " +
269+
"due to snapshot presence.");
270+
return;
288271
}
272+
info("Processing bucket: " + volume.getVolume() + "/" + bucketInfo.getBucketName());
289273
markReachableObjectsInBucket(volume, bucketInfo);
290274
handleUnreachableAndUnreferencedObjects(volume, bucketInfo);
291275
}
@@ -359,15 +343,10 @@ private void handleUnreachableAndUnreferencedObjects(OmVolumeArgs volume, OmBuck
359343

360344
if (!isReachable(dirKey)) {
361345
if (!isDirectoryInDeletedDirTable(dirKey)) {
362-
info("Found unreferenced directory: " + dirKey);
363346
unreferencedStats.addDir();
364347

365-
if (!repair) {
366-
if (verbose) {
367-
info("Marking unreferenced directory " + dirKey + " for deletion.");
368-
}
369-
} else {
370-
info("Deleting unreferenced directory " + dirKey);
348+
info("Deleting unreferenced directory " + dirKey);
349+
if (!isDryRun()) {
371350
OmDirectoryInfo dirInfo = dirEntry.getValue();
372351
markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), dirKey, dirInfo);
373352
}
@@ -393,15 +372,10 @@ private void handleUnreachableAndUnreferencedObjects(OmVolumeArgs volume, OmBuck
393372
OmKeyInfo fileInfo = fileEntry.getValue();
394373
if (!isReachable(fileKey)) {
395374
if (!isFileKeyInDeletedTable(fileKey)) {
396-
info("Found unreferenced file: " + fileKey);
397375
unreferencedStats.addFile(fileInfo.getDataSize());
398376

399-
if (!repair) {
400-
if (verbose) {
401-
info("Marking unreferenced file " + fileKey + " for deletion." + fileKey);
402-
}
403-
} else {
404-
info("Deleting unreferenced file " + fileKey);
377+
info("Deleting unreferenced file " + fileKey);
378+
if (!isDryRun()) {
405379
markFileForDeletion(fileKey, fileInfo);
406380
}
407381
} else {

hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/SnapshotChainRepair.java

+7-11
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,6 @@ public class SnapshotChainRepair extends RepairTool {
7676
description = "Path previous snapshotId to set for the given snapshot")
7777
private UUID pathPreviousSnapshotId;
7878

79-
@CommandLine.Option(names = {"--dry-run"},
80-
required = true,
81-
description = "To dry-run the command.", defaultValue = "true")
82-
private boolean dryRun;
83-
8479
@Override
8580
public void execute() throws Exception {
8681
if (checkIfServiceIsRunning("OM")) {
@@ -139,12 +134,13 @@ public void execute() throws Exception {
139134
snapshotInfo.setGlobalPreviousSnapshotId(globalPreviousSnapshotId);
140135
snapshotInfo.setPathPreviousSnapshotId(pathPreviousSnapshotId);
141136

142-
if (dryRun) {
143-
info("SnapshotInfo would be updated to : %s", snapshotInfo);
144-
} else {
145-
byte[] snapshotInfoBytes = SnapshotInfo.getCodec().toPersistedFormat(snapshotInfo);
146-
db.get()
147-
.put(snapshotInfoCfh, StringCodec.get().toPersistedFormat(snapshotInfoTableKey), snapshotInfoBytes);
137+
info("Updating SnapshotInfo to %s", snapshotInfo);
138+
139+
byte[] snapshotInfoBytes = SnapshotInfo.getCodec().toPersistedFormat(snapshotInfo);
140+
byte[] persistedFormat = StringCodec.get().toPersistedFormat(snapshotInfoTableKey);
141+
142+
if (!isDryRun()) {
143+
db.get().put(snapshotInfoCfh, persistedFormat, snapshotInfoBytes);
148144

149145
info("Snapshot Info is updated to : %s",
150146
RocksDBUtils.getValue(db, snapshotInfoCfh, snapshotInfoTableKey, SnapshotInfo.getCodec()));

hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/TransactionInfoRepair.java

+10-5
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,17 @@ public void execute() throws Exception {
8989
TransactionInfo transactionInfo = TransactionInfo.valueOf(highestTransactionTerm, highestTransactionIndex);
9090

9191
byte[] transactionInfoBytes = TransactionInfo.getCodec().toPersistedFormat(transactionInfo);
92-
db.get()
93-
.put(transactionInfoCfh, StringCodec.get().toPersistedFormat(TRANSACTION_INFO_KEY), transactionInfoBytes);
92+
byte[] key = StringCodec.get().toPersistedFormat(TRANSACTION_INFO_KEY);
9493

95-
info("The highest transaction info has been updated to: %s",
96-
RocksDBUtils.getValue(db, transactionInfoCfh, TRANSACTION_INFO_KEY,
97-
TransactionInfo.getCodec()).getTermIndex());
94+
info("Updating transaction info to %s", transactionInfo.getTermIndex());
95+
96+
if (!isDryRun()) {
97+
db.get().put(transactionInfoCfh, key, transactionInfoBytes);
98+
99+
info("The highest transaction info has been updated to: %s",
100+
RocksDBUtils.getValue(db, transactionInfoCfh, TRANSACTION_INFO_KEY,
101+
TransactionInfo.getCodec()).getTermIndex());
102+
}
98103
} catch (RocksDBException exception) {
99104
error("Failed to update the RocksDB for the given path: %s", dbPath);
100105
error(

hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/quota/QuotaStatus.java

+5-4
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.util.concurrent.Callable;
2525
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
2626
import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol;
27+
import org.apache.hadoop.ozone.repair.ReadOnlyCommand;
2728
import picocli.CommandLine;
2829

2930
/**
@@ -35,7 +36,7 @@
3536
mixinStandardHelpOptions = true,
3637
versionProvider = HddsVersionProvider.class
3738
)
38-
public class QuotaStatus implements Callable<Void> {
39+
public class QuotaStatus implements Callable<Void>, ReadOnlyCommand {
3940

4041
@CommandLine.Option(
4142
names = {"--service-id", "--om-service-id"},
@@ -57,9 +58,9 @@ public class QuotaStatus implements Callable<Void> {
5758

5859
@Override
5960
public Void call() throws Exception {
60-
OzoneManagerProtocol ozoneManagerClient =
61-
parent.createOmClient(omServiceId, omHost, false);
62-
System.out.println(ozoneManagerClient.getQuotaRepairStatus());
61+
try (OzoneManagerProtocol omClient = parent.createOmClient(omServiceId, omHost, false)) {
62+
System.out.println(omClient.getQuotaRepairStatus());
63+
}
6364
return null;
6465
}
6566
}

hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/quota/QuotaTrigger.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,10 @@ public void execute() throws Exception {
7777
bucketList.isEmpty()
7878
? "all buckets"
7979
: ("buckets " + buckets));
80-
omClient.startQuotaRepair(bucketList);
81-
info(omClient.getQuotaRepairStatus());
80+
if (!isDryRun()) {
81+
omClient.startQuotaRepair(bucketList);
82+
info(omClient.getQuotaRepairStatus());
83+
}
8284
}
8385
}
8486
}

hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/scm/cert/RecoverSCMCertificate.java

+11-9
Original file line numberDiff line numberDiff line change
@@ -194,29 +194,31 @@ private void storeCerts(X509Certificate scmCertificate,
194194
CertificateCodec certCodec =
195195
new CertificateCodec(securityConfig, SCMCertificateClient.COMPONENT_NAME);
196196

197-
info("Writing certs to path : %s", certCodec.getLocation());
198-
199197
CertPath certPath = addRootCertInPath(scmCertificate, rootCertificate);
200198
CertPath rootCertPath = getRootCertPath(rootCertificate);
201199
String encodedCert = CertificateCodec.getPEMEncodedString(certPath);
202200
String certName = String.format(CERT_FILE_NAME_FORMAT,
203201
CAType.NONE.getFileNamePrefix() + scmCertificate.getSerialNumber());
204-
certCodec.writeCertificate(certName, encodedCert);
202+
writeCertificate(certCodec, certName, encodedCert);
205203

206204
String rootCertName = String.format(CERT_FILE_NAME_FORMAT,
207205
CAType.SUBORDINATE.getFileNamePrefix() + rootCertificate.getSerialNumber());
208206
String encodedRootCert = CertificateCodec.getPEMEncodedString(rootCertPath);
209-
certCodec.writeCertificate(rootCertName, encodedRootCert);
207+
writeCertificate(certCodec, rootCertName, encodedRootCert);
210208

211-
certCodec.writeCertificate(certCodec.getLocation().toAbsolutePath(),
212-
securityConfig.getCertificateFileName(), encodedCert);
209+
writeCertificate(certCodec, securityConfig.getCertificateFileName(), encodedCert);
213210

214211
if (isRootCA) {
215212
CertificateCodec rootCertCodec =
216213
new CertificateCodec(securityConfig, OzoneConsts.SCM_ROOT_CA_COMPONENT_NAME);
217-
info("Writing root certs to path : %s", rootCertCodec.getLocation());
218-
rootCertCodec.writeCertificate(rootCertCodec.getLocation().toAbsolutePath(),
219-
securityConfig.getCertificateFileName(), encodedRootCert);
214+
writeCertificate(rootCertCodec, securityConfig.getCertificateFileName(), encodedRootCert);
215+
}
216+
}
217+
218+
private void writeCertificate(CertificateCodec codec, String name, String encodedCert) throws IOException {
219+
info("Writing cert %s to %s", name, codec.getLocation());
220+
if (!isDryRun()) {
221+
codec.writeCertificate(name, encodedCert);
220222
}
221223
}
222224

hadoop-ozone/tools/src/test/java/org/apache/hadoop/ozone/repair/TestOzoneRepair.java

+28
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.io.ByteArrayOutputStream;
2929
import java.io.PrintStream;
3030
import java.util.List;
31+
import java.util.Map;
3132

3233
import static java.nio.charset.StandardCharsets.UTF_8;
3334
import static java.util.Arrays.asList;
@@ -68,6 +69,33 @@ public void reset() {
6869
System.setProperty("user.name", OLD_USER);
6970
}
7071

72+
/** All leaf subcommands should support {@code --dry-run},
73+
* except if marked as {@link ReadOnlyCommand}. */
74+
@Test
75+
void subcommandsSupportDryRun() {
76+
assertSubcommandOptionRecursively(new OzoneRepair().getCmd());
77+
}
78+
79+
private static void assertSubcommandOptionRecursively(CommandLine cmd) {
80+
Map<String, CommandLine> subcommands = cmd.getSubcommands();
81+
if (subcommands.isEmpty()) {
82+
// leaf command
83+
CommandLine.Model.CommandSpec spec = cmd.getCommandSpec();
84+
Object userObject = spec.userObject();
85+
if (!(userObject instanceof ReadOnlyCommand)) {
86+
assertThat(spec.optionsMap().keySet())
87+
.as(() -> "'" + spec.qualifiedName() + "' defined by " + userObject.getClass()
88+
+ " should support --dry-run or implement " + ReadOnlyCommand.class)
89+
.contains("--dry-run");
90+
}
91+
} else {
92+
// parent command
93+
for (CommandLine sub : subcommands.values()) {
94+
assertSubcommandOptionRecursively(sub);
95+
}
96+
}
97+
}
98+
7199
@Test
72100
void testOzoneRepairWhenUserIsRemindedSystemUserAndDeclinesToProceed() throws Exception {
73101
OzoneRepair ozoneRepair = new OzoneRepair();

0 commit comments

Comments
 (0)