Skip to content

Commit 0e658b7

Browse files
Don't test corruption detection within CFS checksum (elastic#33911)
Closes elastic#33881 (cherry picked from commit 477391d)
1 parent 6bb42c6 commit 0e658b7

File tree

2 files changed

+112
-16
lines changed

2 files changed

+112
-16
lines changed

test/framework/src/main/java/org/elasticsearch/test/CorruptionUtils.java

+31-16
Original file line numberDiff line numberDiff line change
@@ -60,24 +60,19 @@ public static void corruptFile(Random random, Path... files) throws IOException
6060
checksumBeforeCorruption = CodecUtil.retrieveChecksum(input);
6161
}
6262
try (FileChannel raf = FileChannel.open(fileToCorrupt, StandardOpenOption.READ, StandardOpenOption.WRITE)) {
63-
// read
64-
raf.position(random.nextInt((int) Math.min(Integer.MAX_VALUE, raf.size())));
65-
long filePointer = raf.position();
66-
ByteBuffer bb = ByteBuffer.wrap(new byte[1]);
67-
raf.read(bb);
68-
bb.flip();
63+
long maxPosition = raf.size();
6964

70-
// corrupt
71-
byte oldValue = bb.get(0);
72-
byte newValue = (byte) (oldValue + 1);
73-
bb.put(0, newValue);
74-
75-
// rewrite
76-
raf.position(filePointer);
77-
raf.write(bb);
78-
logger.info("Corrupting file -- flipping at position {} from {} to {} file: {}", filePointer,
79-
Integer.toHexString(oldValue), Integer.toHexString(newValue), fileToCorrupt.getFileName());
65+
if (fileToCorrupt.getFileName().toString().endsWith(".cfs") && maxPosition > 4) {
66+
// TODO: it is known that Lucene does not check the checksum of a CFS file (compound file, similar to an archive)
67+
// see note at https://github.com/elastic/elasticsearch/pull/33911
68+
// for now, don't corrupt the crc32 part of the checksum (last 4 bytes) of a cfs file
69+
// checksum is 8 bytes: first 4 bytes have to be zeros, while crc32 value is not verified
70+
maxPosition -= 4;
71+
}
72+
final int position = random.nextInt((int) Math.min(Integer.MAX_VALUE, maxPosition));
73+
corruptAt(fileToCorrupt, raf, position);
8074
}
75+
8176
long checksumAfterCorruption;
8277
long actualChecksumAfterCorruption;
8378
try (ChecksumIndexInput input = dir.openChecksumInput(fileToCorrupt.getFileName().toString(), IOContext.DEFAULT)) {
@@ -102,5 +97,25 @@ public static void corruptFile(Random random, Path... files) throws IOException
10297
}
10398
}
10499

100+
static void corruptAt(Path path, FileChannel channel, int position) throws IOException {
101+
// read
102+
channel.position(position);
103+
long filePointer = channel.position();
104+
ByteBuffer bb = ByteBuffer.wrap(new byte[1]);
105+
channel.read(bb);
106+
bb.flip();
107+
108+
// corrupt
109+
byte oldValue = bb.get(0);
110+
byte newValue = (byte) (oldValue + 1);
111+
bb.put(0, newValue);
112+
113+
// rewrite
114+
channel.position(filePointer);
115+
channel.write(bb);
116+
logger.info("Corrupting file -- flipping at position {} from {} to {} file: {}", filePointer,
117+
Integer.toHexString(oldValue), Integer.toHexString(newValue), path.getFileName());
118+
}
119+
105120

106121
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.test;
20+
21+
import org.apache.lucene.index.CheckIndex;
22+
import org.apache.lucene.store.SimpleFSDirectory;
23+
import org.elasticsearch.action.admin.indices.flush.FlushRequest;
24+
import org.elasticsearch.index.shard.IndexShard;
25+
import org.elasticsearch.index.shard.IndexShardTestCase;
26+
import org.elasticsearch.index.shard.ShardPath;
27+
28+
import java.nio.channels.FileChannel;
29+
import java.nio.file.Files;
30+
import java.nio.file.Path;
31+
import java.nio.file.StandardOpenOption;
32+
import java.util.stream.Stream;
33+
34+
import static org.elasticsearch.test.CorruptionUtils.corruptAt;
35+
import static org.hamcrest.Matchers.equalTo;
36+
import static org.hamcrest.Matchers.lessThan;
37+
38+
public class CorruptionUtilsTests extends IndexShardTestCase {
39+
40+
/**
41+
* There is a dependency on Lucene bug fix
42+
* https://github.com/elastic/elasticsearch/pull/33911
43+
*/
44+
public void testLuceneCheckIndexIgnoresLast4Bytes() throws Exception {
45+
final IndexShard indexShard = newStartedShard(true);
46+
47+
final long numDocs = between(10, 100);
48+
for (long i = 0; i < numDocs; i++) {
49+
indexDoc(indexShard, "_doc", Long.toString(i), "{}");
50+
}
51+
indexShard.flush(new FlushRequest());
52+
closeShards(indexShard);
53+
54+
final ShardPath shardPath = indexShard.shardPath();
55+
56+
final Path indexPath = shardPath.getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME);
57+
58+
final Path cfsFile;
59+
try (Stream<Path> paths = Files.walk(indexPath)) {
60+
cfsFile = paths.filter(p -> p.getFileName().toString().endsWith(".cfs")).findFirst()
61+
.orElseThrow(() -> new IllegalStateException("CFS file has to be there"));
62+
}
63+
64+
try (FileChannel raf = FileChannel.open(cfsFile, StandardOpenOption.READ, StandardOpenOption.WRITE)) {
65+
assertThat(raf.size(), lessThan(Integer.MAX_VALUE * 1L));
66+
final int maxPosition = (int) raf.size();
67+
// corrupt only last 4 bytes!
68+
final int position = randomIntBetween(maxPosition - 4, maxPosition - 1);
69+
corruptAt(cfsFile, raf, position);
70+
}
71+
72+
final CheckIndex.Status status;
73+
try (CheckIndex checkIndex = new CheckIndex(new SimpleFSDirectory(indexPath))) {
74+
status = checkIndex.checkIndex();
75+
}
76+
77+
assertThat("That's a good news! "
78+
+ "Lucene now validates CRC32 of CFS file: time to drop workaround at CorruptionUtils (and this test)",
79+
status.clean, equalTo(true));
80+
}
81+
}

0 commit comments

Comments
 (0)