From 1fb9817335d11f8925ed2c4e6a1c1ee827a30991 Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Mon, 17 Nov 2025 07:12:55 -0500 Subject: [PATCH 1/5] Backport #14607 (Index open performs version check on each segment, ignores indexCreatedVersionMajor) to branch_10x --- lucene/CHANGES.txt | 2 + .../TestAncientIndicesCompatibility.java | 15 +++++- .../TestBasicBackwardsCompatibility.java | 5 +- .../org/apache/lucene/index/SegmentInfos.java | 49 +++++++++++++------ 4 files changed, 53 insertions(+), 18 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 3ddcb951284a..84200d0b3f9e 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -122,6 +122,8 @@ Other Applications using SecurityManager now need to grant SerializablePermission("serialFilter") to the analysis-smartcn module. (Uwe Schindler, Isaac David) +* GITHUB#14607: Index open performs version check on each segment, ignores indexCreatedVersionMajor (Rahul Goswami) + Build --------------------- * Upgrade forbiddenapis to version 3.10. 
(Uwe Schindler) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestAncientIndicesCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestAncientIndicesCompatibility.java index 88adfadf1c88..a06a96b2ed57 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestAncientIndicesCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestAncientIndicesCompatibility.java @@ -45,6 +45,7 @@ import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; public class TestAncientIndicesCompatibility extends LuceneTestCase { static final Set UNSUPPORTED_INDEXES; @@ -198,7 +199,7 @@ public void testUnsupportedOldIndexes() throws Exception { checker.setInfoStream(new PrintStream(bos, false, UTF_8)); checker.setLevel(CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS); CheckIndex.Status indexStatus = checker.checkIndex(); - if (version.startsWith("8.")) { + if (getVersion(version).onOrAfter(Version.fromBits(8, 6, 0))) { assertTrue(indexStatus.clean); } else { assertFalse(indexStatus.clean); @@ -216,6 +217,18 @@ public void testUnsupportedOldIndexes() throws Exception { } } + private Version getVersion(String version) { + if (version.startsWith("5x")) { + // couple of indices in unsupported_indices.txt start with "5x' + return Version.fromBits(5, 0, 0); + } + String[] versionBitsStr = version.split("[.\\-]"); + return Version.fromBits( + Integer.parseInt(versionBitsStr[0]), + Integer.parseInt(versionBitsStr[1]), + Integer.parseInt(versionBitsStr[2])); + } + // #12895: test on a carefully crafted 9.8.0 index (from a small contiguous subset // of wikibigall unique terms) that shows the read-time exception of // IntersectTermsEnum (used by WildcardQuery) diff --git 
a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java index cf50b9e1526d..c2aa65cced9c 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBasicBackwardsCompatibility.java @@ -861,7 +861,10 @@ public void testFailOpenOldIndex() throws IOException { () -> StandardDirectoryReader.open(commit, Version.LATEST.major, null)); assertTrue( ex.getMessage() - .contains("only supports reading from version " + Version.LATEST.major + " upwards.")); + .contains( + "This Lucene version only supports indexes with major version " + + Version.LATEST.major + + " or later")); // now open with allowed min version StandardDirectoryReader.open(commit, Version.MIN_SUPPORTED_MAJOR, null).close(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 3b65e1cbff8d..131518983a81 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -328,7 +328,7 @@ public static final SegmentInfos readCommit( throw new IndexFormatTooOldException( input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC); } - format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_74, VERSION_CURRENT); + format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_86, VERSION_CURRENT); byte[] id = new byte[StringHelper.ID_LENGTH]; input.readBytes(id, 0, id.length); CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX)); @@ -346,25 +346,12 @@ public static final SegmentInfos readCommit( input); } - if (indexCreatedVersion < minSupportedMajorVersion) { - throw new IndexFormatTooOldException( - input, - "This 
index was initially created with Lucene " - + indexCreatedVersion - + ".x while the current version is " - + Version.LATEST - + " and Lucene only supports reading" - + (minSupportedMajorVersion == Version.MIN_SUPPORTED_MAJOR - ? " the current and previous major versions" - : " from version " + minSupportedMajorVersion + " upwards")); - } - SegmentInfos infos = new SegmentInfos(indexCreatedVersion); infos.id = id; infos.generation = generation; infos.lastGeneration = generation; infos.luceneVersion = luceneVersion; - parseSegmentInfos(directory, input, infos, format); + parseSegmentInfos(directory, input, infos, format, minSupportedMajorVersion); return infos; } catch (Throwable t) { @@ -380,7 +367,12 @@ public static final SegmentInfos readCommit( } private static void parseSegmentInfos( - Directory directory, DataInput input, SegmentInfos infos, int format) throws IOException { + Directory directory, + DataInput input, + SegmentInfos infos, + int format, + int minSupportedMajorVersion) + throws IOException { infos.version = CodecUtil.readBELong(input); // System.out.println("READ sis version=" + infos.version); infos.counter = input.readVLong(); @@ -397,6 +389,7 @@ private static void parseSegmentInfos( } long totalDocs = 0; + for (int seg = 0; seg < numSegments; seg++) { String segName = input.readString(); byte[] segmentID = new byte[StringHelper.ID_LENGTH]; @@ -490,6 +483,30 @@ private static void parseSegmentInfos( + infos.indexCreatedVersionMajor, input); } + + int createdOrSegmentMinVersion = + info.getMinVersion() == null + ? infos.indexCreatedVersionMajor + : info.getMinVersion().major; + + // version >=7 are expected to record minVersion + if (info.getMinVersion() == null || info.getMinVersion().major < minSupportedMajorVersion) { + throw new IndexFormatTooOldException( + input, + "Index has segments derived from Lucene version " + + createdOrSegmentMinVersion + + ".x and is not supported by Lucene " + + Version.LATEST + + ". 
This Lucene version only supports indexes with major version " + + minSupportedMajorVersion + + " or later (found: " + + createdOrSegmentMinVersion + + ", minimum supported: " + + minSupportedMajorVersion + + "). To resolve this issue re-index your data using Lucene " + + minSupportedMajorVersion + + ".x or later."); + } } infos.userData = input.readMapOfStrings(); From 22a4fe1f9addb7d100cf746dc87e8b5dcd1a5ffc Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Tue, 18 Nov 2025 02:52:14 -0500 Subject: [PATCH 2/5] CHANGES.txt --- lucene/CHANGES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 84200d0b3f9e..868abf9db811 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -122,7 +122,7 @@ Other Applications using SecurityManager now need to grant SerializablePermission("serialFilter") to the analysis-smartcn module. (Uwe Schindler, Isaac David) -* GITHUB#14607: Index open performs version check on each segment, ignores indexCreatedVersionMajor (Rahul Goswami) +* GITHUB#15431: Index open performs version check on each segment, ignores indexCreatedVersionMajor (Rahul Goswami) Build --------------------- From 243d32195f2ebdc9f9ec814e2a4f578ce0fb6916 Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Tue, 25 Nov 2025 00:22:57 -0500 Subject: [PATCH 3/5] Revert to VERSION_74 in SegmentInfos header check. 
SegmentInfos.readCodec() throws IndexFormatTooOldException if a default Lucene codec is not found --- .../TestAncientIndicesCompatibility.java | 15 +-------------- .../org/apache/lucene/index/SegmentInfos.java | 12 ++++++++---- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestAncientIndicesCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestAncientIndicesCompatibility.java index a06a96b2ed57..88adfadf1c88 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestAncientIndicesCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestAncientIndicesCompatibility.java @@ -45,7 +45,6 @@ import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.Version; public class TestAncientIndicesCompatibility extends LuceneTestCase { static final Set UNSUPPORTED_INDEXES; @@ -199,7 +198,7 @@ public void testUnsupportedOldIndexes() throws Exception { checker.setInfoStream(new PrintStream(bos, false, UTF_8)); checker.setLevel(CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS); CheckIndex.Status indexStatus = checker.checkIndex(); - if (getVersion(version).onOrAfter(Version.fromBits(8, 6, 0))) { + if (version.startsWith("8.")) { assertTrue(indexStatus.clean); } else { assertFalse(indexStatus.clean); @@ -217,18 +216,6 @@ public void testUnsupportedOldIndexes() throws Exception { } } - private Version getVersion(String version) { - if (version.startsWith("5x")) { - // couple of indices in unsupported_indices.txt start with "5x' - return Version.fromBits(5, 0, 0); - } - String[] versionBitsStr = version.split("[.\\-]"); - return Version.fromBits( - Integer.parseInt(versionBitsStr[0]), - Integer.parseInt(versionBitsStr[1]), - Integer.parseInt(versionBitsStr[2])); - } - // #12895: test on a carefully 
crafted 9.8.0 index (from a small contiguous subset // of wikibigall unique terms) that shows the read-time exception of // IntersectTermsEnum (used by WildcardQuery) diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 131518983a81..2754efda0299 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -328,7 +328,7 @@ public static final SegmentInfos readCommit( throw new IndexFormatTooOldException( input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC); } - format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_86, VERSION_CURRENT); + format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_74, VERSION_CURRENT); byte[] id = new byte[StringHelper.ID_LENGTH]; input.readBytes(id, 0, id.length); CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX)); @@ -529,11 +529,15 @@ private static Codec readCodec(DataInput input) throws IOException { } catch (IllegalArgumentException e) { // maybe it's an old default codec that moved if (name.startsWith("Lucene")) { - throw new IllegalArgumentException( + throw new IndexFormatTooOldException( + input, "Could not load codec '" + name - + "'. Did you forget to add lucene-backward-codecs.jar?", - e); + + "'. " + + e.getMessage() + + ". This Lucene version only supports indexes with major version " + + Version.MIN_SUPPORTED_MAJOR + + " or later. 
Or did you forget to add lucene-backward-codecs.jar?"); } throw e; } From 8bf4f40eaf94ad8ce42eb25913bd4e31bed7b7d9 Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Tue, 25 Nov 2025 01:40:17 -0500 Subject: [PATCH 4/5] message change --- .../core/src/java/org/apache/lucene/index/SegmentInfos.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 2754efda0299..5cd4551f6ced 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -535,9 +535,7 @@ private static Codec readCodec(DataInput input) throws IOException { + name + "'. " + e.getMessage() - + ". This Lucene version only supports indexes with major version " - + Version.MIN_SUPPORTED_MAJOR - + " or later. Or did you forget to add lucene-backward-codecs.jar?"); + + ". Did you forget to add lucene-backward-codecs.jar?"); } throw e; } From 66a778b4adece3a4db5c443bae4ead377dd81859 Mon Sep 17 00:00:00 2001 From: Rahul Goswami Date: Tue, 25 Nov 2025 01:43:37 -0500 Subject: [PATCH 5/5] Changes.txt --- lucene/CHANGES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 868abf9db811..69f3334ad988 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -122,7 +122,7 @@ Other Applications using SecurityManager now need to grant SerializablePermission("serialFilter") to the analysis-smartcn module. (Uwe Schindler, Isaac David) -* GITHUB#15431: Index open performs version check on each segment, ignores indexCreatedVersionMajor (Rahul Goswami) +* GITHUB#15431: Index open performs version check on each segment, ignores indexCreatedVersionMajor (Rahul Goswami, Mike Sokolov) Build ---------------------