Skip to content

Commit 20f7bae

Browse files
rahulgoswamirgoswami-cv
authored and committed
SOLR-17725: Add MergePolicy to block older segments from participating in merges (#3883)
Helps with Solr upgrades in conjunction with re-indexing. New: LatestVersionFilterMergePolicy --------- Co-authored-by: Rahul Goswami <[email protected]>
1 parent f0732fd commit 20f7bae

File tree

3 files changed

+162
-0
lines changed

3 files changed

+162
-0
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
2+
title: Merge policy to upgrade index for compatibility with future Solr version
3+
type: added
4+
authors:
5+
- name: Rahul Goswami
6+
links:
7+
- name: SOLR-17725
8+
url: https://issues.apache.org/jira/browse/SOLR-17725
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.solr.index;
19+
20+
import java.io.IOException;
21+
import java.util.Map;
22+
import org.apache.lucene.index.FilterMergePolicy;
23+
import org.apache.lucene.index.MergePolicy;
24+
import org.apache.lucene.index.MergeTrigger;
25+
import org.apache.lucene.index.SegmentCommitInfo;
26+
import org.apache.lucene.index.SegmentInfos;
27+
import org.apache.lucene.util.Version;
28+
29+
/**
30+
* Prevents any older version segment (i.e. older than current lucene major version), either
31+
* original or one derived as a result of merging with an older version segment, from being
32+
* considered for merges. That way a snapshot of older segments remains consistent. This assists in
33+
* upgrading to a future Lucene major version if existing documents are reindexed in the current
34+
* version with this merge policy in place.
35+
*/
36+
public class LatestVersionFilterMergePolicy extends FilterMergePolicy {
37+
38+
public LatestVersionFilterMergePolicy(MergePolicy in) {
39+
super(in);
40+
}
41+
42+
@Override
43+
public MergeSpecification findMerges(
44+
MergeTrigger mergeTrigger, SegmentInfos infos, MergeContext mergeContext) throws IOException {
45+
return in.findMerges(mergeTrigger, getFilteredInfos(infos), mergeContext);
46+
}
47+
48+
@Override
49+
public MergeSpecification findForcedMerges(
50+
SegmentInfos infos,
51+
int maxSegmentCount,
52+
Map<SegmentCommitInfo, Boolean> segmentsToMerge,
53+
MergeContext mergeContext)
54+
throws IOException {
55+
return in.findForcedMerges(
56+
getFilteredInfos(infos), maxSegmentCount, segmentsToMerge, mergeContext);
57+
}
58+
59+
@Override
60+
public MergeSpecification findForcedDeletesMerges(SegmentInfos infos, MergeContext mergeContext)
61+
throws IOException {
62+
return in.findForcedDeletesMerges(getFilteredInfos(infos), mergeContext);
63+
}
64+
65+
@Override
66+
public MergeSpecification findFullFlushMerges(
67+
MergeTrigger mergeTrigger, SegmentInfos infos, MergeContext mergeContext) throws IOException {
68+
return in.findFullFlushMerges(mergeTrigger, getFilteredInfos(infos), mergeContext);
69+
}
70+
71+
private SegmentInfos getFilteredInfos(SegmentInfos infos) {
72+
SegmentInfos infosClone = null;
73+
74+
for (SegmentCommitInfo info : infos) {
75+
if (!allowSegmentForMerge(info)) {
76+
// There are older version segments present.
77+
// We should not remove from the original SegmentInfos. Hence we clone.
78+
infosClone = infos.clone();
79+
infosClone.clear();
80+
break;
81+
}
82+
}
83+
84+
if (infosClone == null) {
85+
// All segments are latest major version and allowed to participate in merge
86+
return infos;
87+
} else {
88+
// Either mixed versions or all older version segments.
89+
// If we are here, most runs should fall in the former case.
90+
// The latter case should only happen once right after an upgrade, so we are ok with incurring
91+
// this redundant iteration for that one time to keep the logic simple
92+
for (SegmentCommitInfo info : infos) {
93+
if (allowSegmentForMerge(info)) {
94+
infosClone.add(info);
95+
}
96+
}
97+
}
98+
99+
return infosClone;
100+
}
101+
102+
/**
103+
* Determines if a SegmentCommitInfo should be part of the candidate set of segments that will be
104+
* considered for merges. By default, we only allow LATEST version segments to participate in
105+
* merges.
106+
*/
107+
protected boolean allowSegmentForMerge(SegmentCommitInfo info) {
108+
return info.info.getMinVersion() != null
109+
&& info.info.getMinVersion().major == Version.LATEST.major;
110+
}
111+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.solr.index;
18+
19+
import org.apache.lucene.index.MergePolicy;
20+
import org.apache.lucene.index.TieredMergePolicy;
21+
import org.apache.solr.core.SolrResourceLoader;
22+
import org.apache.solr.schema.IndexSchema;
23+
24+
/**
25+
* A {@link MergePolicyFactory} for {@link LatestVersionFilterMergePolicy} objects. The returned
26+
* LatestVersionFilterMergePolicy instance blocks older version segments (&lt; current version of
27+
* Lucene) from participating in merges and delegates the merging to a TieredMergePolicy instance by
28+
* default. This can be used to reindex the data and ensure all segments are the latest version
29+
* segments by the end of the reindexing. This can help prepare the index for upgrade to a later
30+
* version of Solr/Lucene even if it was initially created on a now unsupported version
31+
*/
32+
public class LatestVersionMergePolicyFactory extends SimpleMergePolicyFactory {
33+
34+
public LatestVersionMergePolicyFactory(
35+
SolrResourceLoader resourceLoader, MergePolicyFactoryArgs args, IndexSchema schema) {
36+
super(resourceLoader, args, schema);
37+
}
38+
39+
@Override
40+
protected MergePolicy getMergePolicyInstance() {
41+
return new LatestVersionFilterMergePolicy(new TieredMergePolicy());
42+
}
43+
}

0 commit comments

Comments
 (0)