Skip to content

[opt](nereids) catch all exceptions in StatsCalculator #49415

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,15 @@
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.Statistics;

import com.google.common.base.Preconditions;
import org.apache.commons.collections.CollectionUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import java.time.Instant;
import java.time.LocalDate;
Expand All @@ -112,7 +115,7 @@
* Used to estimate for expressions that not producing boolean value.
*/
public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Statistics> {

public static final Logger LOG = LogManager.getLogger(ExpressionEstimation.class);
public static final long DAYS_FROM_0_TO_1970 = 719528;
public static final long DAYS_FROM_0_TO_9999 = 3652424;
private static final ExpressionEstimation INSTANCE = new ExpressionEstimation();
Expand All @@ -121,11 +124,20 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta
* returned columnStat is newly created or a copy of stats
*/
public static ColumnStatistic estimate(Expression expression, Statistics stats) {
ColumnStatistic columnStatistic = expression.accept(INSTANCE, stats);
if (columnStatistic == null) {
return ColumnStatistic.UNKNOWN;
try {
ColumnStatistic columnStatistic = expression.accept(INSTANCE, stats);
if (columnStatistic == null) {
return ColumnStatistic.createUnknownByDataType(expression.getDataType());
}
return columnStatistic;
} catch (Exception e) {
// in regression test, feDebug is true so that the exception is thrown in order to detect problems.
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().feDebug) {
throw e;
}
LOG.warn("ExpressionEstimation failed : " + expression, e);
return ColumnStatistic.createUnknownByDataType(expression.getDataType());
}
return columnStatistic;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,13 +301,33 @@ public static void estimate(GroupExpression groupExpression, CascadesContext con

private void estimate() {
Plan plan = groupExpression.getPlan();
Statistics newStats = plan.accept(this, null);
Statistics newStats;
try {
newStats = plan.accept(this, null);
} catch (Exception e) {
// throw exception in debug mode
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().feDebug) {
throw e;
}
LOG.warn("stats calculation failed, plan " + plan.toString(), e);
// use unknown stats or the first child's stats
if (plan.children().isEmpty() || !(plan.child(0) instanceof GroupPlan)) {
Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>();
for (Slot slot : plan.getOutput()) {
columnStatisticMap.put(slot, ColumnStatistic.createUnknownByDataType(slot.getDataType()));
}
newStats = new Statistics(1, 1, columnStatisticMap);
} else {
newStats = ((GroupPlan) plan.child(0)).getStats();
}
}
newStats.normalizeColumnStatistics();

// We ensure that the rowCount remains unchanged in order to make the cost of each plan comparable.
final Statistics tmpStats = newStats;
if (groupExpression.getOwnerGroup().getStatistics() == null) {
boolean isReliable = groupExpression.getPlan().getExpressions().stream()
.noneMatch(e -> newStats.isInputSlotsUnknown(e.getInputSlots()));
.noneMatch(e -> tmpStats.isInputSlotsUnknown(e.getInputSlots()));
groupExpression.getOwnerGroup().setStatsReliable(isReliable);
groupExpression.getOwnerGroup().setStatistics(newStats);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public List<? extends Expression> getExpressions() {

@Override
public Statistics getStats() {
throw new IllegalStateException("GroupPlan can not invoke getStats()");
return group.getStatistics();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2271,6 +2271,12 @@ public void setDetailShapePlanNodes(String detailShapePlanNodes) {
"use other health replica when the use_fix_replica meet error" })
public boolean fallbackOtherReplicaWhenFixedCorrupt = false;

public static final String FE_DEBUG = "fe_debug";
@VariableMgr.VarAttr(name = FE_DEBUG, needForward = true, fuzzy = true,
description = {"when set true, FE will throw exceptions instead swallow them. This is used for test",
"when set true, FE will throw exceptions instead swallow them. This is used for test"})
public boolean feDebug = false;

@VariableMgr.VarAttr(name = SHOW_ALL_FE_CONNECTION,
description = {"when it's true show processlist statement list all fe's connection",
"当变量为true时,show processlist命令展示所有fe的连接"})
Expand Down Expand Up @@ -2505,6 +2511,7 @@ public boolean isEnableESParallelScroll() {
@SuppressWarnings("checkstyle:Indentation")
public void initFuzzyModeVariables() {
Random random = new SecureRandom();
this.feDebug = true;
this.parallelPipelineTaskNum = random.nextInt(8);
this.parallelPrepareThreshold = random.nextInt(32) + 1;
this.enableCommonExprPushdown = random.nextBoolean();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.types.coercion.CharacterType;
import org.apache.doris.persist.gson.GsonUtils;
import org.apache.doris.statistics.util.StatisticsUtil;

Expand Down Expand Up @@ -369,4 +371,28 @@ public boolean isUnKnown() {
public ColumnStatistic withAvgSizeByte(double avgSizeByte) {
return new ColumnStatisticBuilder(this).setAvgSizeByte(avgSizeByte).build();
}

public static ColumnStatistic createUnknownByDataType(DataType dataType) {
if (dataType instanceof CharacterType) {
return new ColumnStatisticBuilder(1)
.setAvgSizeByte(Math.max(1, Math.min(dataType.width(), CharacterType.DEFAULT_WIDTH)))
.setNdv(1)
.setNumNulls(1)
.setMaxValue(Double.POSITIVE_INFINITY)
.setMinValue(Double.NEGATIVE_INFINITY)
.setIsUnknown(true)
.setUpdatedTime("")
.build();
} else {
return new ColumnStatisticBuilder(1)
.setAvgSizeByte(dataType.width())
.setNdv(1)
.setNumNulls(1)
.setMaxValue(Double.POSITIVE_INFINITY)
.setMinValue(Double.NEGATIVE_INFINITY)
.setIsUnknown(true)
.setUpdatedTime("")
.build();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -447,4 +447,13 @@ public void testLiteral() {
Assertions.assertEquals(est.avgSizeByte, 1);
Assertions.assertEquals(est.numNulls, 1);
}

@Test
public void testThrowException() {
SlotReference a = new SlotReference("a", StringType.INSTANCE);
Cast cast = new Cast(a, DateType.INSTANCE);
// do not throw any exception
ColumnStatistic est = ExpressionEstimation.estimate(cast, null);
Assertions.assertTrue(est.isUnKnown());
}
}
Loading