Skip to content

Commit f9c19b8

Browse files
zation99facebook-github-bot
authored andcommitted
fix(analyzer): materialized view with logical view and cte
Summary: If a materialized view is a part of a logical view, the logical view's where predicate is not pushed down to materialized view so that it doesn't check the overlap correctly. It caused the comparison between mv's data and ALL base table data instead of the ones specified in the query. This diff fixes it by storing the where predicate when processing a logical view. So mv can combine the where predicate in logical view as well when getting mv status. It also fixes the issue during with using the logical view/mv in cte. Differential Revision: D87928199
1 parent d5c8633 commit f9c19b8

File tree

3 files changed

+272
-3
lines changed

3 files changed

+272
-3
lines changed

presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/Analysis.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,9 @@ public class Analysis
221221
// Keeps track of the subquery we are visiting, so we have access to base query information when processing materialized view status
222222
private Optional<QuerySpecification> currentQuerySpecification = Optional.empty();
223223

224+
// Track WHERE clause from the query accessing a view for subquery analysis such as materialized view
225+
private Optional<Expression> viewAccessorWhereClause = Optional.empty();
226+
224227
// Maps each output Field to its originating SourceColumn(s) for column-level lineage tracking.
225228
private final Multimap<Field, SourceColumn> originColumnDetails = ArrayListMultimap.create();
226229

@@ -1112,11 +1115,27 @@ public void setCurrentSubquery(QuerySpecification currentSubQuery)
11121115
{
11131116
this.currentQuerySpecification = Optional.of(currentSubQuery);
11141117
}
1118+
11151119
public Optional<QuerySpecification> getCurrentQuerySpecification()
11161120
{
11171121
return currentQuerySpecification;
11181122
}
11191123

1124+
public void setViewAccessorWhereClause(Optional<Expression> whereClause)
1125+
{
1126+
this.viewAccessorWhereClause = whereClause;
1127+
}
1128+
1129+
public void clearViewAccessorWhereClause()
1130+
{
1131+
this.viewAccessorWhereClause = Optional.empty();
1132+
}
1133+
1134+
public Optional<Expression> getViewAccessorWhereClause()
1135+
{
1136+
return viewAccessorWhereClause;
1137+
}
1138+
11201139
public void setTargetQuery(QuerySpecification targetQuery)
11211140
{
11221141
this.targetQuery = Optional.of(targetQuery);

presto-hive/src/test/java/com/facebook/presto/hive/TestHiveMaterializedViewLogicalPlanner.java

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,233 @@ protected QueryRunner createQueryRunner()
106106
Optional.empty());
107107
}
108108

109+
@Test
110+
public void testMaterializedViewPartitionFilteringThroughLogicalView()
111+
{
112+
QueryRunner queryRunner = getQueryRunner();
113+
String table = "orders_partitioned_lv_test";
114+
String materializedView = "orders_mv_lv_test";
115+
String logicalView = "orders_lv_test";
116+
117+
try {
118+
// Create a table partitioned by 'ds' (date string)
119+
queryRunner.execute(format("CREATE TABLE %s WITH (partitioned_by = ARRAY['ds']) AS " +
120+
"SELECT orderkey, totalprice, '2025-11-10' AS ds FROM orders WHERE orderkey < 1000 " +
121+
"UNION ALL " +
122+
"SELECT orderkey, totalprice, '2025-11-11' AS ds FROM orders WHERE orderkey >= 1000 AND orderkey < 2000 " +
123+
"UNION ALL " +
124+
"SELECT orderkey, totalprice, '2025-11-12' AS ds FROM orders WHERE orderkey >= 2000 AND orderkey < 3000", table));
125+
126+
// Create a materialized view partitioned by 'ds'
127+
queryRunner.execute(format("CREATE MATERIALIZED VIEW %s WITH (partitioned_by = ARRAY['ds']) AS " +
128+
"SELECT max(totalprice) as max_price, orderkey, ds FROM %s GROUP BY orderkey, ds", materializedView, table));
129+
130+
assertTrue(getQueryRunner().tableExists(getSession(), materializedView));
131+
132+
// Only refresh partition for '2025-11-10', leaving '2025-11-11' and '2025-11-12' missing
133+
assertUpdate(format("REFRESH MATERIALIZED VIEW %s WHERE ds='2025-11-10'", materializedView), 255);
134+
135+
// Create a logical view on top of the materialized view
136+
queryRunner.execute(format("CREATE VIEW %s AS SELECT * FROM %s", logicalView, materializedView));
137+
138+
setReferencedMaterializedViews((DistributedQueryRunner) queryRunner, table, ImmutableList.of(materializedView));
139+
140+
Session session = Session.builder(getQueryRunner().getDefaultSession())
141+
.setSystemProperty(CONSIDER_QUERY_FILTERS_FOR_MATERIALIZED_VIEW_PARTITIONS, "true")
142+
.setCatalogSessionProperty(HIVE_CATALOG, MATERIALIZED_VIEW_MISSING_PARTITIONS_THRESHOLD, Integer.toString(1))
143+
.build();
144+
145+
// Query the logical view with a predicate
146+
// The predicate should be pushed down to the materialized view
147+
// Since only ds='2025-11-10' is refreshed and that's what we're querying,
148+
// the materialized view should be used (not fall back to base table)
149+
String logicalViewQuery = format("SELECT max_price, orderkey FROM %s WHERE ds='2025-11-10' ORDER BY orderkey", logicalView);
150+
String directMvQuery = format("SELECT max_price, orderkey FROM %s WHERE ds='2025-11-10' ORDER BY orderkey", materializedView);
151+
String baseTableQuery = format("SELECT max(totalprice) as max_price, orderkey FROM %s " +
152+
"WHERE ds='2025-11-10' " +
153+
"GROUP BY orderkey ORDER BY orderkey", table);
154+
155+
MaterializedResult baseQueryResult = computeActual(session, baseTableQuery);
156+
MaterializedResult logicalViewResult = computeActual(session, logicalViewQuery);
157+
MaterializedResult directMvResult = computeActual(session, directMvQuery);
158+
159+
// All three queries should return the same results
160+
assertEquals(baseQueryResult, logicalViewResult);
161+
assertEquals(baseQueryResult, directMvResult);
162+
163+
// The plan for the logical view query should use the materialized view
164+
// (not fall back to base table) because we're only querying the refreshed partition
165+
assertPlan(session, logicalViewQuery, anyTree(
166+
constrainedTableScan(
167+
materializedView,
168+
ImmutableMap.of("ds", singleValue(createVarcharType(10), utf8Slice("2025-11-10"))),
169+
ImmutableMap.of())));
170+
171+
// Test query for a missing partition through logical view
172+
// This should fall back to base table because ds='2025-11-11' is not refreshed
173+
String logicalViewQueryMissing = format("SELECT max_price, orderkey FROM %s WHERE ds='2025-11-11' ORDER BY orderkey", logicalView);
174+
String baseTableQueryMissing = format("SELECT max(totalprice) as max_price, orderkey FROM %s " +
175+
"WHERE ds='2025-11-11' " +
176+
"GROUP BY orderkey ORDER BY orderkey", table);
177+
178+
MaterializedResult baseQueryResultMissing = computeActual(session, baseTableQueryMissing);
179+
MaterializedResult logicalViewResultMissing = computeActual(session, logicalViewQueryMissing);
180+
181+
assertEquals(baseQueryResultMissing, logicalViewResultMissing);
182+
183+
// Should fall back to base table for missing partition
184+
assertPlan(session, logicalViewQueryMissing, anyTree(
185+
constrainedTableScan(table, ImmutableMap.of(), ImmutableMap.of())));
186+
}
187+
finally {
188+
queryRunner.execute("DROP VIEW IF EXISTS " + logicalView);
189+
queryRunner.execute("DROP MATERIALIZED VIEW IF EXISTS " + materializedView);
190+
queryRunner.execute("DROP TABLE IF EXISTS " + table);
191+
}
192+
}
193+
194+
@Test
195+
public void testMaterializedViewPartitionFilteringThroughLogicalViewWithCTE()
196+
{
197+
QueryRunner queryRunner = getQueryRunner();
198+
String table = "orders_partitioned_cte_test";
199+
String materializedView = "orders_mv_cte_test";
200+
String logicalView = "orders_lv_cte_test";
201+
202+
try {
203+
// Create a table partitioned by 'ds' (date string)
204+
queryRunner.execute(format("CREATE TABLE %s WITH (partitioned_by = ARRAY['ds']) AS " +
205+
"SELECT orderkey, totalprice, '2025-11-10' AS ds FROM orders WHERE orderkey < 1000 " +
206+
"UNION ALL " +
207+
"SELECT orderkey, totalprice, '2025-11-11' AS ds FROM orders WHERE orderkey >= 1000 AND orderkey < 2000 " +
208+
"UNION ALL " +
209+
"SELECT orderkey, totalprice, '2025-11-12' AS ds FROM orders WHERE orderkey >= 2000 AND orderkey < 3000", table));
210+
211+
// Create a materialized view partitioned by 'ds'
212+
queryRunner.execute(format("CREATE MATERIALIZED VIEW %s WITH (partitioned_by = ARRAY['ds']) AS " +
213+
"SELECT max(totalprice) as max_price, orderkey, ds FROM %s GROUP BY orderkey, ds", materializedView, table));
214+
215+
assertTrue(getQueryRunner().tableExists(getSession(), materializedView));
216+
217+
// Only refresh partition for '2025-11-11', leaving '2025-11-10' and '2025-11-12' missing
218+
assertUpdate(format("REFRESH MATERIALIZED VIEW %s WHERE ds='2025-11-11'", materializedView), 248);
219+
220+
// Create a logical view on top of the materialized view
221+
queryRunner.execute(format("CREATE VIEW %s AS SELECT * FROM %s", logicalView, materializedView));
222+
223+
setReferencedMaterializedViews((DistributedQueryRunner) queryRunner, table, ImmutableList.of(materializedView));
224+
225+
Session session = Session.builder(getQueryRunner().getDefaultSession())
226+
.setSystemProperty(CONSIDER_QUERY_FILTERS_FOR_MATERIALIZED_VIEW_PARTITIONS, "true")
227+
.setCatalogSessionProperty(HIVE_CATALOG, MATERIALIZED_VIEW_MISSING_PARTITIONS_THRESHOLD, Integer.toString(1))
228+
.build();
229+
230+
// Query the logical view through a CTE with a predicate
231+
// The predicate should be pushed down to the materialized view
232+
String cteQuery = format("WITH PreQuery AS (SELECT * FROM %s WHERE ds='2025-11-11') " +
233+
"SELECT max_price, orderkey FROM PreQuery ORDER BY orderkey", logicalView);
234+
String baseTableQuery = format("SELECT max(totalprice) as max_price, orderkey FROM %s " +
235+
"WHERE ds='2025-11-11' " +
236+
"GROUP BY orderkey ORDER BY orderkey", table);
237+
238+
MaterializedResult baseQueryResult = computeActual(session, baseTableQuery);
239+
MaterializedResult cteQueryResult = computeActual(session, cteQuery);
240+
241+
// Both queries should return the same results
242+
assertEquals(baseQueryResult, cteQueryResult);
243+
244+
// The plan for the CTE query should use the materialized view
245+
// (not fall back to base table) because we're only querying the refreshed partition
246+
assertPlan(session, cteQuery, anyTree(
247+
constrainedTableScan(
248+
materializedView,
249+
ImmutableMap.of("ds", singleValue(createVarcharType(10), utf8Slice("2025-11-11"))),
250+
ImmutableMap.of())));
251+
}
252+
finally {
253+
queryRunner.execute("DROP VIEW IF EXISTS " + logicalView);
254+
queryRunner.execute("DROP MATERIALIZED VIEW IF EXISTS " + materializedView);
255+
queryRunner.execute("DROP TABLE IF EXISTS " + table);
256+
}
257+
}
258+
259+
@Test
260+
public void testMaterializedViewPartitionFilteringInCTE()
261+
{
262+
QueryRunner queryRunner = getQueryRunner();
263+
String table = "orders_partitioned_mv_cte_test";
264+
String materializedView = "orders_mv_direct_cte_test";
265+
266+
try {
267+
// Create a table partitioned by 'ds' (date string)
268+
queryRunner.execute(format("CREATE TABLE %s WITH (partitioned_by = ARRAY['ds']) AS " +
269+
"SELECT orderkey, totalprice, '2025-11-10' AS ds FROM orders WHERE orderkey < 1000 " +
270+
"UNION ALL " +
271+
"SELECT orderkey, totalprice, '2025-11-11' AS ds FROM orders WHERE orderkey >= 1000 AND orderkey < 2000 " +
272+
"UNION ALL " +
273+
"SELECT orderkey, totalprice, '2025-11-12' AS ds FROM orders WHERE orderkey >= 2000 AND orderkey < 3000", table));
274+
275+
// Create a materialized view partitioned by 'ds'
276+
queryRunner.execute(format("CREATE MATERIALIZED VIEW %s WITH (partitioned_by = ARRAY['ds']) AS " +
277+
"SELECT max(totalprice) as max_price, orderkey, ds FROM %s GROUP BY orderkey, ds", materializedView, table));
278+
279+
assertTrue(getQueryRunner().tableExists(getSession(), materializedView));
280+
281+
// Only refresh partition for '2025-11-10', leaving '2025-11-11' and '2025-11-12' missing
282+
assertUpdate(format("REFRESH MATERIALIZED VIEW %s WHERE ds='2025-11-10'", materializedView), 255);
283+
284+
setReferencedMaterializedViews((DistributedQueryRunner) queryRunner, table, ImmutableList.of(materializedView));
285+
286+
Session session = Session.builder(getQueryRunner().getDefaultSession())
287+
.setSystemProperty(CONSIDER_QUERY_FILTERS_FOR_MATERIALIZED_VIEW_PARTITIONS, "true")
288+
.setCatalogSessionProperty(HIVE_CATALOG, MATERIALIZED_VIEW_MISSING_PARTITIONS_THRESHOLD, Integer.toString(1))
289+
.build();
290+
291+
// Query the materialized view directly through a CTE with a predicate
292+
// The predicate should be used to determine which partitions are needed
293+
String cteQuery = format("WITH PreQuery AS (SELECT * FROM %s WHERE ds='2025-11-10') " +
294+
"SELECT max_price, orderkey FROM PreQuery ORDER BY orderkey", materializedView);
295+
String baseTableQuery = format("SELECT max(totalprice) as max_price, orderkey FROM %s " +
296+
"WHERE ds='2025-11-10' " +
297+
"GROUP BY orderkey ORDER BY orderkey", table);
298+
299+
MaterializedResult baseQueryResult = computeActual(session, baseTableQuery);
300+
MaterializedResult cteQueryResult = computeActual(session, cteQuery);
301+
302+
// Both queries should return the same results
303+
assertEquals(baseQueryResult, cteQueryResult);
304+
305+
// The plan for the CTE query should use the materialized view
306+
// (not fall back to base table) because we're only querying the refreshed partition
307+
assertPlan(session, cteQuery, anyTree(
308+
constrainedTableScan(
309+
materializedView,
310+
ImmutableMap.of("ds", singleValue(createVarcharType(10), utf8Slice("2025-11-10"))),
311+
ImmutableMap.of())));
312+
313+
// Test query for a missing partition through CTE
314+
// This should fall back to base table because ds='2025-11-11' is not refreshed
315+
String cteQueryMissing = format("WITH PreQuery AS (SELECT * FROM %s WHERE ds='2025-11-11') " +
316+
"SELECT max_price, orderkey FROM PreQuery ORDER BY orderkey", materializedView);
317+
String baseTableQueryMissing = format("SELECT max(totalprice) as max_price, orderkey FROM %s " +
318+
"WHERE ds='2025-11-11' " +
319+
"GROUP BY orderkey ORDER BY orderkey", table);
320+
321+
MaterializedResult baseQueryResultMissing = computeActual(session, baseTableQueryMissing);
322+
MaterializedResult cteQueryResultMissing = computeActual(session, cteQueryMissing);
323+
324+
assertEquals(baseQueryResultMissing, cteQueryResultMissing);
325+
326+
// Should fall back to base table for missing partition
327+
assertPlan(session, cteQueryMissing, anyTree(
328+
constrainedTableScan(table, ImmutableMap.of(), ImmutableMap.of())));
329+
}
330+
finally {
331+
queryRunner.execute("DROP MATERIALIZED VIEW IF EXISTS " + materializedView);
332+
queryRunner.execute("DROP TABLE IF EXISTS " + table);
333+
}
334+
}
335+
109336
@Test
110337
public void testMaterializedViewOptimization()
111338
{

presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/StatementAnalyzer.java

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2372,12 +2372,26 @@ private Scope processView(Table table, Optional<Scope> scope, QualifiedObjectNam
23722372

23732373
analysis.getAccessControlReferences().addViewDefinitionReference(name, view);
23742374

2375+
Optional<Expression> savedViewAccessorWhereClause = Optional.empty();
2376+
if (analysis.getCurrentQuerySpecification().isPresent()) {
2377+
QuerySpecification currentQuerySpec = analysis.getCurrentQuerySpecification().get();
2378+
savedViewAccessorWhereClause = currentQuerySpec.getWhere();
2379+
if (savedViewAccessorWhereClause.isPresent()) {
2380+
analysis.setViewAccessorWhereClause(savedViewAccessorWhereClause);
2381+
}
2382+
}
2383+
23752384
Query query = parseView(view.getOriginalSql(), name, table);
23762385

23772386
analysis.registerNamedQuery(table, query, true);
23782387
analysis.registerTableForView(table);
23792388
RelationType descriptor = analyzeView(query, name, view.getCatalog(), view.getSchema(), view.getOwner(), table);
23802389
analysis.unregisterTableForView();
2390+
2391+
if (savedViewAccessorWhereClause.isPresent()) {
2392+
analysis.clearViewAccessorWhereClause();
2393+
}
2394+
23812395
if (isViewStale(view.getColumns(), descriptor.getVisibleFields())) {
23822396
throw new SemanticException(VIEW_IS_STALE, table, "View '%s' is stale; it must be re-created", name);
23832397
}
@@ -2566,15 +2580,24 @@ private MaterializedViewStatus getMaterializedViewStatus(QualifiedObjectName mat
25662580
checkArgument(analysis.getCurrentQuerySpecification().isPresent(), "Current subquery should be set when processing materialized view");
25672581
QuerySpecification currentSubquery = analysis.getCurrentQuerySpecification().get();
25682582

2569-
if (currentSubquery.getWhere().isPresent() && isMaterializedViewPartitionFilteringEnabled(session)) {
2583+
// Collect where clause from both current subquery and possible logical view
2584+
List<Expression> wherePredicates = new ArrayList<>();
2585+
if (currentSubquery.getWhere().isPresent()) {
2586+
wherePredicates.add(currentSubquery.getWhere().get());
2587+
}
2588+
if (analysis.getViewAccessorWhereClause().isPresent()) {
2589+
wherePredicates.add(analysis.getViewAccessorWhereClause().get());
2590+
}
2591+
2592+
if (!wherePredicates.isEmpty() && isMaterializedViewPartitionFilteringEnabled(session)) {
25702593
Optional<MaterializedViewDefinition> materializedViewDefinition = getMaterializedViewDefinition(session, metadataResolver, analysis.getMetadataHandle(), materializedViewName);
25712594
if (!materializedViewDefinition.isPresent()) {
25722595
log.warn("Materialized view definition not present as expected when fetching materialized view status");
25732596
return metadataResolver.getMaterializedViewStatus(materializedViewName, baseQueryDomain);
25742597
}
25752598

25762599
Scope sourceScope = getScopeFromTable(table, scope);
2577-
Expression viewQueryWhereClause = currentSubquery.getWhere().get();
2600+
Expression combinedWhereClause = ExpressionUtils.combineConjuncts(wherePredicates);
25782601

25792602
// Extract column names from materialized view scope
25802603
Set<QualifiedName> materializedViewColumns = sourceScope.getRelationType().getAllFields().stream()
@@ -2585,7 +2608,7 @@ private MaterializedViewStatus getMaterializedViewStatus(QualifiedObjectName mat
25852608
.collect(Collectors.toSet());
25862609

25872610
// Only proceed with partition filtering if there are conjuncts that reference MV columns
2588-
List<Expression> conjuncts = ExpressionUtils.extractConjuncts(viewQueryWhereClause);
2611+
List<Expression> conjuncts = ExpressionUtils.extractConjuncts(combinedWhereClause);
25892612
List<Expression> mvConjuncts = conjuncts.stream()
25902613
.filter(conjunct -> {
25912614
Set<QualifiedName> referencedColumns = VariablesExtractor.extractNames(conjunct, analysis.getColumnReferences());

0 commit comments

Comments
 (0)