Skip to content

Commit b1b129f

Browse files
authored
Query plan caching (Azure#17965)
* This PR adds some optimizations to query execution time by caching the query plan where possible. * Removing LRUCache and switching to ConcurrentMap. * Query plan caching can now be enabled using an environment parameter. * Removing public API related code which was added earlier.
1 parent 45f0680 commit b1b129f

File tree

7 files changed

+284
-55
lines changed

7 files changed

+284
-55
lines changed

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import com.azure.cosmos.implementation.caches.RxClientCollectionCache;
1313
import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache;
1414
import com.azure.cosmos.implementation.clientTelemetry.ClientTelemetry;
15+
import com.azure.cosmos.implementation.query.PartitionedQueryExecutionInfo;
1516
import com.azure.cosmos.models.CosmosItemIdentity;
1617
import com.azure.cosmos.models.CosmosQueryRequestOptions;
1718
import com.azure.cosmos.models.FeedRange;
@@ -24,6 +25,8 @@
2425
import java.net.URI;
2526
import java.net.URISyntaxException;
2627
import java.util.List;
28+
import java.util.concurrent.ConcurrentMap;
29+
2730
/**
2831
* Provides a client-side logical representation of the Azure Cosmos DB
2932
* database service. This async client is used to configure and execute requests
@@ -1475,6 +1478,8 @@ Flux<FeedResponse<Document>> readAllDocuments(
14751478
CosmosQueryRequestOptions options
14761479
);
14771480

1481+
/**
 * Gets the query plan cache.
 *
 * @return the concurrent map of cached {@link PartitionedQueryExecutionInfo} query plans, keyed by a
 * {@code String} (the query text, as populated by {@code DocumentQueryExecutionContextFactory}).
 */
ConcurrentMap<String, PartitionedQueryExecutionInfo> getQueryPlanCache();
1482+
14781483
/**
14791484
* Gets the collection cache.
14801485
*

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ public class Configs {
4343
private static final String ADDRESS_REFRESH_RESPONSE_TIMEOUT_IN_SECONDS = "COSMOS.ADDRESS_REFRESH_RESPONSE_TIMEOUT_IN_SECONDS";
4444
private static final String CLIENT_TELEMETRY_ENABLED = "COSMOS.CLIENT_TELEMETRY_ENABLED";
4545
private static final String CLIENT_TELEMETRY_SCHEDULING_IN_SECONDS = "COSMOS.CLIENT_TELEMETRY_SCHEDULING_IN_SECONDS";
46+
private static final String QUERYPLAN_CACHING_ENABLED = "COSMOS.QUERYPLAN_CACHING_ENABLED";
4647

4748
private static final int DEFAULT_CLIENT_TELEMETRY_SCHEDULING_IN_SECONDS = 10 * 60;
4849
private static final int DEFAULT_UNAVAILABLE_LOCATIONS_EXPIRATION_TIME_IN_SECONDS = 5 * 60;
@@ -214,6 +215,11 @@ public static boolean isClientTelemetryEnabled(boolean defaultValue) {
214215
return getJVMConfigAsBoolean(CLIENT_TELEMETRY_ENABLED, defaultValue);
215216
}
216217

218+
public static boolean isQueryPlanCachingEnabled() {
219+
// Queryplan caching will be disabled by default
220+
return getJVMConfigAsBoolean(QUERYPLAN_CACHING_ENABLED, false);
221+
}
222+
217223
public static int getAddressRefreshResponseTimeoutInSeconds() {
218224
return getJVMConfigAsInt(ADDRESS_REFRESH_RESPONSE_TIMEOUT_IN_SECONDS, DEFAULT_ADDRESS_REFRESH_RESPONSE_TIMEOUT_IN_SECONDS);
219225
}

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import com.azure.cosmos.implementation.query.IDocumentQueryClient;
4141
import com.azure.cosmos.implementation.query.IDocumentQueryExecutionContext;
4242
import com.azure.cosmos.implementation.query.Paginator;
43+
import com.azure.cosmos.implementation.query.PartitionedQueryExecutionInfo;
4344
import com.azure.cosmos.implementation.query.PipelinedDocumentQueryExecutionContext;
4445
import com.azure.cosmos.implementation.query.QueryInfo;
4546
import com.azure.cosmos.implementation.routing.CollectionRoutingMap;
@@ -138,6 +139,7 @@ public class RxDocumentClientImpl implements AsyncDocumentClient, IAuthorization
138139
private RxPartitionKeyRangeCache partitionKeyRangeCache;
139140
private Map<String, List<PartitionKeyAndResourceTokenPair>> resourceTokensMap;
140141
private final boolean contentResponseOnWriteEnabled;
142+
private ConcurrentMap<String, PartitionedQueryExecutionInfo> queryPlanCache;
141143

142144
private final AtomicBoolean closed = new AtomicBoolean(false);
143145
private final int clientId;
@@ -343,6 +345,7 @@ private RxDocumentClientImpl(URI serviceEndpoint,
343345
this.retryPolicy = new RetryPolicy(this, this.globalEndpointManager, this.connectionPolicy);
344346
this.resetSessionTokenRetryPolicy = retryPolicy;
345347
CpuMemoryMonitor.register(this);
348+
this.queryPlanCache = new ConcurrentHashMap<>();
346349
} catch (RuntimeException e) {
347350
logger.error("unexpected failure in initializing client.", e);
348351
close();
@@ -408,6 +411,7 @@ public void init() {
408411
connectionPolicy.getConnectionMode(), globalEndpointManager.getLatestDatabaseAccount().getId(),
409412
null, null, httpClient(), connectionPolicy.isClientTelemetryEnabled());
410413
clientTelemetry.init();
414+
this.queryPlanCache = new ConcurrentHashMap<>();
411415
} catch (Exception e) {
412416
logger.error("unexpected failure in initializing client.", e);
413417
close();
@@ -729,7 +733,10 @@ private <T extends Resource> Flux<FeedResponse<T>> createQueryInternal(
729733
UUID activityId) {
730734

731735
Flux<? extends IDocumentQueryExecutionContext<T>> executionContext =
732-
DocumentQueryExecutionContextFactory.createDocumentQueryExecutionContextAsync(this, queryClient, resourceTypeEnum, klass, sqlQuery , options, resourceLink, false, activityId);
736+
DocumentQueryExecutionContextFactory
737+
.createDocumentQueryExecutionContextAsync(this, queryClient, resourceTypeEnum, klass, sqlQuery,
738+
options, resourceLink, false, activityId,
739+
Configs.isQueryPlanCachingEnabled(), queryPlanCache);
733740

734741
AtomicBoolean isFirstResponse = new AtomicBoolean(true);
735742
return executionContext.flatMap(iDocumentQueryExecutionContext -> {
@@ -2283,6 +2290,11 @@ public Flux<FeedResponse<Document>> readAllDocuments(
22832290
});
22842291
}
22852292

2293+
@Override
2294+
public ConcurrentMap<String, PartitionedQueryExecutionInfo> getQueryPlanCache() {
2295+
return queryPlanCache;
2296+
}
2297+
22862298
@Override
22872299
public Flux<FeedResponse<PartitionKeyRange>> readPartitionKeyRanges(final String collectionLink,
22882300
CosmosQueryRequestOptions options) {

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java

Lines changed: 109 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,18 @@
2121
import com.azure.cosmos.implementation.routing.Range;
2222
import com.azure.cosmos.models.ModelBridgeInternal;
2323
import com.azure.cosmos.implementation.apachecommons.lang.StringUtils;
24+
import org.slf4j.Logger;
25+
import org.slf4j.LoggerFactory;
2426
import reactor.core.publisher.Flux;
2527
import reactor.core.publisher.Mono;
2628

2729
import java.time.Instant;
2830
import java.util.Collections;
31+
import java.util.ConcurrentModificationException;
2932
import java.util.List;
3033
import java.util.Map;
3134
import java.util.UUID;
35+
import java.util.concurrent.ConcurrentMap;
3236

3337
/**
3438
* Although this class is public, it is not part of our published public APIs.
@@ -37,7 +41,9 @@
3741
public class DocumentQueryExecutionContextFactory {
3842

3943
private final static int PageSizeFactorForTop = 5;
40-
44+
private static final Logger logger = LoggerFactory.getLogger(DocumentQueryExecutionContextFactory.class);
45+
// Limiting cache size to 1000 for now. Can be updated in future based on need
46+
private static final int MAX_CACHE_SIZE = 1000;
4147
private static Mono<Utils.ValueHolder<DocumentCollection>> resolveCollection(DiagnosticsClientContext diagnosticsClientContext,
4248
IDocumentQueryClient client,
4349
ResourceType resourceTypeEnum,
@@ -61,7 +67,8 @@ private static <T extends Resource> Mono<Pair<List<PartitionKeyRange>,QueryInfo>
6167
CosmosQueryRequestOptions cosmosQueryRequestOptions,
6268
String resourceLink,
6369
DocumentCollection collection,
64-
DefaultDocumentQueryExecutionContext<T> queryExecutionContext) {
70+
DefaultDocumentQueryExecutionContext<T> queryExecutionContext, boolean queryPlanCachingEnabled,
71+
ConcurrentMap<String, PartitionedQueryExecutionInfo> queryPlanCache) {
6572

6673
// The partitionKeyRangeIdInternal is no more a public API on
6774
// FeedOptions, but have the below condition
@@ -78,49 +85,108 @@ private static <T extends Resource> Mono<Pair<List<PartitionKeyRange>,QueryInfo>
7885
}
7986

8087
Instant startTime = Instant.now();
81-
Mono<PartitionedQueryExecutionInfo> queryExecutionInfoMono =
82-
QueryPlanRetriever
83-
.getQueryPlanThroughGatewayAsync(diagnosticsClientContext, client, query, resourceLink);
88+
Mono<PartitionedQueryExecutionInfo> queryExecutionInfoMono;
89+
if (queryPlanCachingEnabled &&
90+
isScopedToSinglePartition(cosmosQueryRequestOptions) &&
91+
queryPlanCache.containsKey(query.getQueryText())) {
92+
Instant endTime = Instant.now(); // endTime for query plan diagnostics
93+
PartitionedQueryExecutionInfo partitionedQueryExecutionInfo = queryPlanCache.get(query.getQueryText());
94+
if (partitionedQueryExecutionInfo != null) {
95+
return getTargetRangesFromQueryPlan(cosmosQueryRequestOptions, collection, queryExecutionContext,
96+
partitionedQueryExecutionInfo, startTime, endTime);
97+
}
98+
}
99+
100+
queryExecutionInfoMono = QueryPlanRetriever
101+
.getQueryPlanThroughGatewayAsync(diagnosticsClientContext, client, query,
102+
resourceLink);
84103

85104
return queryExecutionInfoMono.flatMap(
86105
partitionedQueryExecutionInfo -> {
87106

88107
Instant endTime = Instant.now();
89-
QueryInfo queryInfo =
90-
partitionedQueryExecutionInfo.getQueryInfo();
91-
queryInfo.setQueryPlanDiagnosticsContext(new QueryInfo.QueryPlanDiagnosticsContext(startTime, endTime));
92-
93-
List<Range<String>> queryRanges =
94-
partitionedQueryExecutionInfo.getQueryRanges();
95-
96-
if (cosmosQueryRequestOptions != null
97-
&& cosmosQueryRequestOptions.getPartitionKey() != null
98-
&& cosmosQueryRequestOptions.getPartitionKey() != PartitionKey.NONE) {
99-
PartitionKeyInternal internalPartitionKey =
100-
BridgeInternal.getPartitionKeyInternal(cosmosQueryRequestOptions.getPartitionKey());
101-
Range<String> range = Range
102-
.getPointRange(internalPartitionKey
103-
.getEffectivePartitionKeyString(internalPartitionKey, collection.getPartitionKey()));
104-
queryRanges = Collections.singletonList(range);
108+
109+
if (queryPlanCachingEnabled) {
110+
tryCacheQueryPlan(query, partitionedQueryExecutionInfo, queryPlanCache);
105111
}
106-
return
107-
queryExecutionContext.getTargetPartitionKeyRanges(collection.getResourceId(), queryRanges)
108-
.map(pkRanges -> Pair.of(
109-
pkRanges,
110-
partitionedQueryExecutionInfo.getQueryInfo()));
112+
113+
return getTargetRangesFromQueryPlan(cosmosQueryRequestOptions, collection, queryExecutionContext,
114+
partitionedQueryExecutionInfo, startTime, endTime);
111115
});
112116
}
113117

118+
private static <T extends Resource> Mono<Pair<List<PartitionKeyRange>, QueryInfo>> getTargetRangesFromQueryPlan(
119+
CosmosQueryRequestOptions cosmosQueryRequestOptions, DocumentCollection collection,
120+
DefaultDocumentQueryExecutionContext<T> queryExecutionContext,
121+
PartitionedQueryExecutionInfo partitionedQueryExecutionInfo, Instant planFetchStartTime,
122+
Instant planFetchEndTime) {
123+
QueryInfo queryInfo =
124+
partitionedQueryExecutionInfo.getQueryInfo();
125+
queryInfo.setQueryPlanDiagnosticsContext(new QueryInfo.QueryPlanDiagnosticsContext(planFetchStartTime,
126+
planFetchEndTime));
127+
List<Range<String>> queryRanges =
128+
partitionedQueryExecutionInfo.getQueryRanges();
129+
130+
if (isScopedToSinglePartition(cosmosQueryRequestOptions)) {
131+
PartitionKeyInternal internalPartitionKey =
132+
BridgeInternal.getPartitionKeyInternal(cosmosQueryRequestOptions.getPartitionKey());
133+
Range<String> range = Range
134+
.getPointRange(internalPartitionKey
135+
.getEffectivePartitionKeyString(internalPartitionKey,
136+
collection
137+
.getPartitionKey()));
138+
queryRanges = Collections.singletonList(range);
139+
}
140+
return
141+
queryExecutionContext.getTargetPartitionKeyRanges(collection.getResourceId(), queryRanges)
142+
.map(pkRanges -> Pair.of(
143+
pkRanges,
144+
partitionedQueryExecutionInfo.getQueryInfo()));
145+
}
146+
147+
private static void tryCacheQueryPlan(
148+
SqlQuerySpec query,
149+
PartitionedQueryExecutionInfo partitionedQueryExecutionInfo,
150+
ConcurrentMap<String, PartitionedQueryExecutionInfo> queryPlanCache) {
151+
QueryInfo queryInfo = partitionedQueryExecutionInfo.getQueryInfo();
152+
if (canCacheQuery(queryInfo) && !queryPlanCache.containsKey(query.getQueryText())) {
153+
if (queryPlanCache.size() > MAX_CACHE_SIZE) {
154+
// Clearing query plan cache if size is above max size. This can be optimized in future by using
155+
// a threadsafe LRU cache
156+
queryPlanCache.clear();
157+
}
158+
queryPlanCache.put(query.getQueryText(), partitionedQueryExecutionInfo);
159+
}
160+
}
161+
162+
private static boolean canCacheQuery(QueryInfo queryInfo) {
163+
// Query plan will not be cached for the types below
164+
return !queryInfo.hasAggregates()
165+
&& !queryInfo.hasDistinct()
166+
&& !queryInfo.hasGroupBy()
167+
&& !queryInfo.hasLimit()
168+
&& !queryInfo.hasTop()
169+
&& !queryInfo.hasOffset();
170+
}
171+
172+
private static boolean isScopedToSinglePartition(CosmosQueryRequestOptions cosmosQueryRequestOptions) {
173+
return cosmosQueryRequestOptions != null
174+
&& cosmosQueryRequestOptions.getPartitionKey() != null
175+
&& cosmosQueryRequestOptions.getPartitionKey() != PartitionKey.NONE;
176+
}
177+
114178
public static <T extends Resource> Flux<? extends IDocumentQueryExecutionContext<T>> createDocumentQueryExecutionContextAsync(
115-
DiagnosticsClientContext diagnosticsClientContext,
116-
IDocumentQueryClient client,
117-
ResourceType resourceTypeEnum,
118-
Class<T> resourceType,
119-
SqlQuerySpec query,
120-
CosmosQueryRequestOptions cosmosQueryRequestOptions,
121-
String resourceLink,
122-
boolean isContinuationExpected,
123-
UUID correlatedActivityId) {
179+
DiagnosticsClientContext diagnosticsClientContext,
180+
IDocumentQueryClient client,
181+
ResourceType resourceTypeEnum,
182+
Class<T> resourceType,
183+
SqlQuerySpec query,
184+
CosmosQueryRequestOptions cosmosQueryRequestOptions,
185+
String resourceLink,
186+
boolean isContinuationExpected,
187+
UUID correlatedActivityId,
188+
boolean queryPlanCachingEnabled,
189+
ConcurrentMap<String, PartitionedQueryExecutionInfo> queryPlanCache) {
124190

125191
// return proxy
126192
Flux<Utils.ValueHolder<DocumentCollection>> collectionObs = Flux.just(new Utils.ValueHolder<>(null));
@@ -146,12 +212,14 @@ public static <T extends Resource> Flux<? extends IDocumentQueryExecutionContext
146212

147213
return collectionObs.single().flatMap(collectionValueHolder -> {
148214
Mono<Pair<List<PartitionKeyRange>, QueryInfo>> queryPlanTask = getPartitionKeyRangesAndQueryInfo(diagnosticsClientContext,
149-
client,
150-
query,
151-
cosmosQueryRequestOptions,
152-
resourceLink,
153-
collectionValueHolder.v,
154-
queryExecutionContext);
215+
client,
216+
query,
217+
cosmosQueryRequestOptions,
218+
resourceLink,
219+
collectionValueHolder.v,
220+
queryExecutionContext,
221+
queryPlanCachingEnabled,
222+
queryPlanCache);
155223

156224
return queryPlanTask
157225
.flatMap(queryPlan -> createSpecializedDocumentQueryExecutionContextAsync(diagnosticsClientContext,

sdk/cosmos/azure-cosmos/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ public RxDocumentClientUnderTest(URI serviceEndpoint,
3838
Configs configs,
3939
AzureKeyCredential credential,
4040
boolean contentResponseOnWriteEnabled) {
41-
super(serviceEndpoint, masterKey, connectionPolicy, consistencyLevel, configs, credential, null, false, false, contentResponseOnWriteEnabled);
41+
super(serviceEndpoint, masterKey, connectionPolicy, consistencyLevel, configs, credential, null, false,
42+
false, contentResponseOnWriteEnabled);
4243
init();
4344
}
4445

0 commit comments

Comments
 (0)