-
Notifications
You must be signed in to change notification settings - Fork 14.8k
KAFKA-19763: Parallel remote reads cause memory leak in broker #20654
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1c6f2f1
fe28d13
b545caf
a5c8747
ca1091d
c5c7eec
2fc615b
0ee2bf2
4c8819d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -251,9 +251,12 @@ class ReplicaManager(val config: KafkaConfig, | |
| new DelayedOperationPurgatory[DelayedDeleteRecords]( | ||
| "DeleteRecords", config.brokerId, | ||
| config.deleteRecordsPurgatoryPurgeIntervalRequests)) | ||
| // delayedRemoteFetchPurgatory purgeInterval is set to 0 to release the references of completed DelayedRemoteFetch | ||
| // instances immediately for GC. The DelayedRemoteFetch instance internally holds the RemoteLogReadResult that can be | ||
| // up to the size of `fetch.max.bytes` which defaults to 50 MB. | ||
| val delayedRemoteFetchPurgatory = delayedRemoteFetchPurgatoryParam.getOrElse( | ||
| new DelayedOperationPurgatory[DelayedRemoteFetch]( | ||
| "RemoteFetch", config.brokerId)) | ||
| "RemoteFetch", config.brokerId, 0)) | ||
| val delayedRemoteListOffsetsPurgatory = delayedRemoteListOffsetsPurgatoryParam.getOrElse( | ||
| new DelayedOperationPurgatory[DelayedRemoteListOffsets]( | ||
| "RemoteListOffsets", config.brokerId)) | ||
|
|
@@ -1637,7 +1640,7 @@ class ReplicaManager(val config: KafkaConfig, | |
| params: FetchParams, | ||
| responseCallback: Seq[(TopicIdPartition, FetchPartitionData)] => Unit, | ||
| logReadResults: Seq[(TopicIdPartition, LogReadResult)], | ||
| remoteFetchPartitionStatus: Seq[(TopicIdPartition, FetchPartitionStatus)]): Unit = { | ||
| fetchPartitionStatus: Seq[(TopicIdPartition, FetchPartitionStatus)]): Unit = { | ||
| val remoteFetchTasks = new util.HashMap[TopicIdPartition, Future[Void]] | ||
| val remoteFetchResults = new util.HashMap[TopicIdPartition, CompletableFuture[RemoteLogReadResult]] | ||
|
|
||
|
|
@@ -1649,10 +1652,10 @@ class ReplicaManager(val config: KafkaConfig, | |
|
|
||
| val remoteFetchMaxWaitMs = config.remoteLogManagerConfig.remoteFetchMaxWaitMs().toLong | ||
| val remoteFetch = new DelayedRemoteFetch(remoteFetchTasks, remoteFetchResults, remoteFetchInfos, remoteFetchMaxWaitMs, | ||
| remoteFetchPartitionStatus, params, logReadResults, this, responseCallback) | ||
| fetchPartitionStatus, params, logReadResults, this, responseCallback) | ||
|
|
||
| // create a list of (topic, partition) pairs to use as keys for this delayed fetch operation | ||
| val delayedFetchKeys = remoteFetchPartitionStatus.map { case (tp, _) => new TopicPartitionOperationKey(tp) }.toList | ||
kamalcph marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| val delayedFetchKeys = remoteFetchTasks.asScala.map { case (tp, _) => new TopicPartitionOperationKey(tp) }.toList | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I don't get the fix. I'm really surprised this fixes the memory leak. Could you explain why it leaks memory here? I thought we only use the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for the review!
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for the explanation! Yes, you're right. If we have purgeInterval = 1000, and each
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we go with the solution to reduce the purgeInterval from 1000 to 10, then in the worst case we will keep up to 10 remote fetched records in memory, and each fetched total record size is bounded by `fetch.max.bytes`. Another solution I came up with is that we add a WDYT?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
We can add the new attribute
The purgeInterval can be reduced to 0. The
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The reaper thread is invoked every
||
| delayedRemoteFetchPurgatory.tryCompleteElseWatch(remoteFetch, delayedFetchKeys.asJava) | ||
| } | ||
|
|
||
|
|
@@ -1737,6 +1740,8 @@ class ReplicaManager(val config: KafkaConfig, | |
| // try to complete the request immediately, otherwise put it into the purgatory; | ||
| // this is because while the delayed fetch operation is being created, new requests | ||
| // may arrive and hence make this operation completable. | ||
| // We only guarantee eventual cleanup via the next FETCH request for the same set of partitions or | ||
| // using reaper-thread. | ||
| delayedFetchPurgatory.tryCompleteElseWatch(delayedFetch, delayedFetchKeys.asJava) | ||
| } | ||
| } | ||
|
|
@@ -1926,11 +1931,14 @@ class ReplicaManager(val config: KafkaConfig, | |
| Optional.empty() | ||
| ) | ||
| } else { | ||
| // For consume fetch requests, create a dummy FetchDataInfo with the remote storage fetch information. | ||
| // For the topic-partitions that need remote data, we will use this information to read the data in another thread. | ||
| new FetchDataInfo(new LogOffsetMetadata(offset), MemoryRecords.EMPTY, false, Optional.empty(), | ||
| Optional.of(new RemoteStorageFetchInfo(adjustedMaxBytes, minOneMessage, tp, | ||
| fetchInfo, params.isolation))) | ||
| val remoteStorageFetchInfoOpt = if (adjustedMaxBytes > 0) { | ||
| // For consume fetch requests, create a dummy FetchDataInfo with the remote storage fetch information. | ||
| // For the topic-partitions that need remote data, we will use this information to read the data in another thread. | ||
| Optional.of(new RemoteStorageFetchInfo(adjustedMaxBytes, minOneMessage, tp, fetchInfo, params.isolation)) | ||
kamalcph marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } else { | ||
| Optional.empty[RemoteStorageFetchInfo]() | ||
| } | ||
| new FetchDataInfo(new LogOffsetMetadata(offset), MemoryRecords.EMPTY, false, Optional.empty(), remoteStorageFetchInfoOpt) | ||
| } | ||
|
|
||
| new LogReadResult(fetchDataInfo, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.