Skip to content

Commit fd71d0e

Browse files
jorgee, christopher-hakkaart, pditommaso, claude
authored
Limit S3 concurrent downloads to fix Java Heap OOM (#6402)
Signed-off-by: jorgee <[email protected]> Signed-off-by: Jorge Ejarque <[email protected]> Signed-off-by: Paolo Di Tommaso <[email protected]> Co-authored-by: Chris Hakkaart <[email protected]> Co-authored-by: Paolo Di Tommaso <[email protected]> Co-authored-by: Claude <[email protected]>
1 parent 771b8b6 commit fd71d0e

File tree

9 files changed

+492
-253
lines changed

9 files changed

+492
-253
lines changed

docs/guides/aws-java-sdk-v2.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@ The *S3 transfer manager* is a subsystem of SDK v2 that handles S3 uploads and d
2222

2323
You can configure the concurrency and throughput of the S3 transfer manager manually using the `aws.client.maxConcurrency` and `aws.client.maxNativeMemory` configuration options. Alternatively, you can use the `aws.client.targetThroughputInGbps` option to set both values automatically based on a target throughput.
2424

25-
## Multi-part uploads
25+
## Multi-part transfers
2626

27-
Multi-part uploads are handled by the S3 transfer manager. You can use the `aws.client.minimumPartSize` and `aws.client.multipartThreshold` config options to control when and how multi-part uploads are performed.
27+
Multi-part transfers are handled by the S3 transfer manager. You can use the `aws.client.minimumPartSize` and `aws.client.multipartThreshold` config options to control when and how multi-part transfers are performed.
28+
Concurrent multi-part downloads can consume a large amount of heap memory because of the buffer created for each transfer. To avoid out-of-memory errors, the total size consumed by these buffers is limited to 400 MB by default. You can use `aws.client.maxDownloadHeapMemory` to change this limit.
2829

2930
The following multi-part upload config options are no longer supported:
3031

docs/reference/config.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,9 @@ The following settings are available:
175175
`aws.client.maxConnections`
176176
: The maximum number of open HTTP connections used by the S3 transfer manager (default: `50`).
177177

178+
`aws.client.maxDownloadHeapMemory`
179+
: The maximum size of the heap memory buffer used by concurrent downloads. It must be at least 10 times the `minimumPartSize` (default: `400 MB`).
180+
178181
`aws.client.maxErrorRetry`
179182
: The maximum number of retry attempts for failed retryable requests (default: `-1`).
180183

plugins/nf-amazon/build.gradle

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -55,20 +55,20 @@ dependencies {
5555
compileOnly 'org.pf4j:pf4j:3.12.0'
5656

5757
api ('javax.xml.bind:jaxb-api:2.4.0-b180830.0359')
58-
api ('software.amazon.awssdk:s3:2.31.64')
59-
api ('software.amazon.awssdk:ec2:2.31.64')
60-
api ('software.amazon.awssdk:batch:2.31.64')
61-
api ('software.amazon.awssdk:iam:2.31.64')
62-
api ('software.amazon.awssdk:ecs:2.31.64')
63-
api ('software.amazon.awssdk:cloudwatchlogs:2.31.64')
64-
api ('software.amazon.awssdk:codecommit:2.31.64')
65-
api ('software.amazon.awssdk:sts:2.31.64')
66-
api ('software.amazon.awssdk:ses:2.31.64')
67-
api ('software.amazon.awssdk:sso:2.31.64')
68-
api ('software.amazon.awssdk:ssooidc:2.31.64')
69-
api ('software.amazon.awssdk:s3-transfer-manager:2.31.64')
70-
api ('software.amazon.awssdk:apache-client:2.31.64')
71-
api ('software.amazon.awssdk:aws-crt-client:2.31.64')
58+
api ('software.amazon.awssdk:s3:2.33.2')
59+
api ('software.amazon.awssdk:ec2:2.33.2')
60+
api ('software.amazon.awssdk:batch:2.33.2')
61+
api ('software.amazon.awssdk:iam:2.33.2')
62+
api ('software.amazon.awssdk:ecs:2.33.2')
63+
api ('software.amazon.awssdk:cloudwatchlogs:2.33.2')
64+
api ('software.amazon.awssdk:codecommit:2.33.2')
65+
api ('software.amazon.awssdk:sts:2.33.2')
66+
api ('software.amazon.awssdk:ses:2.33.2')
67+
api ('software.amazon.awssdk:sso:2.33.2')
68+
api ('software.amazon.awssdk:ssooidc:2.33.2')
69+
api ('software.amazon.awssdk:s3-transfer-manager:2.33.2')
70+
api ('software.amazon.awssdk:apache-client:2.33.2')
71+
api ('software.amazon.awssdk:aws-crt-client:2.33.2')
7272

7373
constraints {
7474
api 'com.fasterxml.jackson.core:jackson-databind:2.12.7.1'

plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsS3Config.groovy

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ class AwsS3Config implements ConfigScope {
7070
""")
7171
final Integer maxConnections
7272

73+
@ConfigOption
74+
@Description("""
75+
The maximum size for the heap memory buffer used by concurrent downloads. It must be at least 10 times the `minimumPartSize` (default:`400 MB`).
76+
""")
77+
final MemoryUnit maxDownloadHeapMemory
78+
7379
@ConfigOption
7480
@Description("""
7581
The maximum number of retry attempts for failed retryable requests (default: `-1`).
@@ -215,6 +221,13 @@ class AwsS3Config implements ConfigScope {
215221
""")
216222
final String uploadStorageClass
217223

224+
private static final long _1MB = 1024 * 1024;
225+
// According to CRT Async client docs https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/s3/S3CrtAsyncClientBuilder.html
226+
public static final long DEFAULT_PART_SIZE = 8 * _1MB;
227+
public static final int DEFAULT_INIT_BUFFER_PARTS = 10;
228+
// Maximum heap buffer size
229+
public static final long DEFAULT_MAX_DOWNLOAD_BUFFER_SIZE = 400 * _1MB;
230+
218231
AwsS3Config(Map opts) {
219232
this.anonymous = opts.anonymous as Boolean
220233
this.connectionTimeout = opts.connectionTimeout as Integer
@@ -224,6 +237,7 @@ class AwsS3Config implements ConfigScope {
224237
throw new IllegalArgumentException("S3 endpoint must begin with http:// or https:// prefix - offending value: '${endpoint}'")
225238
this.maxConcurrency = opts.maxConcurrency as Integer
226239
this.maxConnections = opts.maxConnections as Integer
240+
this.maxDownloadHeapMemory = opts.maxDownloadHeapMemory as MemoryUnit
227241
this.maxErrorRetry = opts.maxErrorRetry as Integer
228242
this.maxNativeMemory = opts.maxNativeMemory as MemoryUnit
229243
this.minimumPartSize = opts.minimumPartSize as MemoryUnit
@@ -246,6 +260,7 @@ class AwsS3Config implements ConfigScope {
246260
this.uploadMaxAttempts = opts.uploadMaxAttempts as Integer
247261
this.uploadMaxThreads = opts.uploadMaxThreads as Integer
248262
this.uploadRetrySleep = opts.uploadRetrySleep as Duration
263+
checkDownloadBufferParams()
249264
}
250265

251266
private String parseStorageClass(String value) {
@@ -283,6 +298,7 @@ class AwsS3Config implements ConfigScope {
283298
connection_timeout: connectionTimeout?.toString(),
284299
max_concurrency: maxConcurrency?.toString(),
285300
max_connections: maxConnections?.toString(),
301+
max_download_heap_memory: maxDownloadHeapMemory?.toBytes()?.toString(),
286302
max_error_retry: maxErrorRetry?.toString(),
287303
max_native_memory: maxNativeMemory?.toBytes()?.toString(),
288304
minimum_part_size: minimumPartSize?.toBytes()?.toString(),
@@ -306,4 +322,21 @@ class AwsS3Config implements ConfigScope {
306322
upload_storage_class: storageClass?.toString()
307323
].findAll { k, v -> v != null }
308324
}
325+
326+
void checkDownloadBufferParams() {
327+
if( maxDownloadHeapMemory != null && maxDownloadHeapMemory.toBytes() == 0L ) {
328+
throw new IllegalArgumentException("Configuration option `aws.client.maxDownloadHeapMemory` can't be 0")
329+
}
330+
if( minimumPartSize != null && minimumPartSize.toBytes() == 0L ) {
331+
throw new IllegalArgumentException("Configuration option `aws.client.minimumPartSize` can't be 0")
332+
}
333+
if( maxDownloadHeapMemory != null || minimumPartSize != null ) {
334+
final maxBuffer = maxDownloadHeapMemory ? maxDownloadHeapMemory.toBytes() : DEFAULT_MAX_DOWNLOAD_BUFFER_SIZE
335+
final partSize = minimumPartSize ? minimumPartSize.toBytes() : DEFAULT_PART_SIZE
336+
if( maxBuffer < DEFAULT_INIT_BUFFER_PARTS * partSize ) {
337+
throw new IllegalArgumentException("Configuration option `aws.client.maxDownloadHeapMemory` must be at least " + DEFAULT_INIT_BUFFER_PARTS + " times `aws.client.minimumPartSize`")
338+
}
339+
}
340+
341+
}
309342
}

0 commit comments

Comments
 (0)