Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
6cb37fe
Initial setup
jterapin Aug 27, 2025
d31ae4d
Minor adjustments
jterapin Aug 29, 2025
74ed189
Directory downloader impl
jterapin Aug 29, 2025
4e8db17
Directory uploader impl
jterapin Aug 29, 2025
098049d
Merge branch 'version-3' into tm-directory-features
jterapin Aug 29, 2025
8f387d2
Merge branch 'version-3' into tm-directory-features
jterapin Sep 8, 2025
7749ba5
Add default executor
jterapin Sep 9, 2025
99f0de6
Add running check to default executor
jterapin Sep 9, 2025
441fa82
Refactor MultipartFileUploader with executor
jterapin Sep 9, 2025
c792439
Fix typo in MultipartFileUploader
jterapin Sep 9, 2025
adce496
Update TM upload file with executor
jterapin Sep 9, 2025
012c2bc
Merge branch 'version-3' into tm-directory-features
jterapin Sep 9, 2025
ee9c9da
Merge branch 'version-3' into tm-directory-features
jterapin Sep 10, 2025
75df844
Merge from version-3
jterapin Sep 12, 2025
e5d3245
Merge branch 'version-3' into tm-directory-features
jterapin Sep 15, 2025
173f5e4
Merge branch 'version-3' into tm-directory-features
jterapin Sep 17, 2025
cf88ff2
Merge branch 'version-3' into tm-directory-features
jterapin Sep 17, 2025
2758c4d
Update to only spawn workers when needed
jterapin Sep 17, 2025
b92d3b3
Update directory uploader
jterapin Sep 18, 2025
6afb495
Update directory uploader
jterapin Sep 18, 2025
86b53e8
Update uploader
jterapin Sep 18, 2025
d587ae1
Merge branch 'version-3' into tm-directory-features
jterapin Sep 18, 2025
eae3814
Add minor improvements to directory uploader
jterapin Sep 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions gems/aws-sdk-s3/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
Unreleased Changes
------------------

* Feature - Add `Aws::S3::DirectoryUploader` and `Aws::S3::DirectoryDownloader` to support directory upload and download operations, plus a `DefaultExecutor` thread pool used by the transfer manager.

1.199.0 (2025-09-08)
------------------

Expand Down
3 changes: 3 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/customizations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ module S3
autoload :BucketRegionCache, 'aws-sdk-s3/bucket_region_cache'
autoload :Encryption, 'aws-sdk-s3/encryption'
autoload :EncryptionV2, 'aws-sdk-s3/encryption_v2'
autoload :DefaultExecutor, 'aws-sdk-s3/default_executor'
autoload :FilePart, 'aws-sdk-s3/file_part'
autoload :FileUploader, 'aws-sdk-s3/file_uploader'
autoload :FileDownloader, 'aws-sdk-s3/file_downloader'
Expand All @@ -18,6 +19,8 @@ module S3
autoload :ObjectMultipartCopier, 'aws-sdk-s3/object_multipart_copier'
autoload :PresignedPost, 'aws-sdk-s3/presigned_post'
autoload :Presigner, 'aws-sdk-s3/presigner'
autoload :DirectoryUploader, 'aws-sdk-s3/directory_uploader'
autoload :DirectoryDownloader, 'aws-sdk-s3/directory_downloader'
autoload :TransferManager, 'aws-sdk-s3/transfer_manager'

# s3 express session auth
Expand Down
55 changes: 55 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/default_executor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# frozen_string_literal: true

module Aws
module S3
# @api private
class DefaultExecutor
def initialize(options = {})
@queue = Queue.new
@max_threads = options[:max_threads] || 10
@pool = []
@running = true
@mutex = Mutex.new
end

def post(*args, &block)
raise 'Executor is not running' unless @running

@queue << [args, block]
ensure_worker_available
end

def shutdown
@running = false
@max_threads.times { @queue << :shutdown }
@pool.each(&:join)
@pool.clear
true
end

def running?
@running
end

private

def ensure_worker_available
@mutex.synchronize do
@pool.select!(&:alive?)
@pool << spawn_worker if @pool.size < @max_threads
end
end

def spawn_worker
Thread.new do
while (job = @queue.shift)
break if job == :shutdown

args, block = job
block.call(*args)
end
end
end
end
end
end
105 changes: 105 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/directory_downloader.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# frozen_string_literal: true

module Aws
module S3
# Raised when DirectoryDownloader fails to download objects from S3 bucket
class DirectoryDownloadError < StandardError
def initialize(message, errors = [])
@errors = errors
super(message)
end

# @return [Array<StandardError>] The list of errors encountered when downloading objects
attr_reader :errors
end

# @api private
class DirectoryDownloader
def initialize(options = {})
@client = options[:client] || Client.new
@executor = options[:executor]
end

attr_reader :client, :executor

def download(destination, bucket:, **options)
if File.exist?(destination)
raise ArgumentError 'invalid destination, expected a directory' unless File.directory?(destination)
else
FileUtils.mkdir_p(destination)
end

download_opts = options.dup
@destination = destination
@bucket = bucket
@recursive = download_opts.delete(:recursive) || false
@s3_prefix = download_opts.delete(:s3_prefix)
@s3_delimiter = download_opts.delete(:s3_delimiter) || '/'
@failure_policy = download_opts.delete(:failure_policy) || :abort

downloader = FileDownloader.new(client: client, executor: @executor)
@download_queue = SizedQueue.new(100)
@abort_download = false
@errors = []

Thread.new do
stream_keys
@download_queue << :done
end

download_attempts = 0
completion_queue = Queue.new
while (queue_key = @download_queue.shift) != :done
break if @abort_download

download_attempts += 1
@executor.post(queue_key) do |k|
normalized_key = normalize_key(k)
full_path = File.join(@destination, normalized_key)
dir_path = File.dirname(full_path)
FileUtils.mkdir_p(dir_path) unless dir_path == @destination || Dir.exist?(dir_path)

downloader.download(full_path, download_opts.merge(bucket: @bucket, key: k))
rescue StandardError => e
@errors << e
@abort_download = true if @failure_policy == :abort
ensure
completion_queue << :done
end
end

download_attempts.times { completion_queue.pop }

if @abort_download
msg = "failed to download directory: attempt to download #{download_attempts} objects " \
"but failed to download #{@errors.count} objects."
raise DirectoryDownloadError, msg + @errors.to_s
else
{
downloaded: download_attempts - @errors.count,
errors: @errors.count
}
end
end

def normalize_key(key)
key = key.delete_prefix(@s3_prefix) if @s3_prefix
return key.tr('/', @s3_delimiter) if @s3_delimiter != '/'
return key if File::SEPARATOR == '/'

key.tr('/', File::SEPARATOR)
end

def stream_keys(continuation_token: nil)
resp = @client.list_objects_v2(bucket: @bucket, continuation_token: continuation_token)
resp.contents.each do |o|
break if @abort_download
next if o.key.end_with?('/')

@download_queue << o.key
end
stream_keys(continuation_token: resp.next_continuation_token) if resp.next_continuation_token
end
end
end
end
169 changes: 169 additions & 0 deletions gems/aws-sdk-s3/lib/aws-sdk-s3/directory_uploader.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
# frozen_string_literal: true

require 'set'

module Aws
module S3
# Raised when DirectoryUploader fails to upload files to S3 bucket
class DirectoryUploadError < StandardError
def initialize(message, errors = [])
@errors = errors
super(message)
end

# @return [Array<StandardError>] The list of errors encountered when uploading files
attr_reader :errors
end

# @api private
class DirectoryUploader
def initialize(options = {})
@client = options[:client] || Client.new
@executor = options[:executor] || DefaultExecutor.new
@options = options
end

# @return [Client]
attr_reader :client

def upload(source, bucket:, **options)
raise ArgumentError, 'Invalid directory' unless Dir.exist?(source)

upload_opts = options.dup
@source = source
@s3_prefix = upload_opts.delete(:s3_prefix)
@recursive = upload_opts.delete(:recursive) || false
@follow_symlinks = upload_opts.delete(:follow_symlinks) || false
@ignore_failure = upload_opts.delete(:failure_policy) || false
@filter_callback = upload_opts.delete(:filter_callback)
@abort_upload = false
@upload_queue = SizedQueue.new(100)
@errors = []

uploader = FileUploader.new(
multipart_threshold: upload_opts.delete(:multipart_threshold),
client: @client,
executor: @executor
)
queue_files
upload_attempts = 0
completion_queue = Queue.new
queue_executor = DefaultExecutor.new

while (file = @upload_queue.shift) != :done
break if @abort_upload

upload_attempts += 1
queue_executor.post(file) do |f|
uploader.upload(f[:path], upload_opts.merge(bucket: bucket, key: f[:key]))
rescue StandardError => e
@errors << e
@abort_upload = true unless @ignore_failure
ensure
completion_queue << :done
end
end
upload_attempts.times { completion_queue.pop }
build_result(upload_attempts)
ensure
queue_executor.shutdown
@executor.shutdown unless @options[:executor]
end

private

def build_result(upload_count)
if @abort_upload
msg = "failed to upload directory: uploaded #{upload_count - @errors.count} files " \
"but failed to upload #{@errors.count} files."
raise DirectoryUploadError.new(msg, @errors)
else
result = { completed_uploads: upload_count - @errors.count, failed_uploads: @errors.count }
result[:errors] = @errors if @errors.any?
result
end
end

def direct_traverse
Dir.each_child(@source) do |entry|
break if @abort_upload

full_path = File.join(@source, entry)
next unless @filter_callback&.call(full_path, entry)
next unless valid_entry?(full_path)

queue_file(full_path, entry)
rescue StandardError => e
@errors << e
@abort_upload = true unless @ignore_failure
end
end

def traverse_recursively
if @follow_symlinks
visited = Set.new
visited << File.stat(@source).ino
traverse_directory(@source, visited: visited)
else
traverse_directory(@source)
end
end

def traverse_directory(dir_path, prefix: '', visited: nil)
return if @abort_upload

Dir.each_child(dir_path) do |entry|
break if @abort_upload

full_path = File.join(dir_path, entry)
next unless @filter_callback&.call(full_path, entry)
next if !@follow_symlinks && File.symlink?(full_path)

if File.directory?(full_path)
process_directory(full_path, entry, prefix, visited)
elsif File.file?(full_path) || File.symlink?(full_path)
key = prefix.empty? ? entry : File.join(prefix, entry)
queue_file(full_path, key)
end
rescue StandardError => e
@errors << e
@abort_upload = true unless @ignore_failure
end
end

def process_directory(path, dir, prefix, visited)
if @follow_symlinks && visited
stat = File.stat(path)
return if visited.include?(stat.ino)

visited << stat.ino
end
new_prefix = prefix.empty? ? dir : File.join(prefix, dir)
traverse_directory(path, prefix: new_prefix, visited: visited)
end

def queue_files
Thread.new do
if @recursive
traverse_recursively
else
direct_traverse
end
@upload_queue << :done
end
end

def queue_file(path, key)
entry = { path: path }
entry[:key] = @s3_prefix ? File.join(@s3_prefix, key) : key
@upload_queue << entry
end

def valid_entry?(path)
return false if File.directory?(path) || (!@follow_symlinks && File.symlink?(path))

File.file?(path) || File.symlink?(path)
end
end
end
end
Loading
Loading